; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTRICTALIGN %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+strict-align -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=STRICTALIGN %s

; This file contains tests for the AArch64 load/store optimizer.

; Layout used by the first group of tests: every %struct.* places a payload
; struct (%s.*) after %padding, which is four pointers. The payload is
; therefore reached at a non-zero offset from the incoming pointer; the
; expected patterns below use #32 for that offset.
%padding = type { i8*, i8*, i8*, i8* }
%s.byte = type { i8, i8 }
%s.halfword = type { i16, i16 }
%s.word = type { i32, i32 }
%s.doubleword = type { i64, i32 }
%s.quadword = type { fp128, i32 }
%s.float = type { float, i32 }
%s.double = type { double, i32 }
%struct.byte = type { %padding, %s.byte }
%struct.halfword = type { %padding, %s.halfword }
%struct.word = type { %padding, %s.word }
%struct.doubleword = type { %padding, %s.doubleword }
%struct.quadword = type { %padding, %s.quadword }
%struct.float = type { %padding, %s.float }
%struct.double = type { %padding, %s.double }

; Check the following transform:
;
; (ldr|str) X, [x0, #32]
;  ...
; add x0, x0, #32
;  ->
; (ldr|str) X, [x0, #32]!
;
; with X being either w1, x1, s0, d0 or q0.
;
; Shape shared by every test in this group: the 'entry' block accesses field 0
; of the payload struct, and the 'bar' block passes the address of the payload
; struct itself to an external @bar_* function. The payload address is thus
; needed both for the memory access and as a call argument.

declare void @bar_byte(%s.byte*, i8)

define void @load-pre-indexed-byte(%struct.byte* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-byte
; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0
  %add = load i8, i8* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1
  tail call void @bar_byte(%s.byte* %c, i8 %add)
  ret void
}

define void @store-pre-indexed-byte(%struct.byte* %ptr, i8 %val) nounwind {
; CHECK-LABEL: store-pre-indexed-byte
; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0
  store i8 %val, i8* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1
  tail call void @bar_byte(%s.byte* %c, i8 %val)
  ret void
}

declare void @bar_halfword(%s.halfword*, i16)

define void @load-pre-indexed-halfword(%struct.halfword* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-halfword
; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0
  %add = load i16, i16* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1
  tail call void @bar_halfword(%s.halfword* %c, i16 %add)
  ret void
}

define void @store-pre-indexed-halfword(%struct.halfword* %ptr, i16 %val) nounwind {
; CHECK-LABEL: store-pre-indexed-halfword
; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0
  store i16 %val, i16* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1
  tail call void @bar_halfword(%s.halfword* %c, i16 %val)
  ret void
}

declare void @bar_word(%s.word*, i32)

define void @load-pre-indexed-word(%struct.word* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
  %add = load i32, i32* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
  tail call void @bar_word(%s.word* %c, i32 %add)
  ret void
}

define void @store-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind {
; CHECK-LABEL: store-pre-indexed-word
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
  store i32 %val, i32* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
  tail call void @bar_word(%s.word* %c, i32 %val)
  ret void
}

declare void @bar_doubleword(%s.doubleword*, i64)

define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-doubleword
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
  %add = load i64, i64* %a, align 8
  br label %bar
bar:
  %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
  tail call void @bar_doubleword(%s.doubleword* %c, i64 %add)
  ret void
}

define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) nounwind {
; CHECK-LABEL: store-pre-indexed-doubleword
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
  store i64 %val, i64* %a, align 8
  br label %bar
bar:
  %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
  tail call void @bar_doubleword(%s.doubleword* %c, i64 %val)
  ret void
}

declare void @bar_quadword(%s.quadword*, fp128)

define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-quadword
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
  %add = load fp128, fp128* %a, align 16
  br label %bar
bar:
  %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
  tail call void @bar_quadword(%s.quadword* %c, fp128 %add)
  ret void
}

define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) nounwind {
; CHECK-LABEL: store-pre-indexed-quadword
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
  store fp128 %val, fp128* %a, align 16
  br label %bar
bar:
  %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
  tail call void @bar_quadword(%s.quadword* %c, fp128 %val)
  ret void
}

declare void @bar_float(%s.float*, float)

define void @load-pre-indexed-float(%struct.float* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-float
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0
  %add = load float, float* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1
  tail call void @bar_float(%s.float* %c, float %add)
  ret void
}

define void @store-pre-indexed-float(%struct.float* %ptr, float %val) nounwind {
; CHECK-LABEL: store-pre-indexed-float
; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0
  store float %val, float* %a, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1
  tail call void @bar_float(%s.float* %c, float %val)
  ret void
}

declare void @bar_double(%s.double*, double)

define void @load-pre-indexed-double(%struct.double* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-double
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
  %add = load double, double* %a, align 8
  br label %bar
bar:
  %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
  tail call void @bar_double(%s.double* %c, double %add)
  ret void
}

define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwind {
; CHECK-LABEL: store-pre-indexed-double
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
  %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
  store double %val, double* %a, align 8
  br label %bar
bar:
  %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
  tail call void @bar_double(%s.double* %c, double %val)
  ret void
}

; Check the following transform:
;
; (ldp|stp) w1, w2, [x0, #32]
;  ...
; add x0, x0, #32
;  ->
; (ldp|stp) w1, w2, [x0, #32]!
;
; The pair tests access both i32 fields of %s.word in 'entry' and then pass
; the payload address to @bar_word. Unlike the single-register tests above,
; they name x0 explicitly and also require that no separate
; "add x0, x0, #32" follows the paired access.

define void @load-pair-pre-indexed-word(%struct.word* %ptr) nounwind {
; CHECK-LABEL: load-pair-pre-indexed-word
; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
; CHECK-NOT: add x0, x0, #32
entry:
  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
  %a1 = load i32, i32* %a, align 4
  %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1
  %b1 = load i32, i32* %b, align 4
  %add = add i32 %a1, %b1
  br label %bar
bar:
  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
  tail call void @bar_word(%s.word* %c, i32 %add)
  ret void
}

define void @store-pair-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind {
; CHECK-LABEL: store-pair-pre-indexed-word
; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
; CHECK-NOT: add x0, x0, #32
entry:
  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
  store i32 %val, i32* %a, align 4
  %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1
  store i32 %val, i32* %b, align 4
  br label %bar
bar:
  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
  tail call void @bar_word(%s.word* %c, i32 %val)
  ret void
}

; Check the following transform:
;
; add x8, x8, #16
;  ...
; ldr X, [x8]
;  ->
; ldr X, [x8, #16]!
;
; with X being either w0, x0, s0, d0 or q0.
279 280 %pre.struct.i32 = type { i32, i32, i32, i32, i32} 281 %pre.struct.i64 = type { i32, i64, i64, i64, i64} 282 %pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>} 283 %pre.struct.float = type { i32, float, float, float} 284 %pre.struct.double = type { i32, double, double, double} 285 286 define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, 287 %pre.struct.i32* %load2) nounwind { 288 ; CHECK-LABEL: load-pre-indexed-word2 289 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]! 290 br i1 %cond, label %if.then, label %if.end 291 if.then: 292 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 293 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1 294 br label %return 295 if.end: 296 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2 297 br label %return 298 return: 299 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 300 %ret = load i32, i32* %retptr 301 ret i32 %ret 302 } 303 304 define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond, 305 %pre.struct.i64* %load2) nounwind { 306 ; CHECK-LABEL: load-pre-indexed-doubleword2 307 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]! 308 br i1 %cond, label %if.then, label %if.end 309 if.then: 310 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 311 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1 312 br label %return 313 if.end: 314 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2 315 br label %return 316 return: 317 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 318 %ret = load i64, i64* %retptr 319 ret i64 %ret 320 } 321 322 define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond, 323 %pre.struct.i128* %load2) nounwind { 324 ; CHECK-LABEL: load-pre-indexed-quadword2 325 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]! 
326 br i1 %cond, label %if.then, label %if.end 327 if.then: 328 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 329 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1 330 br label %return 331 if.end: 332 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2 333 br label %return 334 return: 335 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 336 %ret = load <2 x i64>, <2 x i64>* %retptr 337 ret <2 x i64> %ret 338 } 339 340 define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond, 341 %pre.struct.float* %load2) nounwind { 342 ; CHECK-LABEL: load-pre-indexed-float2 343 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]! 344 br i1 %cond, label %if.then, label %if.end 345 if.then: 346 %load1 = load %pre.struct.float*, %pre.struct.float** %this 347 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1 348 br label %return 349 if.end: 350 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2 351 br label %return 352 return: 353 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 354 %ret = load float, float* %retptr 355 ret float %ret 356 } 357 358 define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond, 359 %pre.struct.double* %load2) nounwind { 360 ; CHECK-LABEL: load-pre-indexed-double2 361 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
362 br i1 %cond, label %if.then, label %if.end 363 if.then: 364 %load1 = load %pre.struct.double*, %pre.struct.double** %this 365 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1 366 br label %return 367 if.end: 368 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2 369 br label %return 370 return: 371 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 372 %ret = load double, double* %retptr 373 ret double %ret 374 } 375 376 define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, 377 %pre.struct.i32* %load2) nounwind { 378 ; CHECK-LABEL: load-pre-indexed-word3 379 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]! 380 br i1 %cond, label %if.then, label %if.end 381 if.then: 382 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 383 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 384 br label %return 385 if.end: 386 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 387 br label %return 388 return: 389 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 390 %ret = load i32, i32* %retptr 391 ret i32 %ret 392 } 393 394 define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, 395 %pre.struct.i64* %load2) nounwind { 396 ; CHECK-LABEL: load-pre-indexed-doubleword3 397 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]! 
398 br i1 %cond, label %if.then, label %if.end 399 if.then: 400 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 401 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2 402 br label %return 403 if.end: 404 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3 405 br label %return 406 return: 407 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 408 %ret = load i64, i64* %retptr 409 ret i64 %ret 410 } 411 412 define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, 413 %pre.struct.i128* %load2) nounwind { 414 ; CHECK-LABEL: load-pre-indexed-quadword3 415 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 416 br i1 %cond, label %if.then, label %if.end 417 if.then: 418 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 419 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 420 br label %return 421 if.end: 422 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 423 br label %return 424 return: 425 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 426 %ret = load <2 x i64>, <2 x i64>* %retptr 427 ret <2 x i64> %ret 428 } 429 430 define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, 431 %pre.struct.float* %load2) nounwind { 432 ; CHECK-LABEL: load-pre-indexed-float3 433 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
434 br i1 %cond, label %if.then, label %if.end 435 if.then: 436 %load1 = load %pre.struct.float*, %pre.struct.float** %this 437 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 438 br label %return 439 if.end: 440 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 441 br label %return 442 return: 443 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 444 %ret = load float, float* %retptr 445 ret float %ret 446 } 447 448 define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, 449 %pre.struct.double* %load2) nounwind { 450 ; CHECK-LABEL: load-pre-indexed-double3 451 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]! 452 br i1 %cond, label %if.then, label %if.end 453 if.then: 454 %load1 = load %pre.struct.double*, %pre.struct.double** %this 455 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 456 br label %return 457 if.end: 458 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 459 br label %return 460 return: 461 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 462 %ret = load double, double* %retptr 463 ret double %ret 464 } 465 466 ; Check the following transform: 467 ; 468 ; add x8, x8, #16 469 ; ... 470 ; str X, [x8] 471 ; -> 472 ; str X, [x8, #16]! 473 ; 474 ; with X being either w0, x0, s0, d0 or q0. 475 476 define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, 477 %pre.struct.i32* %load2, 478 i32 %val) nounwind { 479 ; CHECK-LABEL: store-pre-indexed-word2 480 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]! 
481 br i1 %cond, label %if.then, label %if.end 482 if.then: 483 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 484 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1 485 br label %return 486 if.end: 487 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2 488 br label %return 489 return: 490 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 491 store i32 %val, i32* %retptr 492 ret void 493 } 494 495 define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond, 496 %pre.struct.i64* %load2, 497 i64 %val) nounwind { 498 ; CHECK-LABEL: store-pre-indexed-doubleword2 499 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]! 500 br i1 %cond, label %if.then, label %if.end 501 if.then: 502 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 503 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1 504 br label %return 505 if.end: 506 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2 507 br label %return 508 return: 509 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 510 store i64 %val, i64* %retptr 511 ret void 512 } 513 514 define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond, 515 %pre.struct.i128* %load2, 516 <2 x i64> %val) nounwind { 517 ; CHECK-LABEL: store-pre-indexed-quadword2 518 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]! 
519 br i1 %cond, label %if.then, label %if.end 520 if.then: 521 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 522 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1 523 br label %return 524 if.end: 525 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2 526 br label %return 527 return: 528 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 529 store <2 x i64> %val, <2 x i64>* %retptr 530 ret void 531 } 532 533 define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond, 534 %pre.struct.float* %load2, 535 float %val) nounwind { 536 ; CHECK-LABEL: store-pre-indexed-float2 537 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]! 538 br i1 %cond, label %if.then, label %if.end 539 if.then: 540 %load1 = load %pre.struct.float*, %pre.struct.float** %this 541 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1 542 br label %return 543 if.end: 544 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2 545 br label %return 546 return: 547 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 548 store float %val, float* %retptr 549 ret void 550 } 551 552 define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond, 553 %pre.struct.double* %load2, 554 double %val) nounwind { 555 ; CHECK-LABEL: store-pre-indexed-double2 556 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
557 br i1 %cond, label %if.then, label %if.end 558 if.then: 559 %load1 = load %pre.struct.double*, %pre.struct.double** %this 560 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1 561 br label %return 562 if.end: 563 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2 564 br label %return 565 return: 566 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 567 store double %val, double* %retptr 568 ret void 569 } 570 571 define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, 572 %pre.struct.i32* %load2, 573 i32 %val) nounwind { 574 ; CHECK-LABEL: store-pre-indexed-word3 575 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]! 576 br i1 %cond, label %if.then, label %if.end 577 if.then: 578 %load1 = load %pre.struct.i32*, %pre.struct.i32** %this 579 %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 580 br label %return 581 if.end: 582 %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 583 br label %return 584 return: 585 %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] 586 store i32 %val, i32* %retptr 587 ret void 588 } 589 590 define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, 591 %pre.struct.i64* %load2, 592 i64 %val) nounwind { 593 ; CHECK-LABEL: store-pre-indexed-doubleword3 594 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]! 
595 br i1 %cond, label %if.then, label %if.end 596 if.then: 597 %load1 = load %pre.struct.i64*, %pre.struct.i64** %this 598 %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3 599 br label %return 600 if.end: 601 %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4 602 br label %return 603 return: 604 %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] 605 store i64 %val, i64* %retptr 606 ret void 607 } 608 609 define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, 610 %pre.struct.i128* %load2, 611 <2 x i64> %val) nounwind { 612 ; CHECK-LABEL: store-pre-indexed-quadword3 613 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 614 br i1 %cond, label %if.then, label %if.end 615 if.then: 616 %load1 = load %pre.struct.i128*, %pre.struct.i128** %this 617 %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 618 br label %return 619 if.end: 620 %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 621 br label %return 622 return: 623 %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] 624 store <2 x i64> %val, <2 x i64>* %retptr 625 ret void 626 } 627 628 define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, 629 %pre.struct.float* %load2, 630 float %val) nounwind { 631 ; CHECK-LABEL: store-pre-indexed-float3 632 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
633 br i1 %cond, label %if.then, label %if.end 634 if.then: 635 %load1 = load %pre.struct.float*, %pre.struct.float** %this 636 %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 637 br label %return 638 if.end: 639 %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 640 br label %return 641 return: 642 %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] 643 store float %val, float* %retptr 644 ret void 645 } 646 647 define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, 648 %pre.struct.double* %load2, 649 double %val) nounwind { 650 ; CHECK-LABEL: store-pre-indexed-double3 651 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]! 652 br i1 %cond, label %if.then, label %if.end 653 if.then: 654 %load1 = load %pre.struct.double*, %pre.struct.double** %this 655 %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 656 br label %return 657 if.end: 658 %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 659 br label %return 660 return: 661 %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] 662 store double %val, double* %retptr 663 ret void 664 } 665 666 ; Check the following transform: 667 ; 668 ; ldr X, [x20] 669 ; ... 670 ; add x20, x20, #32 671 ; -> 672 ; ldr X, [x20], #32 673 ; 674 ; with X being either w0, x0, s0, d0 or q0. 

; Every load-post-indexed-* test below has the same shape: the loop loads the
; element just before %iv2 and the element at %iv2, hands each value to the
; matching @use-* function, and then advances %iv2 by four elements. The
; post-index immediate expected by the checks is therefore four times the
; element size in bytes.

; i8 elements: 4 * 1 byte gives the #4 immediate.
define void @load-post-indexed-byte(i8* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-byte
; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}], #4
entry:
  %gep1 = getelementptr i8, i8* %array, i64 2
  br label %body

body:
  %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i8, i8* %iv2, i64 -1
  %load = load i8, i8* %gep2
  call void @use-byte(i8 %load)
  %load2 = load i8, i8* %iv2
  call void @use-byte(i8 %load2)
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i8, i8* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; i16 elements: 4 * 2 bytes gives the #8 immediate.
define void @load-post-indexed-halfword(i16* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-halfword
; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}], #8
entry:
  %gep1 = getelementptr i16, i16* %array, i64 2
  br label %body

body:
  %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i16, i16* %iv2, i64 -1
  %load = load i16, i16* %gep2
  call void @use-halfword(i16 %load)
  %load2 = load i16, i16* %iv2
  call void @use-halfword(i16 %load2)
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i16, i16* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; i32 elements: 4 * 4 bytes gives the #16 immediate.
define void @load-post-indexed-word(i32* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
  %gep1 = getelementptr i32, i32* %array, i64 2
  br label %body

body:
  %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i32, i32* %iv2, i64 -1
  %load = load i32, i32* %gep2
  call void @use-word(i32 %load)
  %load2 = load i32, i32* %iv2
  call void @use-word(i32 %load2)
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i32, i32* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; i64 elements: 4 * 8 bytes gives the #32 immediate.
define void @load-post-indexed-doubleword(i64* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-doubleword
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
  %gep1 = getelementptr i64, i64* %array, i64 2
  br label %body

body:
  %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i64, i64* %iv2, i64 -1
  %load = load i64, i64* %gep2
  call void @use-doubleword(i64 %load)
  %load2 = load i64, i64* %iv2
  call void @use-doubleword(i64 %load2)
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i64, i64* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; <2 x i64> elements: 4 * 16 bytes gives the #64 immediate.
define void @load-post-indexed-quadword(<2 x i64>* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-quadword
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #64
entry:
  %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
  br label %body

body:
  %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
  %load = load <2 x i64>, <2 x i64>* %gep2
  call void @use-quadword(<2 x i64> %load)
  %load2 = load <2 x i64>, <2 x i64>* %iv2
  call void @use-quadword(<2 x i64> %load2)
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; float elements: 4 * 4 bytes gives the #16 immediate.
define void @load-post-indexed-float(float* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-float
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
  %gep1 = getelementptr float, float* %array, i64 2
  br label %body

body:
  %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr float, float* %iv2, i64 -1
  %load = load float, float* %gep2
  call void @use-float(float %load)
  %load2 = load float, float* %iv2
  call void @use-float(float %load2)
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr float, float* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; double elements: 4 * 8 bytes gives the #32 immediate.
define void @load-post-indexed-double(double* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-double
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
  %gep1 = getelementptr double, double* %array, i64 2
  br label %body

body:
  %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr double, double* %iv2, i64 -1
  %load = load double, double* %gep2
  call void @use-double(double %load)
  %load2 = load double, double* %iv2
  call void @use-double(double %load2)
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr double, double* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; Check the following transform:
;
; str X, [x20]
;  ...
; add x20, x20, #32
;  ->
; str X, [x20], #32
;
; with X being either w0, x0, s0, d0 or q0.

; Every store-post-indexed-* test below has the same shape: the loop loads the
; element just before %iv2 and hands it to the matching @use-* function,
; stores %val at %iv2, and then advances %iv2 by four elements. The post-index
; immediate expected by the checks is therefore four times the element size
; in bytes.

; i8 elements: 4 * 1 byte gives the #4 immediate.
define void @store-post-indexed-byte(i8* %array, i64 %count, i8 %val) nounwind {
; CHECK-LABEL: store-post-indexed-byte
; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #4
entry:
  %gep1 = getelementptr i8, i8* %array, i64 2
  br label %body

body:
  %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i8, i8* %iv2, i64 -1
  %load = load i8, i8* %gep2
  call void @use-byte(i8 %load)
  store i8 %val, i8* %iv2
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i8, i8* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; i16 elements: 4 * 2 bytes gives the #8 immediate.
define void @store-post-indexed-halfword(i16* %array, i64 %count, i16 %val) nounwind {
; CHECK-LABEL: store-post-indexed-halfword
; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #8
entry:
  %gep1 = getelementptr i16, i16* %array, i64 2
  br label %body

body:
  %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i16, i16* %iv2, i64 -1
  %load = load i16, i16* %gep2
  call void @use-halfword(i16 %load)
  store i16 %val, i16* %iv2
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i16, i16* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; i32 elements: 4 * 4 bytes gives the #16 immediate.
define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind {
; CHECK-LABEL: store-post-indexed-word
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
  %gep1 = getelementptr i32, i32* %array, i64 2
  br label %body

body:
  %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i32, i32* %iv2, i64 -1
  %load = load i32, i32* %gep2
  call void @use-word(i32 %load)
  store i32 %val, i32* %iv2
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i32, i32* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; i64 elements: 4 * 8 bytes gives the #32 immediate.
define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind {
; CHECK-LABEL: store-post-indexed-doubleword
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
  %gep1 = getelementptr i64, i64* %array, i64 2
  br label %body

body:
  %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr i64, i64* %iv2, i64 -1
  %load = load i64, i64* %gep2
  call void @use-doubleword(i64 %load)
  store i64 %val, i64* %iv2
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr i64, i64* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; <2 x i64> elements: 4 * 16 bytes gives the #64 immediate.
define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind {
; CHECK-LABEL: store-post-indexed-quadword
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64
entry:
  %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
  br label %body

body:
  %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
  %load = load <2 x i64>, <2 x i64>* %gep2
  call void @use-quadword(<2 x i64> %load)
  store <2 x i64> %val, <2 x i64>* %iv2
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; float elements: 4 * 4 bytes gives the #16 immediate.
define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind {
; CHECK-LABEL: store-post-indexed-float
; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
  %gep1 = getelementptr float, float* %array, i64 2
  br label %body

body:
  %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr float, float* %iv2, i64 -1
  %load = load float, float* %gep2
  call void @use-float(float %load)
  store float %val, float* %iv2
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr float, float* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; double elements: 4 * 8 bytes gives the #32 immediate.
define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind {
; CHECK-LABEL: store-post-indexed-double
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
  %gep1 = getelementptr double, double* %array, i64 2
  br label %body

body:
  %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  %gep2 = getelementptr double, double* %iv2, i64 -1
  %load = load double, double* %gep2
  call void @use-double(double %load)
  store double %val, double* %iv2
  %iv.next = add i64 %iv, -4
  %gep3 = getelementptr double, double* %iv2, i64 4
  %cond = icmp eq i64 %iv.next, 0
  br i1 %cond, label %exit, label %body

exit:
  ret void
}

; External sinks called by the post-indexed load/store loops above, one per
; element type.
declare void @use-byte(i8)
declare void @use-halfword(i16)
declare void @use-word(i32)
declare void @use-doubleword(i64)
declare void @use-quadword(<2 x i64>)
declare void @use-float(float)
declare void @use-double(double)

; Check the following transform:
;
; stp w0, [x20]
;  ...
; add x20, x20, #32
;  ->
; stp w0, [x20], #32

; Each store-pair-post-indexed-* test below allocates two two-field structs,
; loads both fields of %src and stores them into the matching fields of %dst.
; The checks expect the two stores to appear as a single stp addressed off sp
; with a post-index immediate, followed by ret.

; Two i32 fields; the checks expect a #16 post-index on sp.
define void @store-pair-post-indexed-word() nounwind {
; CHECK-LABEL: store-pair-post-indexed-word
; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [sp], #16
; CHECK: ret
  %src = alloca { i32, i32 }, align 8
  %dst = alloca { i32, i32 }, align 8

  %src.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 0
  %src.real = load i32, i32* %src.realp
  %src.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 1
  %src.imag = load i32, i32* %src.imagp

  %dst.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 0
  %dst.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 1
  store i32 %src.real, i32* %dst.realp
  store i32 %src.imag, i32* %dst.imagp
  ret void
}

; Two i64 fields; the checks expect a #32 post-index on sp.
define void @store-pair-post-indexed-doubleword() nounwind {
; CHECK-LABEL: store-pair-post-indexed-doubleword
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [sp], #32
; CHECK: ret
  %src = alloca { i64, i64 }, align 8
  %dst = alloca { i64, i64 }, align 8

  %src.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 0
  %src.real = load i64, i64* %src.realp
  %src.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 1
  %src.imag = load i64, i64* %src.imagp

  %dst.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 0
  %dst.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 1
  store i64 %src.real, i64* %dst.realp
  store i64 %src.imag, i64* %dst.imagp
  ret void
}

; Two float fields; the checks expect a #16 post-index on sp.
define void @store-pair-post-indexed-float() nounwind {
; CHECK-LABEL: store-pair-post-indexed-float
; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [sp], #16
; CHECK: ret
  %src = alloca { float, float }, align 8
  %dst = alloca { float, float }, align 8

  %src.realp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 0
  %src.real = load float, float* %src.realp
  %src.imagp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 1
  %src.imag = load float, float* %src.imagp

  %dst.realp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 0
  %dst.imagp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 1
  store float %src.real, float* %dst.realp
  store float %src.imag, float* %dst.imagp
  ret void
}

; Two double fields; the checks expect a #32 post-index on sp.
define void @store-pair-post-indexed-double() nounwind {
; CHECK-LABEL: store-pair-post-indexed-double
; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [sp], #32
; CHECK: ret
  %src = alloca { double, double }, align 8
  %dst = alloca { double, double }, align 8

  %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
  %src.real = load double, double* %src.realp
  %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
  %src.imag = load double, double* %src.imagp

  %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
  %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
  store double %src.real, double* %dst.realp
  store double %src.imag, double* %dst.imagp
  ret void
}

; Check the following transform:
;
; (ldr|str) X, [x20]
;  ...
; sub x20, x20, #16
;  ->
; (ldr|str) X, [x20], #-16
;
; with X being either w0, x0, s0, d0 or q0.

; Each post-indexed-sub-* test below copies two elements per iteration from
; %phi1 to %phi2 (the element before each pointer and the element at it) and
; then moves both pointers back by two elements. The post-index immediate
; expected by the checks is therefore -2 times the element size in bytes.

; i32 elements: -2 * 4 bytes gives the #-8 immediate.
define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
  br label %for.body
for.body:
  %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr i32, i32* %phi1, i64 -1
  %load1 = load i32, i32* %gep1
  %gep2 = getelementptr i32, i32* %phi2, i64 -1
  store i32 %load1, i32* %gep2
  %load2 = load i32, i32* %phi1
  store i32 %load2, i32* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr i32, i32* %phi2, i64 -2
  %gep4 = getelementptr i32, i32* %phi1, i64 -2
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; i64 elements: -2 * 8 bytes gives the #-16 immediate.
define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-doubleword
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
  br label %for.body
for.body:
  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr i64, i64* %phi1, i64 -1
  %load1 = load i64, i64* %gep1
  %gep2 = getelementptr i64, i64* %phi2, i64 -1
  store i64 %load1, i64* %gep2
  %load2 = load i64, i64* %phi1
  store i64 %load2, i64* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr i64, i64* %phi2, i64 -2
  %gep4 = getelementptr i64, i64* %phi1, i64 -2
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; <2 x i64> elements: -2 * 16 bytes gives the #-32 immediate.
define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-quadword
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
  br label %for.body
for.body:
  %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -1
  %load1 = load <2 x i64>, <2 x i64>* %gep1
  %gep2 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -1
  store <2 x i64> %load1, <2 x i64>* %gep2
  %load2 = load <2 x i64>, <2 x i64>* %phi1
  store <2 x i64> %load2, <2 x i64>* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -2
  %gep4 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -2
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; float elements: -2 * 4 bytes gives the #-8 immediate.
define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-float
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
  br label %for.body
for.body:
  %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr float, float* %phi1, i64 -1
  %load1 = load float, float* %gep1
  %gep2 = getelementptr float, float* %phi2, i64 -1
  store float %load1, float* %gep2
  %load2 = load float, float* %phi1
  store float %load2, float* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr float, float* %phi2, i64 -2
  %gep4 = getelementptr float, float* %phi1, i64 -2
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; double elements: -2 * 8 bytes gives the #-16 immediate.
define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-double
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
  br label %for.body
for.body:
  %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr double, double* %phi1, i64 -1
  %load1 = load double, double* %gep1
  %gep2 = getelementptr double, double* %phi2, i64 -1
  store double %load1, double* %gep2
  %load2 = load double, double* %phi1
  store double %load2, double* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr double, double* %phi2, i64 -2
  %gep4 = getelementptr double, double* %phi1, i64 -2
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; Both pointers step back by 32 i64 elements (-256 bytes) per iteration. The
; checks expect that step to still be folded into post-indexed ldr/str; per
; the test name this is the minimum offset.
define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-doubleword-offset-min
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
  br label %for.body
for.body:
  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr i64, i64* %phi1, i64 1
  %load1 = load i64, i64* %gep1
  %gep2 = getelementptr i64, i64* %phi2, i64 1
  store i64 %load1, i64* %gep2
  %load2 = load i64, i64* %phi1
  store i64 %load2, i64* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr i64, i64* %phi2, i64 -32
  %gep4 = getelementptr i64, i64* %phi1, i64 -32
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; Both pointers step forward by 32 i64 elements (+256 bytes) per iteration.
; The checks expect plain ldr/str, each followed by a separate add of #256,
; i.e. no post-indexed form.
define void @post-indexed-doubleword-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256

  br label %for.body
for.body:
  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr i64, i64* %phi1, i64 1
  %load1 = load i64, i64* %gep1
  %gep2 = getelementptr i64, i64* %phi2, i64 1
  store i64 %load1, i64* %gep2
  %load2 = load i64, i64* %phi1
  store i64 %load2, i64* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr i64, i64* %phi2, i64 32
  %gep4 = getelementptr i64, i64* %phi1, i64 32
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; Two adjacent i64 loads and two adjacent i64 stores per iteration, with both
; pointers stepping back by 64 elements (-512 bytes). The checks expect
; post-indexed ldp/stp with #-512; per the test name this is the minimum
; paired offset.
define void @post-indexed-paired-min-offset(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-paired-min-offset
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
  br label %for.body
for.body:
  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr i64, i64* %phi1, i64 1
  %load1 = load i64, i64* %gep1
  %gep2 = getelementptr i64, i64* %phi2, i64 1
  %load2 = load i64, i64* %phi1
  store i64 %load1, i64* %gep2
  store i64 %load2, i64* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr i64, i64* %phi2, i64 -64
  %gep4 = getelementptr i64, i64* %phi1, i64 -64
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; Paired variant with both pointers stepping forward by 64 elements (+512
; bytes). The checks expect plain ldp/stp, each followed by a separate add of
; #512, i.e. no post-indexed form.
define void @post-indexed-paired-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-paired-offset-out-of-range
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
  br label %for.body
for.body:
  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  %gep1 = getelementptr i64, i64* %phi1, i64 1
  %load1 = load i64, i64* %phi1
  %gep2 = getelementptr i64, i64* %phi2, i64 1
  %load2 = load i64, i64* %gep1
  store i64 %load1, i64* %gep2
  store i64 %load2, i64* %phi2
  %dec.i = add nsw i64 %i, -1
  %gep3 = getelementptr i64, i64* %phi2, i64 64
  %gep4 = getelementptr i64, i64* %phi1, i64 64
  %cond = icmp sgt i64 %dec.i, 0
  br i1 %cond, label %for.body, label %end
end:
  ret void
}

; DAGCombiner::MergeConsecutiveStores merges this into a vector store,
; replaceZeroVectorStore should split the vector store back into
; scalar stores which should get merged by AArch64LoadStoreOptimizer.
;
; Two adjacent i32 zero stores (8 bytes). Without strict-align the checks
; expect a single str of xzr; with +strict-align they expect one stp of
; wzr, wzr.
define void @merge_zr32(i32* %p) {
; CHECK-LABEL: merge_zr32:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
  store i32 0, i32* %p
  %p1 = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %p1
  ret void
}

; Same as merge_zr32 but the merged stores should also get paired.
; Four adjacent i32 zero stores (16 bytes). Without strict-align the checks
; expect one stp of xzr, xzr; with +strict-align they expect two stp of
; wzr, wzr at offsets 0 and 8.
define void @merge_zr32_2(i32* %p) {
; CHECK-LABEL: merge_zr32_2:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
  store i32 0, i32* %p
  %p1 = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %p1
  %p2 = getelementptr i32, i32* %p, i64 2
  store i32 0, i32* %p2
  %p3 = getelementptr i32, i32* %p, i64 3
  store i32 0, i32* %p3
  ret void
}

; Like merge_zr32_2, but checking the largest allowed stp immediate offset.
;
; The stores hit i32 elements 126..129, i.e. byte offsets 504..516. Without
; strict-align the checks expect one stp of xzr, xzr at #504; with
; +strict-align they expect four individual str of wzr.
define void @merge_zr32_2_offset(i32* %p) {
; CHECK-LABEL: merge_zr32_2_offset:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #504]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #508]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #512]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #516]
; CHECK-NEXT: ret
entry:
  %p0 = getelementptr i32, i32* %p, i32 126
  store i32 0, i32* %p0
  %p1 = getelementptr i32, i32* %p, i32 127
  store i32 0, i32* %p1
  %p2 = getelementptr i32, i32* %p, i64 128
  store i32 0, i32* %p2
  %p3 = getelementptr i32, i32* %p, i64 129
  store i32 0, i32* %p3
  ret void
}

; Like merge_zr32, but replaceZeroVectorStore should not split this
; vector store since the address offset is too large for the stp
; instruction.
; The stores hit i32 elements 1024..1027, i.e. byte offsets 4096..4108.
; Without strict-align the checks expect a zeroed vector register (movi)
; stored with one str q at #4096; with +strict-align they expect four
; individual str of wzr.
define void @no_merge_zr32_2_offset(i32* %p) {
; CHECK-LABEL: no_merge_zr32_2_offset:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
; NOSTRICTALIGN-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4096]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4100]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4104]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4108]
; CHECK-NEXT: ret
entry:
  %p0 = getelementptr i32, i32* %p, i32 1024
  store i32 0, i32* %p0
  %p1 = getelementptr i32, i32* %p, i32 1025
  store i32 0, i32* %p1
  %p2 = getelementptr i32, i32* %p, i64 1026
  store i32 0, i32* %p2
  %p3 = getelementptr i32, i32* %p, i64 1027
  store i32 0, i32* %p3
  ret void
}

; Like merge_zr32, but replaceZeroVectorStore should not split the
; vector store since the zero constant vector has multiple uses, so we
; err on the side that allows for stp q instruction generation.
; Eight adjacent i32 zero stores (32 bytes). Without strict-align the checks
; expect a zeroed vector register stored with one stp q, q; with
; +strict-align they expect four stp of wzr, wzr at offsets 0, 8, 16 and 24.
define void @merge_zr32_3(i32* %p) {
; CHECK-LABEL: merge_zr32_3:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #16]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #24]
; CHECK-NEXT: ret
entry:
  store i32 0, i32* %p
  %p1 = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %p1
  %p2 = getelementptr i32, i32* %p, i64 2
  store i32 0, i32* %p2
  %p3 = getelementptr i32, i32* %p, i64 3
  store i32 0, i32* %p3
  %p4 = getelementptr i32, i32* %p, i64 4
  store i32 0, i32* %p4
  %p5 = getelementptr i32, i32* %p, i64 5
  store i32 0, i32* %p5
  %p6 = getelementptr i32, i32* %p, i64 6
  store i32 0, i32* %p6
  %p7 = getelementptr i32, i32* %p, i64 7
  store i32 0, i32* %p7
  ret void
}

; Like merge_zr32, but with 2-vector type.
;
; A single <2 x i32> zeroinitializer store; the expected output is the same
; as for merge_zr32.
define void @merge_zr32_2vec(<2 x i32>* %p) {
; CHECK-LABEL: merge_zr32_2vec:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
  store <2 x i32> zeroinitializer, <2 x i32>* %p
  ret void
}

; Like merge_zr32, but with 3-vector type.
;
; A single <3 x i32> zeroinitializer store (12 bytes). Both configurations
; are expected to zero the first 8 bytes as in merge_zr32 and then store wzr
; at offset 8.
define void @merge_zr32_3vec(<3 x i32>* %p) {
; CHECK-LABEL: merge_zr32_3vec:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
  store <3 x i32> zeroinitializer, <3 x i32>* %p
  ret void
}

; Like merge_zr32, but with 4-vector type.
; A single <4 x i32> zeroinitializer store (16 bytes). Without strict-align
; the checks expect one stp of xzr, xzr; with +strict-align they expect two
; stp of wzr, wzr at offsets 0 and 8.
define void @merge_zr32_4vec(<4 x i32>* %p) {
; CHECK-LABEL: merge_zr32_4vec:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
  store <4 x i32> zeroinitializer, <4 x i32>* %p
  ret void
}

; Like merge_zr32, but with 2-vector float type.
;
; A single <2 x float> zeroinitializer store; the expected output matches
; the <2 x i32> case.
define void @merge_zr32_2vecf(<2 x float>* %p) {
; CHECK-LABEL: merge_zr32_2vecf:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
  store <2 x float> zeroinitializer, <2 x float>* %p
  ret void
}

; Like merge_zr32, but with 4-vector float type.
;
; A single <4 x float> zeroinitializer store; the expected output matches
; the <4 x i32> case.
define void @merge_zr32_4vecf(<4 x float>* %p) {
; CHECK-LABEL: merge_zr32_4vecf:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
  store <4 x float> zeroinitializer, <4 x float>* %p
  ret void
}

; Similar to merge_zr32, but for 64-bit values.
;
; Two adjacent i64 zero stores; both configurations are expected to emit one
; stp of xzr, xzr.
define void @merge_zr64(i64* %p) {
; CHECK-LABEL: merge_zr64:
; CHECK: // %entry
; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
  store i64 0, i64* %p
  %p1 = getelementptr i64, i64* %p, i64 1
  store i64 0, i64* %p1
  ret void
}

; Similar to merge_zr32, but for 64-bit values and with unaligned stores.
1530 define void @merge_zr64_unalign(<2 x i64>* %p) { 1531 ; CHECK-LABEL: merge_zr64_unalign: 1532 ; CHECK: // %entry 1533 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1534 ; STRICTALIGN: strb 1535 ; STRICTALIGN: strb 1536 ; STRICTALIGN: strb 1537 ; STRICTALIGN: strb 1538 ; STRICTALIGN: strb 1539 ; STRICTALIGN: strb 1540 ; STRICTALIGN: strb 1541 ; STRICTALIGN: strb 1542 ; STRICTALIGN: strb 1543 ; STRICTALIGN: strb 1544 ; STRICTALIGN: strb 1545 ; STRICTALIGN: strb 1546 ; STRICTALIGN: strb 1547 ; STRICTALIGN: strb 1548 ; STRICTALIGN: strb 1549 ; STRICTALIGN: strb 1550 ; CHECK-NEXT: ret 1551 entry: 1552 store <2 x i64> zeroinitializer, <2 x i64>* %p, align 1 1553 ret void 1554 } 1555 1556 ; Similar to merge_zr32_3, replaceZeroVectorStore should not split the 1557 ; vector store since the zero constant vector has multiple uses. 1558 define void @merge_zr64_2(i64* %p) { 1559 ; CHECK-LABEL: merge_zr64_2: 1560 ; CHECK: // %entry 1561 ; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 1562 ; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] 1563 ; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1564 ; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #16] 1565 ; CHECK-NEXT: ret 1566 entry: 1567 store i64 0, i64* %p 1568 %p1 = getelementptr i64, i64* %p, i64 1 1569 store i64 0, i64* %p1 1570 %p2 = getelementptr i64, i64* %p, i64 2 1571 store i64 0, i64* %p2 1572 %p3 = getelementptr i64, i64* %p, i64 3 1573 store i64 0, i64* %p3 1574 ret void 1575 } 1576 1577 ; Like merge_zr64, but with 2-vector double type. 1578 define void @merge_zr64_2vecd(<2 x double>* %p) { 1579 ; CHECK-LABEL: merge_zr64_2vecd: 1580 ; CHECK: // %entry 1581 ; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}] 1582 ; CHECK-NEXT: ret 1583 entry: 1584 store <2 x double> zeroinitializer, <2 x double>* %p 1585 ret void 1586 } 1587 1588 ; Like merge_zr64, but with 3-vector i64 type. 
; A single <3 x i64> zeroinitializer store (24 bytes). Both configurations
; are expected to emit one stp of xzr, xzr followed by a str of xzr at
; offset 16.
define void @merge_zr64_3vec(<3 x i64>* %p) {
; CHECK-LABEL: merge_zr64_3vec:
; CHECK: // %entry
; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
; CHECK-NEXT: ret
entry:
  store <3 x i64> zeroinitializer, <3 x i64>* %p
  ret void
}

; Like merge_zr64_2, but with 4-vector double type.
;
; A single <4 x double> zeroinitializer store (32 bytes). Both configurations
; are expected to zero a vector register and store it with one stp q, q.
define void @merge_zr64_4vecd(<4 x double>* %p) {
; CHECK-LABEL: merge_zr64_4vecd:
; CHECK: // %entry
; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
  store <4 x double> zeroinitializer, <4 x double>* %p
  ret void
}

; Verify that non-consecutive merges are not combined into paired q-register
; stores. The three 16-byte zero regions at byte offsets 0, 24 and 48 are not
; adjacent to each other, so without strict-align each one is expected to be
; written by its own str/stur of the zeroed vector register. The store checks
; refer to the register captured from the movi rather than hard-coding q0, so
; the test does not depend on which vector register gets allocated.
define void @merge_multiple_128bit_stores(i64* %p) {
; CHECK-LABEL: merge_multiple_128bit_stores
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
; NOSTRICTALIGN-NEXT: str q[[REG]], [x0]
; NOSTRICTALIGN-NEXT: stur q[[REG]], [x0, #24]
; NOSTRICTALIGN-NEXT: str q[[REG]], [x0, #48]
; STRICTALIGN-NEXT: stp xzr, xzr, [x0]
; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #24]
; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #48]
; CHECK-NEXT: ret
entry:
  store i64 0, i64* %p
  %p1 = getelementptr i64, i64* %p, i64 1
  store i64 0, i64* %p1
  %p3 = getelementptr i64, i64* %p, i64 3
  store i64 0, i64* %p3
  %p4 = getelementptr i64, i64* %p, i64 4
  store i64 0, i64* %p4
  %p6 = getelementptr i64, i64* %p, i64 6
  store i64 0, i64* %p6
  %p7 = getelementptr i64, i64* %p, i64 7
  store i64 0, i64* %p7
  ret void
}

; Verify that large stores generate stp q
;
; Eight adjacent i64 zero stores (64 bytes). Without strict-align the checks
; expect two stp q, q at offsets 0 and 32; with +strict-align they expect
; four stp of xzr, xzr at offsets 0, 16, 32 and 48.
define void @merge_multiple_128bit_stores_consec(i64* %p) {
; CHECK-LABEL: merge_multiple_128bit_stores_consec
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}, #32]
; STRICTALIGN-NEXT: stp xzr, xzr, [x0]
; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #16]
; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #32]
; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #48]
; CHECK-NEXT: ret
entry:
  store i64 0, i64* %p
  %p1 = getelementptr i64, i64* %p, i64 1
  store i64 0, i64* %p1
  %p2 = getelementptr i64, i64* %p, i64 2
  store i64 0, i64* %p2
  %p3 = getelementptr i64, i64* %p, i64 3
  store i64 0, i64* %p3
  %p4 = getelementptr i64, i64* %p, i64 4
  store i64 0, i64* %p4
  %p5 = getelementptr i64, i64* %p, i64 5
  store i64 0, i64* %p5
  %p6 = getelementptr i64, i64* %p, i64 6
  store i64 0, i64* %p6
  %p7 = getelementptr i64, i64* %p, i64 7
  store i64 0, i64* %p7
  ret void
}

; Check for bug 34674 where invalid add of xzr was being generated.
;
; The function zeroes a <2 x i64> through %p, reloads the first i64 and adds
; 1 to it. The checks expect the zero to be copied into a general-purpose
; register (mov from xzr) and that register, not xzr itself, to be the source
; operand of the add.
; CHECK-LABEL: bug34674:
; CHECK: // %entry
; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
; CHECK-DAG: stp xzr, xzr, [x0]
; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
define i64 @bug34674(<2 x i64>* %p) {
entry:
  store <2 x i64> zeroinitializer, <2 x i64>* %p
  %p2 = bitcast <2 x i64>* %p to i64*
  %ld = load i64, i64* %p2
  %add = add i64 %ld, 1
  ret i64 %add
}