Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTRICTALIGN %s
      2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+strict-align -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=STRICTALIGN %s
      3 
      4 ; This file contains tests for the AArch64 load/store optimizer.
      5 
      6 %padding = type { i8*, i8*, i8*, i8* } ; four pointers; member 0 of every wrapper struct below
      7 %s.byte = type { i8, i8 } ; payload structs: member 0 has the type being exercised
      8 %s.halfword = type { i16, i16 }
      9 %s.word = type { i32, i32 }
     10 %s.doubleword = type { i64, i32 }
     11 %s.quadword = type { fp128, i32 }
     12 %s.float = type { float, i32 }
     13 %s.double = type { double, i32 }
     14 %struct.byte = type { %padding, %s.byte } ; wrapper structs: the payload is member 1, after %padding
     15 %struct.halfword = type { %padding, %s.halfword }
     16 %struct.word = type { %padding, %s.word }
     17 %struct.doubleword = type { %padding, %s.doubleword }
     18 %struct.quadword = type { %padding, %s.quadword }
     19 %struct.float = type { %padding, %s.float }
     20 %struct.double = type { %padding, %s.double }
     21 
     22 ; Check the following transform:
     23 ;
     24 ; (ldr|str) X, [x0, #32]
     25 ;  ...
     26 ; add x0, x0, #32
     27 ;  ->
     28 ; (ldr|str) X, [x0, #32]!
     29 ;
     30 ; with X being either w1, x1, s0, d0 or q0.
     31 
     32 declare void @bar_byte(%s.byte*, i8)
     33 
     34 define void @load-pre-indexed-byte(%struct.byte* %ptr) nounwind { ; loads member 0 of the nested %s.byte, then passes the nested struct's address to @bar_byte
     35 ; CHECK-LABEL: load-pre-indexed-byte
     36 ; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
     37 entry:
     38   %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.byte
     39   %add = load i8, i8* %a, align 4 ; the access the pattern above expects in writeback form
     40   br label %bar
     41 bar:
     42   %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
     43   tail call void @bar_byte(%s.byte* %c, i8 %add)
     44   ret void
     45 }
     46 
     47 define void @store-pre-indexed-byte(%struct.byte* %ptr, i8 %val) nounwind { ; stores %val to member 0 of the nested %s.byte, then passes the nested struct's address to @bar_byte
     48 ; CHECK-LABEL: store-pre-indexed-byte
     49 ; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
     50 entry:
     51   %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.byte
     52   store i8 %val, i8* %a, align 4 ; the access the pattern above expects in writeback form
     53   br label %bar
     54 bar:
     55   %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
     56   tail call void @bar_byte(%s.byte* %c, i8 %val)
     57   ret void
     58 }
     59 
     60 declare void @bar_halfword(%s.halfword*, i16)
     61 
     62 define void @load-pre-indexed-halfword(%struct.halfword* %ptr) nounwind { ; loads member 0 of the nested %s.halfword, then passes the nested struct's address to @bar_halfword
     63 ; CHECK-LABEL: load-pre-indexed-halfword
     64 ; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
     65 entry:
     66   %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.halfword
     67   %add = load i16, i16* %a, align 4 ; the access the pattern above expects in writeback form
     68   br label %bar
     69 bar:
     70   %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
     71   tail call void @bar_halfword(%s.halfword* %c, i16 %add)
     72   ret void
     73 }
     74 
     75 define void @store-pre-indexed-halfword(%struct.halfword* %ptr, i16 %val) nounwind { ; stores %val to member 0 of the nested %s.halfword, then passes the nested struct's address to @bar_halfword
     76 ; CHECK-LABEL: store-pre-indexed-halfword
     77 ; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
     78 entry:
     79   %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.halfword
     80   store i16 %val, i16* %a, align 4 ; the access the pattern above expects in writeback form
     81   br label %bar
     82 bar:
     83   %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
     84   tail call void @bar_halfword(%s.halfword* %c, i16 %val)
     85   ret void
     86 }
     87 
     88 declare void @bar_word(%s.word*, i32)
     89 
     90 define void @load-pre-indexed-word(%struct.word* %ptr) nounwind { ; loads member 0 of the nested %s.word, then passes the nested struct's address to @bar_word
     91 ; CHECK-LABEL: load-pre-indexed-word
     92 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
     93 entry:
     94   %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.word
     95   %add = load i32, i32* %a, align 4 ; the access the pattern above expects in writeback form
     96   br label %bar
     97 bar:
     98   %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
     99   tail call void @bar_word(%s.word* %c, i32 %add)
    100   ret void
    101 }
    102 
    103 define void @store-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind { ; stores %val to member 0 of the nested %s.word, then passes the nested struct's address to @bar_word
    104 ; CHECK-LABEL: store-pre-indexed-word
    105 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    106 entry:
    107   %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.word
    108   store i32 %val, i32* %a, align 4 ; the access the pattern above expects in writeback form
    109   br label %bar
    110 bar:
    111   %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    112   tail call void @bar_word(%s.word* %c, i32 %val)
    113   ret void
    114 }
    115 
    116 declare void @bar_doubleword(%s.doubleword*, i64)
    117 
    118 define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind { ; loads member 0 of the nested %s.doubleword, then passes the nested struct's address to @bar_doubleword
    119 ; CHECK-LABEL: load-pre-indexed-doubleword
    120 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    121 entry:
    122   %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.doubleword
    123   %add = load i64, i64* %a, align 8 ; the access the pattern above expects in writeback form
    124   br label %bar
    125 bar:
    126   %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    127   tail call void @bar_doubleword(%s.doubleword* %c, i64 %add)
    128   ret void
    129 }
    130 
    131 define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) nounwind { ; stores %val to member 0 of the nested %s.doubleword, then passes the nested struct's address to @bar_doubleword
    132 ; CHECK-LABEL: store-pre-indexed-doubleword
    133 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    134 entry:
    135   %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.doubleword
    136   store i64 %val, i64* %a, align 8 ; the access the pattern above expects in writeback form
    137   br label %bar
    138 bar:
    139   %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    140   tail call void @bar_doubleword(%s.doubleword* %c, i64 %val)
    141   ret void
    142 }
    143 
    144 declare void @bar_quadword(%s.quadword*, fp128)
    145 
    146 define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind { ; loads member 0 of the nested %s.quadword, then passes the nested struct's address to @bar_quadword
    147 ; CHECK-LABEL: load-pre-indexed-quadword
    148 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    149 entry:
    150   %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.quadword
    151   %add = load fp128, fp128* %a, align 16 ; the access the pattern above expects in writeback form
    152   br label %bar
    153 bar:
    154   %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    155   tail call void @bar_quadword(%s.quadword* %c, fp128 %add)
    156   ret void
    157 }
    158 
    159 define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) nounwind { ; stores %val to member 0 of the nested %s.quadword, then passes the nested struct's address to @bar_quadword
    160 ; CHECK-LABEL: store-pre-indexed-quadword
    161 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    162 entry:
    163   %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.quadword
    164   store fp128 %val, fp128* %a, align 16 ; the access the pattern above expects in writeback form
    165   br label %bar
    166 bar:
    167   %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    168   tail call void @bar_quadword(%s.quadword* %c, fp128 %val)
    169   ret void
    170 }
    171 
    172 declare void @bar_float(%s.float*, float)
    173 
    174 define void @load-pre-indexed-float(%struct.float* %ptr) nounwind { ; loads member 0 of the nested %s.float, then passes the nested struct's address to @bar_float
    175 ; CHECK-LABEL: load-pre-indexed-float
    176 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    177 entry:
    178   %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.float
    179   %add = load float, float* %a, align 4 ; the access the pattern above expects in writeback form
    180   br label %bar
    181 bar:
    182   %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    183   tail call void @bar_float(%s.float* %c, float %add)
    184   ret void
    185 }
    186 
    187 define void @store-pre-indexed-float(%struct.float* %ptr, float %val) nounwind { ; stores %val to member 0 of the nested %s.float, then passes the nested struct's address to @bar_float
    188 ; CHECK-LABEL: store-pre-indexed-float
    189 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    190 entry:
    191   %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.float
    192   store float %val, float* %a, align 4 ; the access the pattern above expects in writeback form
    193   br label %bar
    194 bar:
    195   %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    196   tail call void @bar_float(%s.float* %c, float %val)
    197   ret void
    198 }
    199 
    200 declare void @bar_double(%s.double*, double)
    201 
    202 define void @load-pre-indexed-double(%struct.double* %ptr) nounwind { ; loads member 0 of the nested %s.double, then passes the nested struct's address to @bar_double
    203 ; CHECK-LABEL: load-pre-indexed-double
    204 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    205 entry:
    206   %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.double
    207   %add = load double, double* %a, align 8 ; the access the pattern above expects in writeback form
    208   br label %bar
    209 bar:
    210   %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    211   tail call void @bar_double(%s.double* %c, double %add)
    212   ret void
    213 }
    214 
    215 define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwind { ; stores %val to member 0 of the nested %s.double, then passes the nested struct's address to @bar_double
    216 ; CHECK-LABEL: store-pre-indexed-double
    217 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    218 entry:
    219   %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0 ; address of member 0 inside member 1 of %struct.double
    220   store double %val, double* %a, align 8 ; the access the pattern above expects in writeback form
    221   br label %bar
    222 bar:
    223   %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1 ; address of member 1 itself, numerically equal to %a
    224   tail call void @bar_double(%s.double* %c, double %val)
    225   ret void
    226 }
    227 
    228 ; Check the following transform:
    229 ;
    230 ; (ldp|stp) w1, w2, [x0, #32]
    231 ;  ...
    232 ; add x0, x0, #32
    233 ;  ->
    234 ; (ldp|stp) w1, w2, [x0, #32]!
    235 ;
    236 
    237 define void @load-pair-pre-indexed-word(%struct.word* %ptr) nounwind { ; loads both i32 members of the nested %s.word, then passes the nested struct's address and their sum to @bar_word
    238 ; CHECK-LABEL: load-pair-pre-indexed-word
    239 ; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
    240 ; CHECK-NOT: add x0, x0, #32
    241 entry:
    242   %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 ; member 0 of the nested %s.word
    243   %a1 = load i32, i32* %a, align 4
    244   %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 ; member 1, the i32 directly after member 0
    245   %b1 = load i32, i32* %b, align 4
    246   %add = add i32 %a1, %b1 ; consumes both loaded values
    247   br label %bar
    248 bar:
    249   %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 ; address of the nested %s.word itself, numerically equal to %a
    250   tail call void @bar_word(%s.word* %c, i32 %add)
    251   ret void
    252 }
    253 
    254 define void @store-pair-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind { ; stores %val to both i32 members of the nested %s.word, then passes the nested struct's address to @bar_word
    255 ; CHECK-LABEL: store-pair-pre-indexed-word
    256 ; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
    257 ; CHECK-NOT: add x0, x0, #32
    258 entry:
    259   %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 ; member 0 of the nested %s.word
    260   store i32 %val, i32* %a, align 4
    261   %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 ; member 1, the i32 directly after member 0
    262   store i32 %val, i32* %b, align 4
    263   br label %bar
    264 bar:
    265   %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 ; address of the nested %s.word itself, numerically equal to %a
    266   tail call void @bar_word(%s.word* %c, i32 %val)
    267   ret void
    268 }
    269 
    270 ; Check the following transform:
    271 ;
    272 ; add x8, x8, #16
    273 ;  ...
    274 ; ldr X, [x8]
    275 ;  ->
    276 ; ldr X, [x8, #16]!
    277 ;
    278 ; with X being either w0, x0, s0, d0 or q0.
    279 
    280 %pre.struct.i32 = type { i32, i32, i32, i32, i32} ; five i32 members
    281 %pre.struct.i64 = type { i32, i64, i64, i64, i64} ; leading i32, then four i64 members
    282 %pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>} ; leading i32, then three <2 x i64> members
    283 %pre.struct.float = type { i32, float, float, float} ; leading i32, then three float members
    284 %pre.struct.double = type { i32, double, double, double} ; leading i32, then three double members
    285 
    286 define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
    287                                    %pre.struct.i32* %load2) nounwind { ; picks one of two member addresses by %cond and loads an i32 through it
    288 ; CHECK-LABEL: load-pre-indexed-word2
    289 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
    290   br i1 %cond, label %if.then, label %if.end
    291 if.then:
    292   %load1 = load %pre.struct.i32*, %pre.struct.i32** %this ; struct pointer fetched through %this
    293   %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    294   br label %return
    295 if.end:
    296   %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    297   br label %return
    298 return:
    299   %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    300   %ret = load i32, i32* %retptr
    301   ret i32 %ret
    302 }
    303 
    304 define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
    305                                          %pre.struct.i64* %load2) nounwind { ; picks one of two member addresses by %cond and loads an i64 through it
    306 ; CHECK-LABEL: load-pre-indexed-doubleword2
    307 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
    308   br i1 %cond, label %if.then, label %if.end
    309 if.then:
    310   %load1 = load %pre.struct.i64*, %pre.struct.i64** %this ; struct pointer fetched through %this
    311   %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    312   br label %return
    313 if.end:
    314   %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    315   br label %return
    316 return:
    317   %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    318   %ret = load i64, i64* %retptr
    319   ret i64 %ret
    320 }
    321 
    322 define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
    323                                              %pre.struct.i128* %load2) nounwind { ; picks one of two member addresses by %cond and loads a <2 x i64> through it
    324 ; CHECK-LABEL: load-pre-indexed-quadword2
    325 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
    326   br i1 %cond, label %if.then, label %if.end
    327 if.then:
    328   %load1 = load %pre.struct.i128*, %pre.struct.i128** %this ; struct pointer fetched through %this
    329   %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    330   br label %return
    331 if.end:
    332   %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    333   br label %return
    334 return:
    335   %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    336   %ret = load <2 x i64>, <2 x i64>* %retptr
    337   ret <2 x i64> %ret
    338 }
    339 
    340 define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
    341                                       %pre.struct.float* %load2) nounwind { ; picks one of two member addresses by %cond and loads a float through it
    342 ; CHECK-LABEL: load-pre-indexed-float2
    343 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
    344   br i1 %cond, label %if.then, label %if.end
    345 if.then:
    346   %load1 = load %pre.struct.float*, %pre.struct.float** %this ; struct pointer fetched through %this
    347   %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    348   br label %return
    349 if.end:
    350   %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    351   br label %return
    352 return:
    353   %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    354   %ret = load float, float* %retptr
    355   ret float %ret
    356 }
    357 
    358 define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
    359                                         %pre.struct.double* %load2) nounwind { ; picks one of two member addresses by %cond and loads a double through it
    360 ; CHECK-LABEL: load-pre-indexed-double2
    361 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
    362   br i1 %cond, label %if.then, label %if.end
    363 if.then:
    364   %load1 = load %pre.struct.double*, %pre.struct.double** %this ; struct pointer fetched through %this
    365   %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    366   br label %return
    367 if.end:
    368   %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    369   br label %return
    370 return:
    371   %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    372   %ret = load double, double* %retptr
    373   ret double %ret
    374 }
    375 
    376 define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond,
    377                                    %pre.struct.i32* %load2) nounwind { ; same shape as the word2 variant, using members 3 and 4
    378 ; CHECK-LABEL: load-pre-indexed-word3
    379 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]!
    380   br i1 %cond, label %if.then, label %if.end
    381 if.then:
    382   %load1 = load %pre.struct.i32*, %pre.struct.i32** %this ; struct pointer fetched through %this
    383   %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 ; member 3 of the fetched struct
    384   br label %return
    385 if.end:
    386   %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 ; member 4 of the struct passed as %load2
    387   br label %return
    388 return:
    389   %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    390   %ret = load i32, i32* %retptr
    391   ret i32 %ret
    392 }
    393 
    394 define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond,
    395                                          %pre.struct.i64* %load2) nounwind { ; same shape as the doubleword2 variant, using members 2 and 3
    396 ; CHECK-LABEL: load-pre-indexed-doubleword3
    397 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]!
    398   br i1 %cond, label %if.then, label %if.end
    399 if.then:
    400   %load1 = load %pre.struct.i64*, %pre.struct.i64** %this ; struct pointer fetched through %this
    401   %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2 ; member 2 of the fetched struct
    402   br label %return
    403 if.end:
    404   %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3 ; member 3 of the struct passed as %load2
    405   br label %return
    406 return:
    407   %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    408   %ret = load i64, i64* %retptr
    409   ret i64 %ret
    410 }
    411 
    412 define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond,
    413                                              %pre.struct.i128* %load2) nounwind { ; same shape as the quadword2 variant, using members 2 and 3
    414 ; CHECK-LABEL: load-pre-indexed-quadword3
    415 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    416   br i1 %cond, label %if.then, label %if.end
    417 if.then:
    418   %load1 = load %pre.struct.i128*, %pre.struct.i128** %this ; struct pointer fetched through %this
    419   %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 ; member 2 of the fetched struct
    420   br label %return
    421 if.end:
    422   %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 ; member 3 of the struct passed as %load2
    423   br label %return
    424 return:
    425   %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    426   %ret = load <2 x i64>, <2 x i64>* %retptr
    427   ret <2 x i64> %ret
    428 }
    429 
    430 define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond,
    431                                       %pre.struct.float* %load2) nounwind { ; same shape as the float2 variant, using members 2 and 3
    432 ; CHECK-LABEL: load-pre-indexed-float3
    433 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]!
    434   br i1 %cond, label %if.then, label %if.end
    435 if.then:
    436   %load1 = load %pre.struct.float*, %pre.struct.float** %this ; struct pointer fetched through %this
    437   %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 ; member 2 of the fetched struct
    438   br label %return
    439 if.end:
    440   %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 ; member 3 of the struct passed as %load2
    441   br label %return
    442 return:
    443   %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    444   %ret = load float, float* %retptr
    445   ret float %ret
    446 }
    447 
    448 define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond,
    449                                         %pre.struct.double* %load2) nounwind { ; same shape as the double2 variant, using members 2 and 3
    450 ; CHECK-LABEL: load-pre-indexed-double3
    451 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]!
    452   br i1 %cond, label %if.then, label %if.end
    453 if.then:
    454   %load1 = load %pre.struct.double*, %pre.struct.double** %this ; struct pointer fetched through %this
    455   %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 ; member 2 of the fetched struct
    456   br label %return
    457 if.end:
    458   %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 ; member 3 of the struct passed as %load2
    459   br label %return
    460 return:
    461   %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the load itself carries no constant offset
    462   %ret = load double, double* %retptr
    463   ret double %ret
    464 }
    465 
    466 ; Check the following transform:
    467 ;
    468 ; add x8, x8, #16
    469 ;  ...
    470 ; str X, [x8]
    471 ;  ->
    472 ; str X, [x8, #16]!
    473 ;
    474 ; with X being either w0, x0, s0, d0 or q0.
    475 
    476 define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
    477                                      %pre.struct.i32* %load2,
    478                                      i32 %val) nounwind { ; picks one of two member addresses by %cond and stores %val through it
    479 ; CHECK-LABEL: store-pre-indexed-word2
    480 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
    481   br i1 %cond, label %if.then, label %if.end
    482 if.then:
    483   %load1 = load %pre.struct.i32*, %pre.struct.i32** %this ; struct pointer fetched through %this
    484   %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    485   br label %return
    486 if.end:
    487   %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    488   br label %return
    489 return:
    490   %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    491   store i32 %val, i32* %retptr
    492   ret void
    493 }
    494 
    495 define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
    496                                            %pre.struct.i64* %load2,
    497                                            i64 %val) nounwind { ; picks one of two member addresses by %cond and stores %val through it
    498 ; CHECK-LABEL: store-pre-indexed-doubleword2
    499 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
    500   br i1 %cond, label %if.then, label %if.end
    501 if.then:
    502   %load1 = load %pre.struct.i64*, %pre.struct.i64** %this ; struct pointer fetched through %this
    503   %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    504   br label %return
    505 if.end:
    506   %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    507   br label %return
    508 return:
    509   %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    510   store i64 %val, i64* %retptr
    511   ret void
    512 }
    513 
    514 define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
    515                                          %pre.struct.i128* %load2,
    516                                          <2 x i64> %val) nounwind { ; picks one of two member addresses by %cond and stores %val through it
    517 ; CHECK-LABEL: store-pre-indexed-quadword2
    518 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
    519   br i1 %cond, label %if.then, label %if.end
    520 if.then:
    521   %load1 = load %pre.struct.i128*, %pre.struct.i128** %this ; struct pointer fetched through %this
    522   %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    523   br label %return
    524 if.end:
    525   %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    526   br label %return
    527 return:
    528   %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    529   store <2 x i64> %val, <2 x i64>* %retptr
    530   ret void
    531 }
    532 
    533 define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
    534                                       %pre.struct.float* %load2,
    535                                       float %val) nounwind { ; picks one of two member addresses by %cond and stores %val through it
    536 ; CHECK-LABEL: store-pre-indexed-float2
    537 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
    538   br i1 %cond, label %if.then, label %if.end
    539 if.then:
    540   %load1 = load %pre.struct.float*, %pre.struct.float** %this ; struct pointer fetched through %this
    541   %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    542   br label %return
    543 if.end:
    544   %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    545   br label %return
    546 return:
    547   %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    548   store float %val, float* %retptr
    549   ret void
    550 }
    551 
    552 define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
    553                                       %pre.struct.double* %load2,
    554                                       double %val) nounwind { ; picks one of two member addresses by %cond and stores %val through it
    555 ; CHECK-LABEL: store-pre-indexed-double2
    556 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
    557   br i1 %cond, label %if.then, label %if.end
    558 if.then:
    559   %load1 = load %pre.struct.double*, %pre.struct.double** %this ; struct pointer fetched through %this
    560   %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1 ; member 1 of the fetched struct
    561   br label %return
    562 if.end:
    563   %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2 ; member 2 of the struct passed as %load2
    564   br label %return
    565 return:
    566   %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    567   store double %val, double* %retptr
    568   ret void
    569 }
    570 
    571 define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond,
    572                                      %pre.struct.i32* %load2,
    573                                      i32 %val) nounwind { ; same shape as the word2 variant, using members 3 and 4
    574 ; CHECK-LABEL: store-pre-indexed-word3
    575 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]!
    576   br i1 %cond, label %if.then, label %if.end
    577 if.then:
    578   %load1 = load %pre.struct.i32*, %pre.struct.i32** %this ; struct pointer fetched through %this
    579   %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 ; member 3 of the fetched struct
    580   br label %return
    581 if.end:
    582   %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 ; member 4 of the struct passed as %load2
    583   br label %return
    584 return:
    585   %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    586   store i32 %val, i32* %retptr
    587   ret void
    588 }
    589 
    590 define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond,
    591                                            %pre.struct.i64* %load2,
    592                                            i64 %val) nounwind { ; same shape as the doubleword2 variant, using members 3 and 4
    593 ; CHECK-LABEL: store-pre-indexed-doubleword3
    594 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]!
    595   br i1 %cond, label %if.then, label %if.end
    596 if.then:
    597   %load1 = load %pre.struct.i64*, %pre.struct.i64** %this ; struct pointer fetched through %this
    598   %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3 ; member 3 of the fetched struct
    599   br label %return
    600 if.end:
    601   %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4 ; member 4 of the struct passed as %load2
    602   br label %return
    603 return:
    604   %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    605   store i64 %val, i64* %retptr
    606   ret void
    607 }
    608 
    609 define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond,
    610                                          %pre.struct.i128* %load2,
    611                                          <2 x i64> %val) nounwind { ; same shape as the quadword2 variant, using members 2 and 3
    612 ; CHECK-LABEL: store-pre-indexed-quadword3
    613 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
    614   br i1 %cond, label %if.then, label %if.end
    615 if.then:
    616   %load1 = load %pre.struct.i128*, %pre.struct.i128** %this ; struct pointer fetched through %this
    617   %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 ; member 2 of the fetched struct
    618   br label %return
    619 if.end:
    620   %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 ; member 3 of the struct passed as %load2
    621   br label %return
    622 return:
    623   %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    624   store <2 x i64> %val, <2 x i64>* %retptr
    625   ret void
    626 }
    627 
    628 define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond,
    629                                       %pre.struct.float* %load2,
    630                                       float %val) nounwind { ; same shape as the float2 variant, using members 2 and 3
    631 ; CHECK-LABEL: store-pre-indexed-float3
    632 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]!
    633   br i1 %cond, label %if.then, label %if.end
    634 if.then:
    635   %load1 = load %pre.struct.float*, %pre.struct.float** %this ; struct pointer fetched through %this
    636   %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 ; member 2 of the fetched struct
    637   br label %return
    638 if.end:
    639   %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 ; member 3 of the struct passed as %load2
    640   br label %return
    641 return:
    642   %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    643   store float %val, float* %retptr
    644   ret void
    645 }
    646 
    647 define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond,
    648                                       %pre.struct.double* %load2,
    649                                       double %val) nounwind { ; same shape as the double2 variant, using members 2 and 3
    650 ; CHECK-LABEL: store-pre-indexed-double3
    651 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]!
    652   br i1 %cond, label %if.then, label %if.end
    653 if.then:
    654   %load1 = load %pre.struct.double*, %pre.struct.double** %this ; struct pointer fetched through %this
    655   %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 ; member 2 of the fetched struct
    656   br label %return
    657 if.end:
    658   %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 ; member 3 of the struct passed as %load2
    659   br label %return
    660 return:
    661   %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] ; merged address; the store itself carries no constant offset
    662   store double %val, double* %retptr
    663   ret void
    664 }
    665 
    666 ; Check the following transform:
    667 ;
    668 ; ldr X, [x20]
    669 ;  ...
    670 ; add x20, x20, #32
    671 ;  ->
    672 ; ldr X, [x20], #32
    673 ;
    674 ; with X being either w0, x0, s0, d0 or q0.
    675 
    676 define void @load-post-indexed-byte(i8* %array, i64 %count) nounwind { ; loop that reads two adjacent i8 elements per iteration and advances the cursor by 4 elements
    677 ; CHECK-LABEL: load-post-indexed-byte
    678 ; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}], #4
    679 entry:
    680   %gep1 = getelementptr i8, i8* %array, i64 2 ; initial cursor: element 2 of %array
    681   br label %body
    682 
    683 body:
    684   %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ] ; pointer cursor
    685   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] ; remaining count
    686   %gep2 = getelementptr i8, i8* %iv2, i64 -1 ; element just before the cursor
    687   %load = load i8, i8* %gep2
    688   call void @use-byte(i8 %load)
    689   %load2 = load i8, i8* %iv2 ; element at the cursor
    690   call void @use-byte(i8 %load2)
    691   %iv.next = add i64 %iv, -4
    692   %gep3 = getelementptr i8, i8* %iv2, i64 4 ; advance by 4 elements = 4 bytes, the post-index immediate expected above
    693   %cond = icmp eq i64 %iv.next, 0 ; leaves the loop only when the count is exactly 0
    694   br i1 %cond, label %exit, label %body
    695 
    696 exit:
    697   ret void
    698 }
    699 
    700 define void @load-post-indexed-halfword(i16* %array, i64 %count) nounwind { ; loop that reads two adjacent i16 elements per iteration and advances the cursor by 4 elements
    701 ; CHECK-LABEL: load-post-indexed-halfword
    702 ; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}], #8
    703 entry:
    704   %gep1 = getelementptr i16, i16* %array, i64 2 ; initial cursor: element 2 of %array
    705   br label %body
    706 
    707 body:
    708   %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ] ; pointer cursor
    709   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] ; remaining count
    710   %gep2 = getelementptr i16, i16* %iv2, i64 -1 ; element just before the cursor
    711   %load = load i16, i16* %gep2
    712   call void @use-halfword(i16 %load)
    713   %load2 = load i16, i16* %iv2 ; element at the cursor
    714   call void @use-halfword(i16 %load2)
    715   %iv.next = add i64 %iv, -4
    716   %gep3 = getelementptr i16, i16* %iv2, i64 4 ; advance by 4 elements = 8 bytes, the post-index immediate expected above
    717   %cond = icmp eq i64 %iv.next, 0 ; leaves the loop only when the count is exactly 0
    718   br i1 %cond, label %exit, label %body
    719 
    720 exit:
    721   ret void
    722 }
    723 
    724 define void @load-post-indexed-word(i32* %array, i64 %count) nounwind { ; loop that reads two adjacent i32 elements per iteration and advances the cursor by 4 elements
    725 ; CHECK-LABEL: load-post-indexed-word
    726 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16
    727 entry:
    728   %gep1 = getelementptr i32, i32* %array, i64 2 ; initial cursor: element 2 of %array
    729   br label %body
    730 
    731 body:
    732   %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ] ; pointer cursor
    733   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] ; remaining count
    734   %gep2 = getelementptr i32, i32* %iv2, i64 -1 ; element just before the cursor
    735   %load = load i32, i32* %gep2
    736   call void @use-word(i32 %load)
    737   %load2 = load i32, i32* %iv2 ; element at the cursor
    738   call void @use-word(i32 %load2)
    739   %iv.next = add i64 %iv, -4
    740   %gep3 = getelementptr i32, i32* %iv2, i64 4 ; advance by 4 elements = 16 bytes, the post-index immediate expected above
    741   %cond = icmp eq i64 %iv.next, 0 ; leaves the loop only when the count is exactly 0
    742   br i1 %cond, label %exit, label %body
    743 
    744 exit:
    745   ret void
    746 }
    747 
    748 define void @load-post-indexed-doubleword(i64* %array, i64 %count) nounwind {
        ; Loop that loads the i64 elements at %iv2-1 and %iv2, passes each to
        ; @use-doubleword, and advances %iv2 by 4 elements per iteration.
    749 ; CHECK-LABEL: load-post-indexed-doubleword
    750 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #32
    751 entry:
    752   %gep1 = getelementptr i64, i64* %array, i64 2
    753   br label %body
    754 
    755 body:
    756   %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
    757   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    758   %gep2 = getelementptr i64, i64* %iv2, i64 -1
    759   %load = load i64, i64* %gep2
    760   call void @use-doubleword(i64 %load)
    761   %load2 = load i64, i64* %iv2
    762   call void @use-doubleword(i64 %load2)
    763   %iv.next = add i64 %iv, -4
        ; 4 x i64 = 32 bytes: matches the #32 writeback in the expected ldr.
    764   %gep3 = getelementptr i64, i64* %iv2, i64 4
    765   %cond = icmp eq i64 %iv.next, 0
    766   br i1 %cond, label %exit, label %body
    767 
    768 exit:
    769   ret void
    770 }
    771 
    772 define void @load-post-indexed-quadword(<2 x i64>* %array, i64 %count) nounwind {
        ; Loop that loads the <2 x i64> elements at %iv2-1 and %iv2, passes each to
        ; @use-quadword, and advances %iv2 by 4 elements per iteration.
    773 ; CHECK-LABEL: load-post-indexed-quadword
    774 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #64
    775 entry:
    776   %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
    777   br label %body
    778 
    779 body:
    780   %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
    781   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    782   %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
    783   %load = load <2 x i64>, <2 x i64>* %gep2
    784   call void @use-quadword(<2 x i64> %load)
    785   %load2 = load <2 x i64>, <2 x i64>* %iv2
    786   call void @use-quadword(<2 x i64> %load2)
    787   %iv.next = add i64 %iv, -4
        ; 4 x 16-byte vectors = 64 bytes: matches the #64 writeback in the expected ldr.
    788   %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
    789   %cond = icmp eq i64 %iv.next, 0
    790   br i1 %cond, label %exit, label %body
    791 
    792 exit:
    793   ret void
    794 }
    795 
    796 define void @load-post-indexed-float(float* %array, i64 %count) nounwind {
        ; Loop that loads the float elements at %iv2-1 and %iv2, passes each to
        ; @use-float, and advances %iv2 by 4 elements per iteration.
    797 ; CHECK-LABEL: load-post-indexed-float
    798 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #16
    799 entry:
    800   %gep1 = getelementptr float, float* %array, i64 2
    801   br label %body
    802 
    803 body:
    804   %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
    805   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    806   %gep2 = getelementptr float, float* %iv2, i64 -1
    807   %load = load float, float* %gep2
    808   call void @use-float(float %load)
    809   %load2 = load float, float* %iv2
    810   call void @use-float(float %load2)
    811   %iv.next = add i64 %iv, -4
        ; 4 x float = 16 bytes: matches the #16 writeback in the expected ldr.
    812   %gep3 = getelementptr float, float* %iv2, i64 4
    813   %cond = icmp eq i64 %iv.next, 0
    814   br i1 %cond, label %exit, label %body
    815 
    816 exit:
    817   ret void
    818 }
    819 
    820 define void @load-post-indexed-double(double* %array, i64 %count) nounwind {
        ; Loop that loads the double elements at %iv2-1 and %iv2, passes each to
        ; @use-double, and advances %iv2 by 4 elements per iteration.
    821 ; CHECK-LABEL: load-post-indexed-double
    822 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #32
    823 entry:
    824   %gep1 = getelementptr double, double* %array, i64 2
    825   br label %body
    826 
    827 body:
    828   %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
    829   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    830   %gep2 = getelementptr double, double* %iv2, i64 -1
    831   %load = load double, double* %gep2
    832   call void @use-double(double %load)
    833   %load2 = load double, double* %iv2
    834   call void @use-double(double %load2)
    835   %iv.next = add i64 %iv, -4
        ; 4 x double = 32 bytes: matches the #32 writeback in the expected ldr.
    836   %gep3 = getelementptr double, double* %iv2, i64 4
    837   %cond = icmp eq i64 %iv.next, 0
    838   br i1 %cond, label %exit, label %body
    839 
    840 exit:
    841   ret void
    842 }
    843 
    844 ; Check the following transform:
    845 ;
    846 ; str X, [x20]
    847 ;  ...
    848 ; add x20, x20, #32
    849 ;  ->
    850 ; str X, [x20], #32
    851 ;
    852 ; with X being either w0, x0, s0, d0 or q0.
    853 
    854 define void @store-post-indexed-byte(i8* %array, i64 %count, i8 %val) nounwind {
        ; Loop that loads the i8 at %iv2-1 for @use-byte, stores %val at %iv2, and
        ; advances %iv2 by 4 elements per iteration.
    855 ; CHECK-LABEL: store-post-indexed-byte
    856 ; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #4
    857 entry:
    858   %gep1 = getelementptr i8, i8* %array, i64 2
    859   br label %body
    860 
    861 body:
    862   %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ]
    863   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    864   %gep2 = getelementptr i8, i8* %iv2, i64 -1
    865   %load = load i8, i8* %gep2
    866   call void @use-byte(i8 %load)
    867   store i8 %val, i8* %iv2
    868   %iv.next = add i64 %iv, -4
        ; 4 x i8 = 4 bytes: matches the #4 writeback in the expected strb.
    869   %gep3 = getelementptr i8, i8* %iv2, i64 4
    870   %cond = icmp eq i64 %iv.next, 0
    871   br i1 %cond, label %exit, label %body
    872 
    873 exit:
    874   ret void
    875 }
    876 
    877 define void @store-post-indexed-halfword(i16* %array, i64 %count, i16 %val) nounwind {
        ; Loop that loads the i16 at %iv2-1 for @use-halfword, stores %val at %iv2,
        ; and advances %iv2 by 4 elements per iteration.
    878 ; CHECK-LABEL: store-post-indexed-halfword
    879 ; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #8
    880 entry:
    881   %gep1 = getelementptr i16, i16* %array, i64 2
    882   br label %body
    883 
    884 body:
    885   %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ]
    886   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    887   %gep2 = getelementptr i16, i16* %iv2, i64 -1
    888   %load = load i16, i16* %gep2
    889   call void @use-halfword(i16 %load)
    890   store i16 %val, i16* %iv2
    891   %iv.next = add i64 %iv, -4
        ; 4 x i16 = 8 bytes: matches the #8 writeback in the expected strh.
    892   %gep3 = getelementptr i16, i16* %iv2, i64 4
    893   %cond = icmp eq i64 %iv.next, 0
    894   br i1 %cond, label %exit, label %body
    895 
    896 exit:
    897   ret void
    898 }
    899 
    900 define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind {
        ; Loop that loads the i32 at %iv2-1 for @use-word, stores %val at %iv2, and
        ; advances %iv2 by 4 elements per iteration.
    901 ; CHECK-LABEL: store-post-indexed-word
    902 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16
    903 entry:
    904   %gep1 = getelementptr i32, i32* %array, i64 2
    905   br label %body
    906 
    907 body:
    908   %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
    909   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    910   %gep2 = getelementptr i32, i32* %iv2, i64 -1
    911   %load = load i32, i32* %gep2
    912   call void @use-word(i32 %load)
    913   store i32 %val, i32* %iv2
    914   %iv.next = add i64 %iv, -4
        ; 4 x i32 = 16 bytes: matches the #16 writeback in the expected str.
    915   %gep3 = getelementptr i32, i32* %iv2, i64 4
    916   %cond = icmp eq i64 %iv.next, 0
    917   br i1 %cond, label %exit, label %body
    918 
    919 exit:
    920   ret void
    921 }
    922 
    923 define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind {
        ; Loop that loads the i64 at %iv2-1 for @use-doubleword, stores %val at
        ; %iv2, and advances %iv2 by 4 elements per iteration.
    924 ; CHECK-LABEL: store-post-indexed-doubleword
    925 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32
    926 entry:
    927   %gep1 = getelementptr i64, i64* %array, i64 2
    928   br label %body
    929 
    930 body:
    931   %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
    932   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    933   %gep2 = getelementptr i64, i64* %iv2, i64 -1
    934   %load = load i64, i64* %gep2
    935   call void @use-doubleword(i64 %load)
    936   store i64 %val, i64* %iv2
    937   %iv.next = add i64 %iv, -4
        ; 4 x i64 = 32 bytes: matches the #32 writeback in the expected str.
    938   %gep3 = getelementptr i64, i64* %iv2, i64 4
    939   %cond = icmp eq i64 %iv.next, 0
    940   br i1 %cond, label %exit, label %body
    941 
    942 exit:
    943   ret void
    944 }
    945 
    946 define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind {
        ; Loop that loads the <2 x i64> at %iv2-1 for @use-quadword, stores %val at
        ; %iv2, and advances %iv2 by 4 elements per iteration.
    947 ; CHECK-LABEL: store-post-indexed-quadword
    948 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64
    949 entry:
    950   %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
    951   br label %body
    952 
    953 body:
    954   %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
    955   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    956   %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
    957   %load = load <2 x i64>, <2 x i64>* %gep2
    958   call void @use-quadword(<2 x i64> %load)
    959   store <2 x i64> %val, <2 x i64>* %iv2
    960   %iv.next = add i64 %iv, -4
        ; 4 x 16-byte vectors = 64 bytes: matches the #64 writeback in the expected str.
    961   %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
    962   %cond = icmp eq i64 %iv.next, 0
    963   br i1 %cond, label %exit, label %body
    964 
    965 exit:
    966   ret void
    967 }
    968 
    969 define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind {
        ; Loop that loads the float at %iv2-1 for @use-float, stores %val at %iv2,
        ; and advances %iv2 by 4 elements per iteration.
    970 ; CHECK-LABEL: store-post-indexed-float
    971 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16
    972 entry:
    973   %gep1 = getelementptr float, float* %array, i64 2
    974   br label %body
    975 
    976 body:
    977   %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
    978   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
    979   %gep2 = getelementptr float, float* %iv2, i64 -1
    980   %load = load float, float* %gep2
    981   call void @use-float(float %load)
    982   store float %val, float* %iv2
    983   %iv.next = add i64 %iv, -4
        ; 4 x float = 16 bytes: matches the #16 writeback in the expected str.
    984   %gep3 = getelementptr float, float* %iv2, i64 4
    985   %cond = icmp eq i64 %iv.next, 0
    986   br i1 %cond, label %exit, label %body
    987 
    988 exit:
    989   ret void
    990 }
    991 
    992 define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind {
        ; Loop that loads the double at %iv2-1 for @use-double, stores %val at %iv2,
        ; and advances %iv2 by 4 elements per iteration.
    993 ; CHECK-LABEL: store-post-indexed-double
    994 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32
    995 entry:
    996   %gep1 = getelementptr double, double* %array, i64 2
    997   br label %body
    998 
    999 body:
   1000   %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
   1001   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   1002   %gep2 = getelementptr double, double* %iv2, i64 -1
   1003   %load = load double, double* %gep2
   1004   call void @use-double(double %load)
   1005   store double %val, double* %iv2
   1006   %iv.next = add i64 %iv, -4
        ; 4 x double = 32 bytes: matches the #32 writeback in the expected str.
   1007   %gep3 = getelementptr double, double* %iv2, i64 4
   1008   %cond = icmp eq i64 %iv.next, 0
   1009   br i1 %cond, label %exit, label %body
   1010 
   1011 exit:
   1012   ret void
   1013 }
   1014 
   1015 declare void @use-byte(i8)                ; called by load-/store-post-indexed-byte with each loaded value
   1016 declare void @use-halfword(i16)           ; called by load-/store-post-indexed-halfword
   1017 declare void @use-word(i32)               ; called by load-/store-post-indexed-word
   1018 declare void @use-doubleword(i64)         ; called by load-/store-post-indexed-doubleword
   1019 declare void @use-quadword(<2 x i64>)     ; called by load-/store-post-indexed-quadword
   1020 declare void @use-float(float)            ; called by load-/store-post-indexed-float
   1021 declare void @use-double(double)          ; called by load-/store-post-indexed-double
   1022 
   1023 ; Check the following transform:
   1024 ;
   1025 ; stp w0, w1, [x20]
   1026 ;  ...
   1027 ; add x20, x20, #32
   1028 ;  ->
   1029 ; stp w0, w1, [x20], #32
   1030 
   1031 define void @store-pair-post-indexed-word() nounwind {
        ; Copies both i32 fields of the %src alloca into the %dst alloca. The
        ; expected output is a paired store through sp with a #16 post-index
        ; writeback (presumably releasing the two 8-byte allocas), then ret.
   1032 ; CHECK-LABEL: store-pair-post-indexed-word
   1033 ; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [sp], #16
   1034 ; CHECK: ret
   1035   %src = alloca { i32, i32 }, align 8
   1036   %dst = alloca { i32, i32 }, align 8
   1037 
   1038   %src.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 0
   1039   %src.real = load i32, i32* %src.realp
   1040   %src.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 1
   1041   %src.imag = load i32, i32* %src.imagp
   1042 
   1043   %dst.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 0
   1044   %dst.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 1
        ; Two adjacent stores to fields 0 and 1 of %dst: the pair the stp check covers.
   1045   store i32 %src.real, i32* %dst.realp
   1046   store i32 %src.imag, i32* %dst.imagp
   1047   ret void
   1048 }
   1049 
   1050 define void @store-pair-post-indexed-doubleword() nounwind {
        ; Copies both i64 fields of the %src alloca into the %dst alloca. The
        ; expected output is a paired store through sp with a #32 post-index
        ; writeback (presumably releasing the two 16-byte allocas), then ret.
   1051 ; CHECK-LABEL: store-pair-post-indexed-doubleword
   1052 ; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [sp], #32
   1053 ; CHECK: ret
   1054   %src = alloca { i64, i64 }, align 8
   1055   %dst = alloca { i64, i64 }, align 8
   1056 
   1057   %src.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 0
   1058   %src.real = load i64, i64* %src.realp
   1059   %src.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 1
   1060   %src.imag = load i64, i64* %src.imagp
   1061 
   1062   %dst.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 0
   1063   %dst.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 1
        ; Two adjacent stores to fields 0 and 1 of %dst: the pair the stp check covers.
   1064   store i64 %src.real, i64* %dst.realp
   1065   store i64 %src.imag, i64* %dst.imagp
   1066   ret void
   1067 }
   1068 
   1069 define void @store-pair-post-indexed-float() nounwind {
        ; Copies both float fields of the %src alloca into the %dst alloca. The
        ; expected output is a paired store through sp with a #16 post-index
        ; writeback (presumably releasing the two 8-byte allocas), then ret.
   1070 ; CHECK-LABEL: store-pair-post-indexed-float
   1071 ; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [sp], #16
   1072 ; CHECK: ret
   1073   %src = alloca { float, float }, align 8
   1074   %dst = alloca { float, float }, align 8
   1075 
   1076   %src.realp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 0
   1077   %src.real = load float, float* %src.realp
   1078   %src.imagp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 1
   1079   %src.imag = load float, float* %src.imagp
   1080 
   1081   %dst.realp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 0
   1082   %dst.imagp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 1
        ; Two adjacent stores to fields 0 and 1 of %dst: the pair the stp check covers.
   1083   store float %src.real, float* %dst.realp
   1084   store float %src.imag, float* %dst.imagp
   1085   ret void
   1086 }
   1087 
   1088 define void @store-pair-post-indexed-double() nounwind {
        ; Copies both double fields of the %src alloca into the %dst alloca. The
        ; expected output is a paired store through sp with a #32 post-index
        ; writeback (presumably releasing the two 16-byte allocas), then ret.
   1089 ; CHECK-LABEL: store-pair-post-indexed-double
   1090 ; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [sp], #32
   1091 ; CHECK: ret
   1092   %src = alloca { double, double }, align 8
   1093   %dst = alloca { double, double }, align 8
   1094 
   1095   %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
   1096   %src.real = load double, double* %src.realp
   1097   %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
   1098   %src.imag = load double, double* %src.imagp
   1099 
   1100   %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
   1101   %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
        ; Two adjacent stores to fields 0 and 1 of %dst: the pair the stp check covers.
   1102   store double %src.real, double* %dst.realp
   1103   store double %src.imag, double* %dst.imagp
   1104   ret void
   1105 }
   1106 
   1107 ; Check the following transform:
   1108 ;
   1109 ; (ldr|str) X, [x20]
   1110 ;  ...
   1111 ; sub x20, x20, #16
   1112 ;  ->
   1113 ; (ldr|str) X, [x20], #-16
   1114 ;
   1115 ; with X being either w0, x0, s0, d0 or q0.
   1116 
   1117 define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
        ; Loop that copies the i32 elements at index -1 and 0 from %phi1 (starting
        ; at %b) to %phi2 (starting at %a), then moves both pointers back by 2
        ; elements. The checks expect a post-indexed ldr and str with #-8.
   1118 ; CHECK-LABEL: post-indexed-sub-word
   1119 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
   1120 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
   1121   br label %for.body
   1122 for.body:
   1123   %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
   1124   %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ]
   1125   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1126   %gep1 = getelementptr i32, i32* %phi1, i64 -1
   1127   %load1 = load i32, i32* %gep1
   1128   %gep2 = getelementptr i32, i32* %phi2, i64 -1
   1129   store i32 %load1, i32* %gep2
   1130   %load2 = load i32, i32* %phi1
   1131   store i32 %load2, i32* %phi2
   1132   %dec.i = add nsw i64 %i, -1
        ; -2 x i32 = -8 bytes for both the store pointer and the load pointer.
   1133   %gep3 = getelementptr i32, i32* %phi2, i64 -2
   1134   %gep4 = getelementptr i32, i32* %phi1, i64 -2
   1135   %cond = icmp sgt i64 %dec.i, 0
   1136   br i1 %cond, label %for.body, label %end
   1137 end:
   1138   ret void
   1139 }
   1140 
   1141 define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
        ; Loop that copies the i64 elements at index -1 and 0 from %phi1 (starting
        ; at %b) to %phi2 (starting at %a), then moves both pointers back by 2
        ; elements. The checks expect a post-indexed ldr and str with #-16.
   1142 ; CHECK-LABEL: post-indexed-sub-doubleword
   1143 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
   1144 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
   1145   br label %for.body
   1146 for.body:
   1147   %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
   1148   %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
   1149   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1150   %gep1 = getelementptr i64, i64* %phi1, i64 -1
   1151   %load1 = load i64, i64* %gep1
   1152   %gep2 = getelementptr i64, i64* %phi2, i64 -1
   1153   store i64 %load1, i64* %gep2
   1154   %load2 = load i64, i64* %phi1
   1155   store i64 %load2, i64* %phi2
   1156   %dec.i = add nsw i64 %i, -1
        ; -2 x i64 = -16 bytes for both the store pointer and the load pointer.
   1157   %gep3 = getelementptr i64, i64* %phi2, i64 -2
   1158   %gep4 = getelementptr i64, i64* %phi1, i64 -2
   1159   %cond = icmp sgt i64 %dec.i, 0
   1160   br i1 %cond, label %for.body, label %end
   1161 end:
   1162   ret void
   1163 }
   1164 
   1165 define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
        ; Loop that copies the <2 x i64> elements at index -1 and 0 from %phi1
        ; (starting at %b) to %phi2 (starting at %a), then moves both pointers back
        ; by 2 elements. The checks expect a post-indexed ldr and str with #-32.
   1166 ; CHECK-LABEL: post-indexed-sub-quadword
   1167 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
   1168 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
   1169   br label %for.body
   1170 for.body:
   1171   %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
   1172   %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ]
   1173   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1174   %gep1 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -1
   1175   %load1 = load <2 x i64>, <2 x i64>* %gep1
   1176   %gep2 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -1
   1177   store <2 x i64> %load1, <2 x i64>* %gep2
   1178   %load2 = load <2 x i64>, <2 x i64>* %phi1
   1179   store <2 x i64> %load2, <2 x i64>* %phi2
   1180   %dec.i = add nsw i64 %i, -1
        ; -2 x 16-byte vectors = -32 bytes for both the store and the load pointer.
   1181   %gep3 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -2
   1182   %gep4 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -2
   1183   %cond = icmp sgt i64 %dec.i, 0
   1184   br i1 %cond, label %for.body, label %end
   1185 end:
   1186   ret void
   1187 }
   1188 
   1189 define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
        ; Loop that copies the float elements at index -1 and 0 from %phi1 (starting
        ; at %b) to %phi2 (starting at %a), then moves both pointers back by 2
        ; elements. The checks expect a post-indexed ldr and str with #-8.
   1190 ; CHECK-LABEL: post-indexed-sub-float
   1191 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
   1192 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
   1193   br label %for.body
   1194 for.body:
   1195   %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
   1196   %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ]
   1197   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1198   %gep1 = getelementptr float, float* %phi1, i64 -1
   1199   %load1 = load float, float* %gep1
   1200   %gep2 = getelementptr float, float* %phi2, i64 -1
   1201   store float %load1, float* %gep2
   1202   %load2 = load float, float* %phi1
   1203   store float %load2, float* %phi2
   1204   %dec.i = add nsw i64 %i, -1
        ; -2 x float = -8 bytes for both the store pointer and the load pointer.
   1205   %gep3 = getelementptr float, float* %phi2, i64 -2
   1206   %gep4 = getelementptr float, float* %phi1, i64 -2
   1207   %cond = icmp sgt i64 %dec.i, 0
   1208   br i1 %cond, label %for.body, label %end
   1209 end:
   1210   ret void
   1211 }
   1212 
   1213 define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
        ; Loop that copies the double elements at index -1 and 0 from %phi1
        ; (starting at %b) to %phi2 (starting at %a), then moves both pointers back
        ; by 2 elements. The checks expect a post-indexed ldr and str with #-16.
   1214 ; CHECK-LABEL: post-indexed-sub-double
   1215 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
   1216 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
   1217   br label %for.body
   1218 for.body:
   1219   %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
   1220   %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ]
   1221   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1222   %gep1 = getelementptr double, double* %phi1, i64 -1
   1223   %load1 = load double, double* %gep1
   1224   %gep2 = getelementptr double, double* %phi2, i64 -1
   1225   store double %load1, double* %gep2
   1226   %load2 = load double, double* %phi1
   1227   store double %load2, double* %phi2
   1228   %dec.i = add nsw i64 %i, -1
        ; -2 x double = -16 bytes for both the store pointer and the load pointer.
   1229   %gep3 = getelementptr double, double* %phi2, i64 -2
   1230   %gep4 = getelementptr double, double* %phi1, i64 -2
   1231   %cond = icmp sgt i64 %dec.i, 0
   1232   br i1 %cond, label %for.body, label %end
   1233 end:
   1234   ret void
   1235 }
   1236 
   1237 define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind {
        ; Loop that copies the i64 elements at index 1 and 0 from %phi1 to %phi2,
        ; then moves both pointers back by 32 elements (256 bytes). The checks
        ; expect a post-indexed ldr and str with #-256; per the test name this is
        ; the smallest post-index immediate that can still be folded.
   1238 ; CHECK-LABEL: post-indexed-sub-doubleword-offset-min
   1239 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256
   1240 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
   1241   br label %for.body
   1242 for.body:
   1243   %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
   1244   %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
   1245   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1246   %gep1 = getelementptr i64, i64* %phi1, i64 1
   1247   %load1 = load i64, i64* %gep1
   1248   %gep2 = getelementptr i64, i64* %phi2, i64 1
   1249   store i64 %load1, i64* %gep2
   1250   %load2 = load i64, i64* %phi1
   1251   store i64 %load2, i64* %phi2
   1252   %dec.i = add nsw i64 %i, -1
        ; -32 x i64 = -256 bytes.
   1253   %gep3 = getelementptr i64, i64* %phi2, i64 -32
   1254   %gep4 = getelementptr i64, i64* %phi1, i64 -32
   1255   %cond = icmp sgt i64 %dec.i, 0
   1256   br i1 %cond, label %for.body, label %end
   1257 end:
   1258   ret void
   1259 }
   1260 
   1261 define void @post-indexed-doubleword-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
        ; Negative test: same copy loop as the offset-min case, but the pointers
        ; advance by +32 elements (256 bytes). The checks expect the ldr and str to
        ; keep a plain base address, each followed by a separate add of #256,
        ; i.e. no post-indexed form.
   1262 ; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range
   1263 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}]
   1264 ; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
   1265 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
   1266 ; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
   1267 
   1268   br label %for.body
   1269 for.body:
   1270   %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
   1271   %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
   1272   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1273   %gep1 = getelementptr i64, i64* %phi1, i64 1
   1274   %load1 = load i64, i64* %gep1
   1275   %gep2 = getelementptr i64, i64* %phi2, i64 1
   1276   store i64 %load1, i64* %gep2
   1277   %load2 = load i64, i64* %phi1
   1278   store i64 %load2, i64* %phi2
   1279   %dec.i = add nsw i64 %i, -1
        ; +32 x i64 = +256 bytes.
   1280   %gep3 = getelementptr i64, i64* %phi2, i64 32
   1281   %gep4 = getelementptr i64, i64* %phi1, i64 32
   1282   %cond = icmp sgt i64 %dec.i, 0
   1283   br i1 %cond, label %for.body, label %end
   1284 end:
   1285   ret void
   1286 }
   1287 
   1288 define void @post-indexed-paired-min-offset(i64* %a, i64* %b, i64 %count) nounwind {
        ; Loop that loads the i64 elements at index 1 and 0 of %phi1 and stores them
        ; to the same indices of %phi2, then moves both pointers back by 64
        ; elements (512 bytes). The checks expect post-indexed ldp and stp with
        ; #-512; per the test name this is the smallest paired immediate.
   1289 ; CHECK-LABEL: post-indexed-paired-min-offset
   1290 ; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
   1291 ; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
   1292   br label %for.body
   1293 for.body:
   1294   %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
   1295   %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
   1296   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1297   %gep1 = getelementptr i64, i64* %phi1, i64 1
   1298   %load1 = load i64, i64* %gep1
   1299   %gep2 = getelementptr i64, i64* %phi2, i64 1
   1300   %load2 = load i64, i64* %phi1
   1301   store i64 %load1, i64* %gep2
   1302   store i64 %load2, i64* %phi2
   1303   %dec.i = add nsw i64 %i, -1
        ; -64 x i64 = -512 bytes.
   1304   %gep3 = getelementptr i64, i64* %phi2, i64 -64
   1305   %gep4 = getelementptr i64, i64* %phi1, i64 -64
   1306   %cond = icmp sgt i64 %dec.i, 0
   1307   br i1 %cond, label %for.body, label %end
   1308 end:
   1309   ret void
   1310 }
   1311 
   1312 define void @post-indexed-paired-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
        ; Negative test for the paired form: the pointers advance by +64 elements
        ; (512 bytes). The checks expect ldp and stp with a plain base address, each
        ; followed by a separate add of #512, i.e. no post-indexed form.
   1313 ; CHECK-LABEL: post-indexed-paired-offset-out-of-range
   1314 ; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
   1315 ; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
   1316 ; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
   1317 ; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
   1318   br label %for.body
   1319 for.body:
   1320   %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
   1321   %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
   1322   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   1323   %gep1 = getelementptr i64, i64* %phi1, i64 1
        ; Note: here %load1 reads index 0 and %load2 reads index 1, and each is
        ; stored to the other index of %phi2 (the two elements are swapped).
   1324   %load1 = load i64, i64* %phi1
   1325   %gep2 = getelementptr i64, i64* %phi2, i64 1
   1326   %load2 = load i64, i64* %gep1
   1327   store i64 %load1, i64* %gep2
   1328   store i64 %load2, i64* %phi2
   1329   %dec.i = add nsw i64 %i, -1
        ; +64 x i64 = +512 bytes.
   1330   %gep3 = getelementptr i64, i64* %phi2, i64 64
   1331   %gep4 = getelementptr i64, i64* %phi1, i64 64
   1332   %cond = icmp sgt i64 %dec.i, 0
   1333   br i1 %cond, label %for.body, label %end
   1334 end:
   1335   ret void
   1336 }
   1337 
   1338 ; DAGCombiner::MergeConsecutiveStores merges this into a vector store,
   1339 ; replaceZeroVectorStore should split the vector store back into
   1340 ; scalar stores which should get merged by AArch64LoadStoreOptimizer.
   1341 define void @merge_zr32(i32* %p) {
        ; Two adjacent i32 zero stores at %p[0] and %p[1]. Expected output: a single
        ; 64-bit xzr store without +strict-align, a wzr pair with +strict-align.
   1342 ; CHECK-LABEL: merge_zr32:
   1343 ; CHECK: // %entry
   1344 ; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
   1345 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1346 ; CHECK-NEXT: ret
   1347 entry:
   1348   store i32 0, i32* %p
   1349   %p1 = getelementptr i32, i32* %p, i32 1
   1350   store i32 0, i32* %p1
   1351   ret void
   1352 }
   1353 
   1354 ; Same as merge_zr32 but the merged stores should also get paired.
   1355 define void @merge_zr32_2(i32* %p) {
        ; Four adjacent i32 zero stores at %p[0..3]. Expected output: one xzr pair
        ; without +strict-align, two wzr pairs (offsets 0 and 8) with +strict-align.
   1356 ; CHECK-LABEL: merge_zr32_2:
   1357 ; CHECK: // %entry
   1358 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1359 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1360 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
   1361 ; CHECK-NEXT: ret
   1362 entry:
   1363   store i32 0, i32* %p
   1364   %p1 = getelementptr i32, i32* %p, i32 1
   1365   store i32 0, i32* %p1
   1366   %p2 = getelementptr i32, i32* %p, i64 2
   1367   store i32 0, i32* %p2
   1368   %p3 = getelementptr i32, i32* %p, i64 3
   1369   store i32 0, i32* %p3
   1370   ret void
   1371 }
   1372 
   1373 ; Like merge_zr32_2, but checking the largest allowed stp immediate offset.
   1374 define void @merge_zr32_2_offset(i32* %p) {
        ; Four adjacent i32 zero stores at %p[126..129], i.e. byte offsets 504..516.
        ; Expected output: one xzr pair at #504 without +strict-align, four
        ; separate wzr stores with +strict-align.
   1375 ; CHECK-LABEL: merge_zr32_2_offset:
   1376 ; CHECK: // %entry
   1377 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504]
   1378 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #504]
   1379 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #508]
   1380 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #512]
   1381 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #516]
   1382 ; CHECK-NEXT: ret
   1383 entry:
   1384   %p0 = getelementptr i32, i32* %p, i32 126
   1385   store i32 0, i32* %p0
   1386   %p1 = getelementptr i32, i32* %p, i32 127
   1387   store i32 0, i32* %p1
   1388   %p2 = getelementptr i32, i32* %p, i64 128
   1389   store i32 0, i32* %p2
   1390   %p3 = getelementptr i32, i32* %p, i64 129
   1391   store i32 0, i32* %p3
   1392   ret void
   1393 }
   1394 
   1395 ; Like merge_zr32, but replaceZeroVectorStore should not split this
   1396 ; vector store since the address offset is too large for the stp
   1397 ; instruction.
   1398 define void @no_merge_zr32_2_offset(i32* %p) {
        ; Four adjacent i32 zero stores at %p[1024..1027], i.e. byte offsets
        ; 4096..4108. Expected output: a zeroed vector register stored with one
        ; str q at #4096 without +strict-align, four separate wzr stores with
        ; +strict-align.
   1399 ; CHECK-LABEL: no_merge_zr32_2_offset:
   1400 ; CHECK: // %entry
   1401 ; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
   1402 ; NOSTRICTALIGN-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096]
   1403 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4096]
   1404 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4100]
   1405 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4104]
   1406 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4108]
   1407 ; CHECK-NEXT: ret
   1408 entry:
   1409   %p0 = getelementptr i32, i32* %p, i32 1024
   1410   store i32 0, i32* %p0
   1411   %p1 = getelementptr i32, i32* %p, i32 1025
   1412   store i32 0, i32* %p1
   1413   %p2 = getelementptr i32, i32* %p, i64 1026
   1414   store i32 0, i32* %p2
   1415   %p3 = getelementptr i32, i32* %p, i64 1027
   1416   store i32 0, i32* %p3
   1417   ret void
   1418 }
   1419 
   1420 ; Like merge_zr32, but replaceZeroVectorStore should not split the
   1421 ; vector store since the zero constant vector has multiple uses, so we
   1422 ; err on the side that allows for stp q instruction generation.
   1423 define void @merge_zr32_3(i32* %p) {
        ; Eight adjacent i32 zero stores at %p[0..7] (32 bytes). Expected output: a
        ; zeroed vector register stored as one q-register pair without
        ; +strict-align, four wzr pairs (offsets 0, 8, 16, 24) with +strict-align.
   1424 ; CHECK-LABEL: merge_zr32_3:
   1425 ; CHECK: // %entry
   1426 ; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
   1427 ; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
   1428 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1429 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
   1430 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #16]
   1431 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #24]
   1432 ; CHECK-NEXT: ret
   1433 entry:
   1434   store i32 0, i32* %p
   1435   %p1 = getelementptr i32, i32* %p, i32 1
   1436   store i32 0, i32* %p1
   1437   %p2 = getelementptr i32, i32* %p, i64 2
   1438   store i32 0, i32* %p2
   1439   %p3 = getelementptr i32, i32* %p, i64 3
   1440   store i32 0, i32* %p3
   1441   %p4 = getelementptr i32, i32* %p, i64 4
   1442   store i32 0, i32* %p4
   1443   %p5 = getelementptr i32, i32* %p, i64 5
   1444   store i32 0, i32* %p5
   1445   %p6 = getelementptr i32, i32* %p, i64 6
   1446   store i32 0, i32* %p6
   1447   %p7 = getelementptr i32, i32* %p, i64 7
   1448   store i32 0, i32* %p7
   1449   ret void
   1450 }
   1451 
   1452 ; Like merge_zr32, but with 2-vector type.
   1453 define void @merge_zr32_2vec(<2 x i32>* %p) {
        ; One <2 x i32> zeroinitializer store. Expected output: a single 64-bit xzr
        ; store without +strict-align, a wzr pair with +strict-align.
   1454 ; CHECK-LABEL: merge_zr32_2vec:
   1455 ; CHECK: // %entry
   1456 ; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
   1457 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1458 ; CHECK-NEXT: ret
   1459 entry:
   1460   store <2 x i32> zeroinitializer, <2 x i32>* %p
   1461   ret void
   1462 }
   1463 
   1464 ; Like merge_zr32, but with 3-vector type.
   1465 define void @merge_zr32_3vec(<3 x i32>* %p) {
        ; One <3 x i32> zeroinitializer store. Expected output covers the first two
        ; lanes with an xzr store (or a wzr pair with +strict-align) and the third
        ; lane with a wzr store at #8.
   1466 ; CHECK-LABEL: merge_zr32_3vec:
   1467 ; CHECK: // %entry
   1468 ; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
   1469 ; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
   1470 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1471 ; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
   1472 ; CHECK-NEXT: ret
   1473 entry:
   1474   store <3 x i32> zeroinitializer, <3 x i32>* %p
   1475   ret void
   1476 }
   1477 
   1478 ; Like merge_zr32, but with 4-vector type.
   1479 define void @merge_zr32_4vec(<4 x i32>* %p) {
        ; One <4 x i32> zeroinitializer store. Expected output: one xzr pair without
        ; +strict-align, two wzr pairs (offsets 0 and 8) with +strict-align.
   1480 ; CHECK-LABEL: merge_zr32_4vec:
   1481 ; CHECK: // %entry
   1482 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1483 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1484 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
   1485 ; CHECK-NEXT: ret
   1486 entry:
   1487   store <4 x i32> zeroinitializer, <4 x i32>* %p
   1488   ret void
   1489 }
   1490 
   1491 ; Like merge_zr32, but with 2-vector float type.
   1492 define void @merge_zr32_2vecf(<2 x float>* %p) {
        ; One <2 x float> zeroinitializer store. Expected output: a single 64-bit
        ; xzr store without +strict-align, a wzr pair with +strict-align.
   1493 ; CHECK-LABEL: merge_zr32_2vecf:
   1494 ; CHECK: // %entry
   1495 ; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
   1496 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1497 ; CHECK-NEXT: ret
   1498 entry:
   1499   store <2 x float> zeroinitializer, <2 x float>* %p
   1500   ret void
   1501 }
   1502 
   1503 ; Like merge_zr32, but with 4-vector float type.
   1504 define void @merge_zr32_4vecf(<4 x float>* %p) {
        ; One <4 x float> zeroinitializer store. Expected output: one xzr pair
        ; without +strict-align, two wzr pairs (offsets 0 and 8) with +strict-align.
   1505 ; CHECK-LABEL: merge_zr32_4vecf:
   1506 ; CHECK: // %entry
   1507 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1508 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
   1509 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
   1510 ; CHECK-NEXT: ret
   1511 entry:
   1512   store <4 x float> zeroinitializer, <4 x float>* %p
   1513   ret void
   1514 }
   1515 
   1516 ; Similar to merge_zr32, but for 64-bit values: two adjacent i64 zero stores become one stp xzr, xzr in both run configurations.
   1517 define void @merge_zr64(i64* %p) {
   1518 ; CHECK-LABEL: merge_zr64:
   1519 ; CHECK: // %entry
   1520 ; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1521 ; CHECK-NEXT: ret
   1522 entry:
   1523   store i64 0, i64* %p
   1524   %p1 = getelementptr i64, i64* %p, i64 1 ; element 1, directly after the first store
   1525   store i64 0, i64* %p1
   1526   ret void
   1527 }
   1528 
   1529 ; Similar to merge_zr32, but for 64-bit values and with an under-aligned (align 1) vector store.
   1530 define void @merge_zr64_unalign(<2 x i64>* %p) {
   1531 ; CHECK-LABEL: merge_zr64_unalign:
   1532 ; CHECK: // %entry
   1533 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1534 ; STRICTALIGN: strb
   1535 ; STRICTALIGN: strb
   1536 ; STRICTALIGN: strb
   1537 ; STRICTALIGN: strb
   1538 ; STRICTALIGN: strb
   1539 ; STRICTALIGN: strb
   1540 ; STRICTALIGN: strb
   1541 ; STRICTALIGN: strb
   1542 ; STRICTALIGN: strb
   1543 ; STRICTALIGN: strb
   1544 ; STRICTALIGN: strb
   1545 ; STRICTALIGN: strb
   1546 ; STRICTALIGN: strb
   1547 ; STRICTALIGN: strb
   1548 ; STRICTALIGN: strb
   1549 ; STRICTALIGN: strb
   1550 ; CHECK-NEXT: ret
   1551 entry:
   1552   store <2 x i64> zeroinitializer, <2 x i64>* %p, align 1 ; align 1: with +strict-align the checks expect 16 byte stores
   1553   ret void
   1554 }
   1555 
   1556 ; Similar to merge_zr32_3: replaceZeroVectorStore should not split the
   1557 ; vector store, since the zero constant vector has multiple uses (one movi feeds both halves of stp q).
   1558 define void @merge_zr64_2(i64* %p) {
   1559 ; CHECK-LABEL: merge_zr64_2:
   1560 ; CHECK: // %entry
   1561 ; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
   1562 ; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
   1563 ; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1564 ; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #16]
   1565 ; CHECK-NEXT: ret
   1566 entry:
   1567   store i64 0, i64* %p
   1568   %p1 = getelementptr i64, i64* %p, i64 1
   1569   store i64 0, i64* %p1
   1570   %p2 = getelementptr i64, i64* %p, i64 2 ; elements 2 and 3 form the second 16-byte half
   1571   store i64 0, i64* %p2
   1572   %p3 = getelementptr i64, i64* %p, i64 3
   1573   store i64 0, i64* %p3
   1574   ret void
   1575 }
   1576 
   1577 ; Like merge_zr64, but with a single <2 x double> zero store; still expects one stp xzr, xzr.
   1578 define void @merge_zr64_2vecd(<2 x double>* %p) {
   1579 ; CHECK-LABEL: merge_zr64_2vecd:
   1580 ; CHECK: // %entry
   1581 ; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1582 ; CHECK-NEXT: ret
   1583 entry:
   1584   store <2 x double> zeroinitializer, <2 x double>* %p
   1585   ret void
   1586 }
   1587 
   1588 ; Like merge_zr64, but with 3-vector i64 type: stp xzr, xzr for the first 16 bytes, then str xzr at #16.
   1589 define void @merge_zr64_3vec(<3 x i64>* %p) {
   1590 ; CHECK-LABEL: merge_zr64_3vec:
   1591 ; CHECK: // %entry
   1592 ; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
   1593 ; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
   1594 ; CHECK-NEXT: ret
   1595 entry:
   1596   store <3 x i64> zeroinitializer, <3 x i64>* %p
   1597   ret void
   1598 }
   1599 
   1600 ; Like merge_zr64_2, but with 4-vector double type; both run configurations expect movi + stp q.
   1601 define void @merge_zr64_4vecd(<4 x double>* %p) {
   1602 ; CHECK-LABEL: merge_zr64_4vecd:
   1603 ; CHECK: // %entry
   1604 ; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
   1605 ; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
   1606 ; CHECK-NEXT: ret
   1607 entry:
   1608   store <4 x double> zeroinitializer, <4 x double>* %p
   1609   ret void
   1610 }
   1611 
   1612 ; Verify that non-consecutive 128-bit zero stores stay separate q-register stores and are not paired into stp q.
   1613 define void @merge_multiple_128bit_stores(i64* %p) {
   1614 ; CHECK-LABEL: merge_multiple_128bit_stores:
   1615 ; CHECK: // %entry
   1616 ; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
   1617 ; NOSTRICTALIGN-NEXT: str q[[REG]], [x0]
   1618 ; NOSTRICTALIGN-NEXT: stur q[[REG]], [x0, #24]
   1619 ; NOSTRICTALIGN-NEXT: str q[[REG]], [x0, #48]
   1620 ; STRICTALIGN-NEXT: stp xzr, xzr, [x0]
   1621 ; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #24]
   1622 ; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #48]
   1623 ; CHECK-NEXT: ret
   1624 entry:
   1625   store i64 0, i64* %p
   1626   %p1 = getelementptr i64, i64* %p, i64 1
   1627   store i64 0, i64* %p1
   1628   %p3 = getelementptr i64, i64* %p, i64 3 ; index 2 is skipped, so this pair starts at byte 24
   1629   store i64 0, i64* %p3
   1630   %p4 = getelementptr i64, i64* %p, i64 4
   1631   store i64 0, i64* %p4
   1632   %p6 = getelementptr i64, i64* %p, i64 6 ; index 5 is skipped, so this pair starts at byte 48
   1633   store i64 0, i64* %p6
   1634   %p7 = getelementptr i64, i64* %p, i64 7
   1635   store i64 0, i64* %p7
   1636   ret void
   1637 }
   1638 
   1639 ; Verify that eight consecutive i64 zero stores (64 bytes) generate two stp q, or four stp xzr with +strict-align.
   1640 define void @merge_multiple_128bit_stores_consec(i64* %p) {
   1641 ; CHECK-LABEL: merge_multiple_128bit_stores_consec:
   1642 ; CHECK: // %entry
   1643 ; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
   1644 ; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
   1645 ; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}, #32]
   1646 ; STRICTALIGN-NEXT: stp xzr, xzr, [x0]
   1647 ; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #16]
   1648 ; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #32]
   1649 ; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #48]
   1650 ; CHECK-NEXT: ret
   1651 entry:
   1652   store i64 0, i64* %p
   1653   %p1 = getelementptr i64, i64* %p, i64 1
   1654   store i64 0, i64* %p1
   1655   %p2 = getelementptr i64, i64* %p, i64 2
   1656   store i64 0, i64* %p2
   1657   %p3 = getelementptr i64, i64* %p, i64 3
   1658   store i64 0, i64* %p3
   1659   %p4 = getelementptr i64, i64* %p, i64 4 ; elements 4..7 are the second 32-byte half (offset #32)
   1660   store i64 0, i64* %p4
   1661   %p5 = getelementptr i64, i64* %p, i64 5
   1662   store i64 0, i64* %p5
   1663   %p6 = getelementptr i64, i64* %p, i64 6
   1664   store i64 0, i64* %p6
   1665   %p7 = getelementptr i64, i64* %p, i64 7
   1666   store i64 0, i64* %p7
   1667   ret void
   1668 }
   1669 
   1670 ; Check for bug 34674, where an invalid add of xzr was being generated: the add must read a register copy of xzr ([[ZREG]]).
   1671 ; CHECK-LABEL: bug34674:
   1672 ; CHECK: // %entry
   1673 ; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
   1674 ; CHECK-DAG: stp xzr, xzr, [x0]
   1675 ; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
   1676 define i64 @bug34674(<2 x i64>* %p) {
   1677 entry:
   1678   store <2 x i64> zeroinitializer, <2 x i64>* %p
   1679   %p2 = bitcast <2 x i64>* %p to i64* ; same address, viewed as i64
   1680   %ld = load i64, i64* %p2 ; reads back the first 8 bytes of the vector just zeroed
   1681   %add = add i64 %ld, 1
   1682   ret i64 %add
   1683 }
   1684