; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
      2 
      3 ; CHECK-LABEL: ldp_int
      4 ; CHECK: ldp
      5 define i32 @ldp_int(i32* %p) nounwind {
      6   %tmp = load i32, i32* %p, align 4
      7   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
      8   %tmp1 = load i32, i32* %add.ptr, align 4
      9   %add = add nsw i32 %tmp1, %tmp
     10   ret i32 %add
     11 }
     12 
     13 ; CHECK-LABEL: ldp_sext_int
     14 ; CHECK: ldpsw
     15 define i64 @ldp_sext_int(i32* %p) nounwind {
     16   %tmp = load i32, i32* %p, align 4
     17   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     18   %tmp1 = load i32, i32* %add.ptr, align 4
     19   %sexttmp = sext i32 %tmp to i64
     20   %sexttmp1 = sext i32 %tmp1 to i64
     21   %add = add nsw i64 %sexttmp1, %sexttmp
     22   ret i64 %add
     23 }
     24 
     25 ; CHECK-LABEL: ldp_half_sext_res0_int:
     26 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
     27 ; CHECK: sxtw     x[[DST1]], w[[DST1]]
     28 define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
     29   %tmp = load i32, i32* %p, align 4
     30   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     31   %tmp1 = load i32, i32* %add.ptr, align 4
     32   %sexttmp = sext i32 %tmp to i64
     33   %sexttmp1 = zext i32 %tmp1 to i64
     34   %add = add nsw i64 %sexttmp1, %sexttmp
     35   ret i64 %add
     36 }
     37 
     38 ; CHECK-LABEL: ldp_half_sext_res1_int:
     39 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
     40 ; CHECK: sxtw     x[[DST2]], w[[DST2]]
     41 define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
     42   %tmp = load i32, i32* %p, align 4
     43   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     44   %tmp1 = load i32, i32* %add.ptr, align 4
     45   %sexttmp = zext i32 %tmp to i64
     46   %sexttmp1 = sext i32 %tmp1 to i64
     47   %add = add nsw i64 %sexttmp1, %sexttmp
     48   ret i64 %add
     49 }
     50 
     51 
     52 ; CHECK-LABEL: ldp_long
     53 ; CHECK: ldp
     54 define i64 @ldp_long(i64* %p) nounwind {
     55   %tmp = load i64, i64* %p, align 8
     56   %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
     57   %tmp1 = load i64, i64* %add.ptr, align 8
     58   %add = add nsw i64 %tmp1, %tmp
     59   ret i64 %add
     60 }
     61 
     62 ; CHECK-LABEL: ldp_float
     63 ; CHECK: ldp
     64 define float @ldp_float(float* %p) nounwind {
     65   %tmp = load float, float* %p, align 4
     66   %add.ptr = getelementptr inbounds float, float* %p, i64 1
     67   %tmp1 = load float, float* %add.ptr, align 4
     68   %add = fadd float %tmp, %tmp1
     69   ret float %add
     70 }
     71 
     72 ; CHECK-LABEL: ldp_double
     73 ; CHECK: ldp
     74 define double @ldp_double(double* %p) nounwind {
     75   %tmp = load double, double* %p, align 8
     76   %add.ptr = getelementptr inbounds double, double* %p, i64 1
     77   %tmp1 = load double, double* %add.ptr, align 8
     78   %add = fadd double %tmp, %tmp1
     79   ret double %add
     80 }
     81 
; Test the load/store optimizer: combine ldurs into an ldp, if appropriate.
     83 define i32 @ldur_int(i32* %a) nounwind {
     84 ; CHECK-LABEL: ldur_int
     85 ; CHECK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
     86 ; CHECK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
     87 ; CHECK-NEXT: ret
     88   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
     89   %tmp1 = load i32, i32* %p1, align 2
     90   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
     91   %tmp2 = load i32, i32* %p2, align 2
     92   %tmp3 = add i32 %tmp1, %tmp2
     93   ret i32 %tmp3
     94 }
     95 
     96 define i64 @ldur_sext_int(i32* %a) nounwind {
     97 ; CHECK-LABEL: ldur_sext_int
     98 ; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
     99 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    100 ; CHECK-NEXT: ret
    101   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
    102   %tmp1 = load i32, i32* %p1, align 2
    103   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
    104   %tmp2 = load i32, i32* %p2, align 2
    105   %sexttmp1 = sext i32 %tmp1 to i64
    106   %sexttmp2 = sext i32 %tmp2 to i64
    107   %tmp3 = add i64 %sexttmp1, %sexttmp2
    108   ret i64 %tmp3
    109 }
    110 
    111 define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
    112 ; CHECK-LABEL: ldur_half_sext_int_res0
    113 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
    114 ; CHECK: sxtw     x[[DST1]], w[[DST1]]
    115 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    116 ; CHECK-NEXT: ret
    117   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
    118   %tmp1 = load i32, i32* %p1, align 2
    119   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
    120   %tmp2 = load i32, i32* %p2, align 2
    121   %sexttmp1 = zext i32 %tmp1 to i64
    122   %sexttmp2 = sext i32 %tmp2 to i64
    123   %tmp3 = add i64 %sexttmp1, %sexttmp2
    124   ret i64 %tmp3
    125 }
    126 
    127 define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
    128 ; CHECK-LABEL: ldur_half_sext_int_res1
    129 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
    130 ; CHECK: sxtw     x[[DST2]], w[[DST2]]
    131 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    132 ; CHECK-NEXT: ret
    133   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
    134   %tmp1 = load i32, i32* %p1, align 2
    135   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
    136   %tmp2 = load i32, i32* %p2, align 2
    137   %sexttmp1 = sext i32 %tmp1 to i64
    138   %sexttmp2 = zext i32 %tmp2 to i64
    139   %tmp3 = add i64 %sexttmp1, %sexttmp2
    140   ret i64 %tmp3
    141 }
    142 
    143 
    144 define i64 @ldur_long(i64* %a) nounwind ssp {
    145 ; CHECK-LABEL: ldur_long
    146 ; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
    147 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    148 ; CHECK-NEXT: ret
    149   %p1 = getelementptr inbounds i64, i64* %a, i64 -1
    150   %tmp1 = load i64, i64* %p1, align 2
    151   %p2 = getelementptr inbounds i64, i64* %a, i64 -2
    152   %tmp2 = load i64, i64* %p2, align 2
    153   %tmp3 = add i64 %tmp1, %tmp2
    154   ret i64 %tmp3
    155 }
    156 
    157 define float @ldur_float(float* %a) {
    158 ; CHECK-LABEL: ldur_float
    159 ; CHECK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
    160 ; CHECK-NEXT: add     s{{[0-9]+}}, [[DST2]], [[DST1]]
    161 ; CHECK-NEXT: ret
    162   %p1 = getelementptr inbounds float, float* %a, i64 -1
    163   %tmp1 = load float, float* %p1, align 2
    164   %p2 = getelementptr inbounds float, float* %a, i64 -2
    165   %tmp2 = load float, float* %p2, align 2
    166   %tmp3 = fadd float %tmp1, %tmp2
    167   ret float %tmp3
    168 }
    169 
    170 define double @ldur_double(double* %a) {
    171 ; CHECK-LABEL: ldur_double
    172 ; CHECK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
    173 ; CHECK-NEXT: add     d{{[0-9]+}}, [[DST2]], [[DST1]]
    174 ; CHECK-NEXT: ret
    175   %p1 = getelementptr inbounds double, double* %a, i64 -1
    176   %tmp1 = load double, double* %p1, align 2
    177   %p2 = getelementptr inbounds double, double* %a, i64 -2
    178   %tmp2 = load double, double* %p2, align 2
    179   %tmp3 = fadd double %tmp1, %tmp2
    180   ret double %tmp3
    181 }
    182 
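; Now check some boundary conditions: offsets at the edge of the range where
; pairing is expected, and offsets that are not element-aligned.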
    184 define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
    185 ; CHECK-LABEL: pairUpBarelyIn
    186 ; CHECK-NOT: ldur
    187 ; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
    188 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    189 ; CHECK-NEXT: ret
    190   %p1 = getelementptr inbounds i64, i64* %a, i64 -31
    191   %tmp1 = load i64, i64* %p1, align 2
    192   %p2 = getelementptr inbounds i64, i64* %a, i64 -32
    193   %tmp2 = load i64, i64* %p2, align 2
    194   %tmp3 = add i64 %tmp1, %tmp2
    195   ret i64 %tmp3
    196 }
    197 
    198 define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
    199 ; CHECK-LABEL: pairUpBarelyInSext
    200 ; CHECK-NOT: ldur
    201 ; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
    202 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    203 ; CHECK-NEXT: ret
    204   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
    205   %tmp1 = load i32, i32* %p1, align 2
    206   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
    207   %tmp2 = load i32, i32* %p2, align 2
    208   %sexttmp1 = sext i32 %tmp1 to i64
    209   %sexttmp2 = sext i32 %tmp2 to i64
    210   %tmp3 = add i64 %sexttmp1, %sexttmp2
    211   ret i64 %tmp3
    212 }
    213 
    214 define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
    215 ; CHECK-LABEL: pairUpBarelyInHalfSextRes0
    216 ; CHECK-NOT: ldur
    217 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
    218 ; CHECK: sxtw     x[[DST1]], w[[DST1]]
    219 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    220 ; CHECK-NEXT: ret
    221   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
    222   %tmp1 = load i32, i32* %p1, align 2
    223   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
    224   %tmp2 = load i32, i32* %p2, align 2
    225   %sexttmp1 = zext i32 %tmp1 to i64
    226   %sexttmp2 = sext i32 %tmp2 to i64
    227   %tmp3 = add i64 %sexttmp1, %sexttmp2
    228   ret i64 %tmp3
    229 }
    230 
    231 define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
    232 ; CHECK-LABEL: pairUpBarelyInHalfSextRes1
    233 ; CHECK-NOT: ldur
    234 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
    235 ; CHECK: sxtw     x[[DST2]], w[[DST2]]
    236 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    237 ; CHECK-NEXT: ret
    238   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
    239   %tmp1 = load i32, i32* %p1, align 2
    240   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
    241   %tmp2 = load i32, i32* %p2, align 2
    242   %sexttmp1 = sext i32 %tmp1 to i64
    243   %sexttmp2 = zext i32 %tmp2 to i64
    244   %tmp3 = add i64 %sexttmp1, %sexttmp2
    245   ret i64 %tmp3
    246 }
    247 
    248 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
    249 ; CHECK-LABEL: pairUpBarelyOut
    250 ; CHECK-NOT: ldp
    251 ; Don't be fragile about which loads or manipulations of the base register
    252 ; are used---just check that there isn't an ldp before the add
    253 ; CHECK: add
    254 ; CHECK-NEXT: ret
    255   %p1 = getelementptr inbounds i64, i64* %a, i64 -32
    256   %tmp1 = load i64, i64* %p1, align 2
    257   %p2 = getelementptr inbounds i64, i64* %a, i64 -33
    258   %tmp2 = load i64, i64* %p2, align 2
    259   %tmp3 = add i64 %tmp1, %tmp2
    260   ret i64 %tmp3
    261 }
    262 
    263 define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
    264 ; CHECK-LABEL: pairUpBarelyOutSext
    265 ; CHECK-NOT: ldp
    266 ; Don't be fragile about which loads or manipulations of the base register
    267 ; are used---just check that there isn't an ldp before the add
    268 ; CHECK: add
    269 ; CHECK-NEXT: ret
    270   %p1 = getelementptr inbounds i32, i32* %a, i64 -64
    271   %tmp1 = load i32, i32* %p1, align 2
    272   %p2 = getelementptr inbounds i32, i32* %a, i64 -65
    273   %tmp2 = load i32, i32* %p2, align 2
    274   %sexttmp1 = sext i32 %tmp1 to i64
    275   %sexttmp2 = sext i32 %tmp2 to i64
    276   %tmp3 = add i64 %sexttmp1, %sexttmp2
    277   ret i64 %tmp3
    278 }
    279 
    280 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
    281 ; CHECK-LABEL: pairUpNotAligned
    282 ; CHECK-NOT: ldp
    283 ; CHECK: ldur
    284 ; CHECK-NEXT: ldur
    285 ; CHECK-NEXT: add
    286 ; CHECK-NEXT: ret
    287   %p1 = getelementptr inbounds i64, i64* %a, i64 -18
    288   %bp1 = bitcast i64* %p1 to i8*
    289   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
    290   %dp1 = bitcast i8* %bp1p1 to i64*
    291   %tmp1 = load i64, i64* %dp1, align 1
    292 
    293   %p2 = getelementptr inbounds i64, i64* %a, i64 -17
    294   %bp2 = bitcast i64* %p2 to i8*
    295   %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
    296   %dp2 = bitcast i8* %bp2p1 to i64*
    297   %tmp2 = load i64, i64* %dp2, align 1
    298 
    299   %tmp3 = add i64 %tmp1, %tmp2
    300   ret i64 %tmp3
    301 }
    302 
    303 define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
    304 ; CHECK-LABEL: pairUpNotAlignedSext
    305 ; CHECK-NOT: ldp
    306 ; CHECK: ldursw
    307 ; CHECK-NEXT: ldursw
    308 ; CHECK-NEXT: add
    309 ; CHECK-NEXT: ret
    310   %p1 = getelementptr inbounds i32, i32* %a, i64 -18
    311   %bp1 = bitcast i32* %p1 to i8*
    312   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
    313   %dp1 = bitcast i8* %bp1p1 to i32*
    314   %tmp1 = load i32, i32* %dp1, align 1
    315 
    316   %p2 = getelementptr inbounds i32, i32* %a, i64 -17
    317   %bp2 = bitcast i32* %p2 to i8*
    318   %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
    319   %dp2 = bitcast i8* %bp2p1 to i32*
    320   %tmp2 = load i32, i32* %dp2, align 1
    321 
    322   %sexttmp1 = sext i32 %tmp1 to i64
    323   %sexttmp2 = sext i32 %tmp2 to i64
    324   %tmp3 = add i64 %sexttmp1, %sexttmp2
    325  ret i64 %tmp3
    326 }
    327 
; External function taking an i32*. The pre- and post-index tests below call
; it with a pointer derived from %p so that pointer has a use besides the
; loads.
declare void @use-ptr(i32*)
    329 
    330 ; CHECK-LABEL: ldp_sext_int_pre
    331 ; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
    332 define i64 @ldp_sext_int_pre(i32* %p) nounwind {
    333   %ptr = getelementptr inbounds i32, i32* %p, i64 2
    334   call void @use-ptr(i32* %ptr)
    335   %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
    336   %tmp = load i32, i32* %add.ptr, align 4
    337   %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
    338   %tmp1 = load i32, i32* %add.ptr1, align 4
    339   %sexttmp = sext i32 %tmp to i64
    340   %sexttmp1 = sext i32 %tmp1 to i64
    341   %add = add nsw i64 %sexttmp1, %sexttmp
    342   ret i64 %add
    343 }
    344 
    345 ; CHECK-LABEL: ldp_sext_int_post
    346 ; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
    347 define i64 @ldp_sext_int_post(i32* %p) nounwind {
    348   %tmp = load i32, i32* %p, align 4
    349   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
    350   %tmp1 = load i32, i32* %add.ptr, align 4
    351   %sexttmp = sext i32 %tmp to i64
    352   %sexttmp1 = sext i32 %tmp1 to i64
    353   %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
    354   call void @use-ptr(i32* %ptr)
    355   %add = add nsw i64 %sexttmp1, %sexttmp
    356   ret i64 %add
    357 }
    358 
    359