Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
      2 ; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
      3 ; RUN:   -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
      4 
      5 ; CHECK-LABEL: ldp_int:
      6 ; CHECK: ldp     w{{[0-9]+}}, w{{[0-9]+}}, [x0]
      7 define i32 @ldp_int(i32* %p) nounwind { ; two adjacent i32 loads (p[0], p[1]) that are expected to fuse into one ldp
      8   %tmp = load i32, i32* %p, align 4 ; p[0]; the ldp pattern above names its operands so the "ldp" prefix of the symbol name cannot satisfy it
      9   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 ; &p[1]
     10   %tmp1 = load i32, i32* %add.ptr, align 4 ; p[1]
     11   %add = add nsw i32 %tmp1, %tmp ; consume both values so neither load is dead
     12   ret i32 %add
     13 }
     14 
     15 ; CHECK-LABEL: ldp_sext_int:
     16 ; CHECK: ldpsw   x{{[0-9]+}}, x{{[0-9]+}}, [x0]
     17 define i64 @ldp_sext_int(i32* %p) nounwind { ; both adjacent i32 loads are sign-extended, so the pair is expected to become ldpsw
     18   %tmp = load i32, i32* %p, align 4 ; p[0]
     19   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 ; &p[1]
     20   %tmp1 = load i32, i32* %add.ptr, align 4 ; p[1]
     21   %sexttmp = sext i32 %tmp to i64
     22   %sexttmp1 = sext i32 %tmp1 to i64
     23   %add = add nsw i64 %sexttmp1, %sexttmp ; consume both extended values
     24   ret i64 %add
     25 }
     26 
     27 ; CHECK-LABEL: ldp_half_sext_res0_int:
     28 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
     29 ; CHECK: sxtw     x[[DST1]], w[[DST1]]
     30 define i64 @ldp_half_sext_res0_int(i32* %p) nounwind { ; only the first loaded value is sign-extended; the checks expect ldp followed by sxtw on result 0
     31   %first = load i32, i32* %p, align 4 ; p[0]
     32   %second.addr = getelementptr inbounds i32, i32* %p, i64 1 ; &p[1]
     33   %second = load i32, i32* %second.addr, align 4 ; p[1]
     34   %first.sext = sext i32 %first to i64
     35   %second.zext = zext i32 %second to i64
     36   %sum = add nsw i64 %second.zext, %first.sext
     37   ret i64 %sum
     38 }
     39 
     40 ; CHECK-LABEL: ldp_half_sext_res1_int:
     41 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
     42 ; CHECK: sxtw     x[[DST2]], w[[DST2]]
     43 define i64 @ldp_half_sext_res1_int(i32* %p) nounwind { ; only the second loaded value is sign-extended; the checks expect ldp followed by sxtw on result 1
     44   %first = load i32, i32* %p, align 4 ; p[0]
     45   %second.addr = getelementptr inbounds i32, i32* %p, i64 1 ; &p[1]
     46   %second = load i32, i32* %second.addr, align 4 ; p[1]
     47   %first.zext = zext i32 %first to i64
     48   %second.sext = sext i32 %second to i64
     49   %sum = add nsw i64 %second.sext, %first.zext
     50   ret i64 %sum
     51 }
     52 
     53 
     54 ; CHECK-LABEL: ldp_long:
     55 ; CHECK: ldp     x{{[0-9]+}}, x{{[0-9]+}}, [x0]
     56 define i64 @ldp_long(i64* %p) nounwind { ; two adjacent i64 loads (p[0], p[1]) that are expected to fuse into one ldp
     57   %tmp = load i64, i64* %p, align 8 ; p[0]; the ldp pattern above names its operands so the "ldp" prefix of the symbol name cannot satisfy it
     58   %add.ptr = getelementptr inbounds i64, i64* %p, i64 1 ; &p[1]
     59   %tmp1 = load i64, i64* %add.ptr, align 8 ; p[1]
     60   %add = add nsw i64 %tmp1, %tmp ; consume both values so neither load is dead
     61   ret i64 %add
     62 }
     63 
     64 ; CHECK-LABEL: ldp_float:
     65 ; CHECK: ldp     s{{[0-9]+}}, s{{[0-9]+}}, [x0]
     66 define float @ldp_float(float* %p) nounwind { ; two adjacent float loads (p[0], p[1]) that are expected to fuse into one ldp of s-registers
     67   %tmp = load float, float* %p, align 4 ; p[0]; the ldp pattern above names its operands so the "ldp" prefix of the symbol name cannot satisfy it
     68   %add.ptr = getelementptr inbounds float, float* %p, i64 1 ; &p[1]
     69   %tmp1 = load float, float* %add.ptr, align 4 ; p[1]
     70   %add = fadd float %tmp, %tmp1 ; consume both values so neither load is dead
     71   ret float %add
     72 }
     73 
     74 ; CHECK-LABEL: ldp_double:
     75 ; CHECK: ldp     d{{[0-9]+}}, d{{[0-9]+}}, [x0]
     76 define double @ldp_double(double* %p) nounwind { ; two adjacent double loads (p[0], p[1]) that are expected to fuse into one ldp of d-registers
     77   %tmp = load double, double* %p, align 8 ; p[0]; the ldp pattern above names its operands so the "ldp" prefix of the symbol name cannot satisfy it
     78   %add.ptr = getelementptr inbounds double, double* %p, i64 1 ; &p[1]
     79   %tmp1 = load double, double* %add.ptr, align 8 ; p[1]
     80   %add = fadd double %tmp, %tmp1 ; consume both values so neither load is dead
     81   ret double %add
     82 }
     83 
     84 ; Test the load/store optimizer: combine adjacent ldurs into a single ldp where appropriate.
     85 define i32 @ldur_int(i32* %a) nounwind { ; loads a[-1] and a[-2]; the checks expect a single ldp at [x0, #-8]
     86 ; LDUR_CHK: ldur_int
     87 ; LDUR_CHK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
     88 ; LDUR_CHK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
     89 ; LDUR_CHK-NEXT: ret
     90   %ptr.neg1 = getelementptr inbounds i32, i32* %a, i32 -1 ; &a[-1]
     91   %val.neg1 = load i32, i32* %ptr.neg1, align 2
     92   %ptr.neg2 = getelementptr inbounds i32, i32* %a, i32 -2 ; &a[-2]
     93   %val.neg2 = load i32, i32* %ptr.neg2, align 2
     94   %sum = add i32 %val.neg1, %val.neg2
     95   ret i32 %sum
     96 }
     97 
     98 define i64 @ldur_sext_int(i32* %a) nounwind { ; loads and sign-extends a[-1] and a[-2]; the checks expect a single ldpsw at [x0, #-8]
     99 ; LDUR_CHK: ldur_sext_int
    100 ; LDUR_CHK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
    101 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    102 ; LDUR_CHK-NEXT: ret
    103   %ptr.neg1 = getelementptr inbounds i32, i32* %a, i32 -1 ; &a[-1]
    104   %val.neg1 = load i32, i32* %ptr.neg1, align 2
    105   %ptr.neg2 = getelementptr inbounds i32, i32* %a, i32 -2 ; &a[-2]
    106   %val.neg2 = load i32, i32* %ptr.neg2, align 2
    107   %wide.neg1 = sext i32 %val.neg1 to i64
    108   %wide.neg2 = sext i32 %val.neg2 to i64
    109   %sum = add i64 %wide.neg1, %wide.neg2
    110   ret i64 %sum
    111 }
    112 
    113 define i64 @ldur_half_sext_int_res0(i32* %a) nounwind { ; a[-2] is sign-extended, a[-1] zero-extended; the checks expect ldp then sxtw on the first ldp result
    114 ; LDUR_CHK: ldur_half_sext_int_res0
    115 ; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
    116 ; LDUR_CHK: sxtw     x[[DST1]], w[[DST1]]
    117 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    118 ; LDUR_CHK-NEXT: ret
    119   %ptr.neg1 = getelementptr inbounds i32, i32* %a, i32 -1 ; &a[-1]
    120   %val.neg1 = load i32, i32* %ptr.neg1, align 2
    121   %ptr.neg2 = getelementptr inbounds i32, i32* %a, i32 -2 ; &a[-2]
    122   %val.neg2 = load i32, i32* %ptr.neg2, align 2
    123   %zext.neg1 = zext i32 %val.neg1 to i64
    124   %sext.neg2 = sext i32 %val.neg2 to i64
    125   %sum = add i64 %zext.neg1, %sext.neg2
    126   ret i64 %sum
    127 }
    128 
    129 define i64 @ldur_half_sext_int_res1(i32* %a) nounwind { ; a[-1] is sign-extended, a[-2] zero-extended; the checks expect ldp then sxtw on the second ldp result
    130 ; LDUR_CHK: ldur_half_sext_int_res1
    131 ; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
    132 ; LDUR_CHK: sxtw     x[[DST2]], w[[DST2]]
    133 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    134 ; LDUR_CHK-NEXT: ret
    135   %ptr.neg1 = getelementptr inbounds i32, i32* %a, i32 -1 ; &a[-1]
    136   %val.neg1 = load i32, i32* %ptr.neg1, align 2
    137   %ptr.neg2 = getelementptr inbounds i32, i32* %a, i32 -2 ; &a[-2]
    138   %val.neg2 = load i32, i32* %ptr.neg2, align 2
    139   %sext.neg1 = sext i32 %val.neg1 to i64
    140   %zext.neg2 = zext i32 %val.neg2 to i64
    141   %sum = add i64 %sext.neg1, %zext.neg2
    142   ret i64 %sum
    143 }
    144 
    145 
    146 define i64 @ldur_long(i64* %a) nounwind ssp { ; loads a[-1] and a[-2] as i64; the checks expect a single ldp at [x0, #-16]
    147 ; LDUR_CHK: ldur_long
    148 ; LDUR_CHK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
    149 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    150 ; LDUR_CHK-NEXT: ret
    151   %ptr.neg1 = getelementptr inbounds i64, i64* %a, i64 -1 ; &a[-1]
    152   %val.neg1 = load i64, i64* %ptr.neg1, align 2
    153   %ptr.neg2 = getelementptr inbounds i64, i64* %a, i64 -2 ; &a[-2]
    154   %val.neg2 = load i64, i64* %ptr.neg2, align 2
    155   %sum = add i64 %val.neg1, %val.neg2
    156   ret i64 %sum
    157 }
    158 
    159 define float @ldur_float(float* %a) { ; loads a[-1] and a[-2] as float; the checks expect a single ldp at [x0, #-8]
    160 ; LDUR_CHK: ldur_float
    161 ; LDUR_CHK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
    162 ; LDUR_CHK-NEXT: fadd    s{{[0-9]+}}, [[DST2]], [[DST1]]
    163 ; LDUR_CHK-NEXT: ret
    164   %p1 = getelementptr inbounds float, float* %a, i64 -1 ; &a[-1]
    165   %tmp1 = load float, float* %p1, align 2
    166   %p2 = getelementptr inbounds float, float* %a, i64 -2 ; &a[-2]
    167   %tmp2 = load float, float* %p2, align 2
    168   %tmp3 = fadd float %tmp1, %tmp2 ; floating-point add, so the expected mnemonic is fadd (a bare "add" pattern only matched as a substring of it)
    169   ret float %tmp3
    170 }
    171 
    172 define double @ldur_double(double* %a) { ; loads a[-1] and a[-2] as double; the checks expect a single ldp at [x0, #-16]
    173 ; LDUR_CHK: ldur_double
    174 ; LDUR_CHK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
    175 ; LDUR_CHK-NEXT: fadd    d{{[0-9]+}}, [[DST2]], [[DST1]]
    176 ; LDUR_CHK-NEXT: ret
    177   %p1 = getelementptr inbounds double, double* %a, i64 -1 ; &a[-1]
    178   %tmp1 = load double, double* %p1, align 2
    179   %p2 = getelementptr inbounds double, double* %a, i64 -2 ; &a[-2]
    180   %tmp2 = load double, double* %p2, align 2
    181   %tmp3 = fadd double %tmp1, %tmp2 ; floating-point add, so the expected mnemonic is fadd (a bare "add" pattern only matched as a substring of it)
    182   ret double %tmp3
    183 }
    184 
    185 ; Now check some boundary conditions
    186 define i64 @pairUpBarelyIn(i64* %a) nounwind ssp { ; a[-31] and a[-32] (byte offsets -248 and -256); the checks expect these to still pair into ldp at #-256
    187 ; LDUR_CHK: pairUpBarelyIn
    188 ; LDUR_CHK-NOT: ldur
    189 ; LDUR_CHK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
    190 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    191 ; LDUR_CHK-NEXT: ret
    192   %ptr.neg31 = getelementptr inbounds i64, i64* %a, i64 -31 ; &a[-31]
    193   %val.neg31 = load i64, i64* %ptr.neg31, align 2
    194   %ptr.neg32 = getelementptr inbounds i64, i64* %a, i64 -32 ; &a[-32]
    195   %val.neg32 = load i64, i64* %ptr.neg32, align 2
    196   %sum = add i64 %val.neg31, %val.neg32
    197   ret i64 %sum
    198 }
    199 
    200 define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp { ; sign-extended a[-63] and a[-64] (byte offsets -252 and -256); the checks expect ldpsw at #-256
    201 ; LDUR_CHK: pairUpBarelyInSext
    202 ; LDUR_CHK-NOT: ldur
    203 ; LDUR_CHK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
    204 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    205 ; LDUR_CHK-NEXT: ret
    206   %ptr.neg63 = getelementptr inbounds i32, i32* %a, i64 -63 ; &a[-63]
    207   %val.neg63 = load i32, i32* %ptr.neg63, align 2
    208   %ptr.neg64 = getelementptr inbounds i32, i32* %a, i64 -64 ; &a[-64]
    209   %val.neg64 = load i32, i32* %ptr.neg64, align 2
    210   %wide.neg63 = sext i32 %val.neg63 to i64
    211   %wide.neg64 = sext i32 %val.neg64 to i64
    212   %sum = add i64 %wide.neg63, %wide.neg64
    213   ret i64 %sum
    214 }
    215 
    216 define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp { ; a[-64] sign-extended, a[-63] zero-extended; the checks expect ldp at #-256 then sxtw on the first ldp result
    217 ; LDUR_CHK: pairUpBarelyInHalfSextRes0
    218 ; LDUR_CHK-NOT: ldur
    219 ; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
    220 ; LDUR_CHK: sxtw     x[[DST1]], w[[DST1]]
    221 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    222 ; LDUR_CHK-NEXT: ret
    223   %ptr.neg63 = getelementptr inbounds i32, i32* %a, i64 -63 ; &a[-63]
    224   %val.neg63 = load i32, i32* %ptr.neg63, align 2
    225   %ptr.neg64 = getelementptr inbounds i32, i32* %a, i64 -64 ; &a[-64]
    226   %val.neg64 = load i32, i32* %ptr.neg64, align 2
    227   %zext.neg63 = zext i32 %val.neg63 to i64
    228   %sext.neg64 = sext i32 %val.neg64 to i64
    229   %sum = add i64 %zext.neg63, %sext.neg64
    230   ret i64 %sum
    231 }
    232 
    233 define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp { ; a[-63] sign-extended, a[-64] zero-extended; the checks expect ldp at #-256 then sxtw on the second ldp result
    234 ; LDUR_CHK: pairUpBarelyInHalfSextRes1
    235 ; LDUR_CHK-NOT: ldur
    236 ; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
    237 ; LDUR_CHK: sxtw     x[[DST2]], w[[DST2]]
    238 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    239 ; LDUR_CHK-NEXT: ret
    240   %ptr.neg63 = getelementptr inbounds i32, i32* %a, i64 -63 ; &a[-63]
    241   %val.neg63 = load i32, i32* %ptr.neg63, align 2
    242   %ptr.neg64 = getelementptr inbounds i32, i32* %a, i64 -64 ; &a[-64]
    243   %val.neg64 = load i32, i32* %ptr.neg64, align 2
    244   %sext.neg63 = sext i32 %val.neg63 to i64
    245   %zext.neg64 = zext i32 %val.neg64 to i64
    246   %sum = add i64 %sext.neg63, %zext.neg64
    247   ret i64 %sum
    248 }
    249 
    250 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp { ; a[-32] and a[-33] (byte offsets -256 and -264); the checks expect no ldp here
    251 ; LDUR_CHK: pairUpBarelyOut
    252 ; LDUR_CHK-NOT: ldp
    253 ; The exact loads and any base-register arithmetic are deliberately left
    254 ; unconstrained; the only requirement is that no ldp precedes the add.
    255 ; LDUR_CHK: add
    256 ; LDUR_CHK-NEXT: ret
    257   %ptr.neg32 = getelementptr inbounds i64, i64* %a, i64 -32 ; &a[-32]
    258   %val.neg32 = load i64, i64* %ptr.neg32, align 2
    259   %ptr.neg33 = getelementptr inbounds i64, i64* %a, i64 -33 ; &a[-33]
    260   %val.neg33 = load i64, i64* %ptr.neg33, align 2
    261   %sum = add i64 %val.neg32, %val.neg33
    262   ret i64 %sum
    263 }
    264 
    265 define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp { ; sign-extended a[-64] and a[-65] (byte offsets -256 and -260); the checks expect no ldp here
    266 ; LDUR_CHK: pairUpBarelyOutSext
    267 ; LDUR_CHK-NOT: ldp
    268 ; The exact loads and any base-register arithmetic are deliberately left
    269 ; unconstrained; the only requirement is that no ldp precedes the add.
    270 ; LDUR_CHK: add
    271 ; LDUR_CHK-NEXT: ret
    272   %ptr.neg64 = getelementptr inbounds i32, i32* %a, i64 -64 ; &a[-64]
    273   %val.neg64 = load i32, i32* %ptr.neg64, align 2
    274   %ptr.neg65 = getelementptr inbounds i32, i32* %a, i64 -65 ; &a[-65]
    275   %val.neg65 = load i32, i32* %ptr.neg65, align 2
    276   %wide.neg64 = sext i32 %val.neg64 to i64
    277   %wide.neg65 = sext i32 %val.neg65 to i64
    278   %sum = add i64 %wide.neg64, %wide.neg65
    279   ret i64 %sum
    280 }
    281 
    282 define i64 @pairUpNotAligned(i64* %a) nounwind ssp { ; two i64 loads at byte offsets -143 and -135 (element address + 1); the checks expect two ldur and no ldp
    283 ; LDUR_CHK: pairUpNotAligned
    284 ; LDUR_CHK-NOT: ldp
    285 ; LDUR_CHK: ldur
    286 ; LDUR_CHK-NEXT: ldur
    287 ; LDUR_CHK-NEXT: add
    288 ; LDUR_CHK-NEXT: ret
    289   %elt.lo = getelementptr inbounds i64, i64* %a, i64 -18 ; &a[-18]
    290   %elt.lo.bytes = bitcast i64* %elt.lo to i8*
    291   %elt.lo.plus1 = getelementptr inbounds i8, i8* %elt.lo.bytes, i64 1 ; one byte past the element start
    292   %odd.lo = bitcast i8* %elt.lo.plus1 to i64*
    293   %val.lo = load i64, i64* %odd.lo, align 1
    294 
    295   %elt.hi = getelementptr inbounds i64, i64* %a, i64 -17 ; &a[-17]
    296   %elt.hi.bytes = bitcast i64* %elt.hi to i8*
    297   %elt.hi.plus1 = getelementptr inbounds i8, i8* %elt.hi.bytes, i64 1 ; one byte past the element start
    298   %odd.hi = bitcast i8* %elt.hi.plus1 to i64*
    299   %val.hi = load i64, i64* %odd.hi, align 1
    300 
    301   %sum = add i64 %val.lo, %val.hi
    302   ret i64 %sum
    303 }
    304 
    305 define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp { ; two sign-extended i32 loads at byte offsets -71 and -67 (element address + 1); the checks expect two ldursw and no ldp
    306 ; LDUR_CHK: pairUpNotAlignedSext
    307 ; LDUR_CHK-NOT: ldp
    308 ; LDUR_CHK: ldursw
    309 ; LDUR_CHK-NEXT: ldursw
    310 ; LDUR_CHK-NEXT: add
    311 ; LDUR_CHK-NEXT: ret
    312   %elt.lo = getelementptr inbounds i32, i32* %a, i64 -18 ; &a[-18]
    313   %elt.lo.bytes = bitcast i32* %elt.lo to i8*
    314   %elt.lo.plus1 = getelementptr inbounds i8, i8* %elt.lo.bytes, i64 1 ; one byte past the element start
    315   %odd.lo = bitcast i8* %elt.lo.plus1 to i32*
    316   %val.lo = load i32, i32* %odd.lo, align 1
    317 
    318   %elt.hi = getelementptr inbounds i32, i32* %a, i64 -17 ; &a[-17]
    319   %elt.hi.bytes = bitcast i32* %elt.hi to i8*
    320   %elt.hi.plus1 = getelementptr inbounds i8, i8* %elt.hi.bytes, i64 1 ; one byte past the element start
    321   %odd.hi = bitcast i8* %elt.hi.plus1 to i32*
    322   %val.hi = load i32, i32* %odd.hi, align 1
    323 
    324   %wide.lo = sext i32 %val.lo to i64
    325   %wide.hi = sext i32 %val.hi to i64
    326   %sum = add i64 %wide.lo, %wide.hi
    327   ret i64 %sum
    328 }
    329