Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
      2 
      3 ; CHECK-LABEL: ldp_int:
      4 ; CHECK: ldp     w{{[0-9]+}}, w{{[0-9]+}}, [x0]
        ; Adds two i32 values loaded from %p and %p+1 (adjacent words, align 4).
        ; The pattern above spells out the w-register operands and the [x0] base
        ; because the bare text "ldp" is a substring of this function's own name,
        ; so any mention of the symbol in the output could satisfy it even if no
        ; paired load were emitted.
      5 define i32 @ldp_int(i32* %p) nounwind {
      6   %tmp = load i32, i32* %p, align 4
      7   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
      8   %tmp1 = load i32, i32* %add.ptr, align 4
      9   %add = add nsw i32 %tmp1, %tmp
     10   ret i32 %add
     11 }
     12 
     13 ; CHECK-LABEL: ldp_sext_int
     14 ; CHECK: ldpsw
        ; Loads two adjacent i32 values (%p and %p+1, align 4), sign-extends both
        ; to i64 and returns their sum. The expected output contains an ldpsw.
     15 define i64 @ldp_sext_int(i32* %p) nounwind {
     16   %tmp = load i32, i32* %p, align 4
     17   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     18   %tmp1 = load i32, i32* %add.ptr, align 4
     19   %sexttmp = sext i32 %tmp to i64
     20   %sexttmp1 = sext i32 %tmp1 to i64
     21   %add = add nsw i64 %sexttmp1, %sexttmp
     22   ret i64 %add
     23 }
     24 
     25 ; CHECK-LABEL: ldp_half_sext_res0_int:
     26 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
     27 ; CHECK: sxtw     x[[DST1]], w[[DST1]]
        ; Only the first loaded value (%p) is sign-extended; the second (%p+1) is
        ; zero-extended. The expected output is a plain ldp of two w registers
        ; followed by an sxtw applied to the first destination register.
     28 define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
     29   %tmp = load i32, i32* %p, align 4
     30   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     31   %tmp1 = load i32, i32* %add.ptr, align 4
     32   %sexttmp = sext i32 %tmp to i64
     33   %sexttmp1 = zext i32 %tmp1 to i64 ; zero-extension, despite the value name
     34   %add = add nsw i64 %sexttmp1, %sexttmp
     35   ret i64 %add
     36 }
     37 
     38 ; CHECK-LABEL: ldp_half_sext_res1_int:
     39 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
     40 ; CHECK: sxtw     x[[DST2]], w[[DST2]]
        ; Mirror of ldp_half_sext_res0_int: the first loaded value (%p) is
        ; zero-extended and the second (%p+1) is sign-extended. The expected output
        ; is a plain ldp followed by an sxtw on the second destination register.
     41 define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
     42   %tmp = load i32, i32* %p, align 4
     43   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     44   %tmp1 = load i32, i32* %add.ptr, align 4
     45   %sexttmp = zext i32 %tmp to i64 ; zero-extension, despite the value name
     46   %sexttmp1 = sext i32 %tmp1 to i64
     47   %add = add nsw i64 %sexttmp1, %sexttmp
     48   ret i64 %add
     49 }
     50 
     51 
     52 ; CHECK-LABEL: ldp_long:
     53 ; CHECK: ldp     x{{[0-9]+}}, x{{[0-9]+}}, [x0]
        ; Adds two i64 values loaded from %p and %p+1 (adjacent doublewords,
        ; align 8). The pattern names the x-register operands and the [x0] base
        ; because the bare text "ldp" is a substring of this function's own name,
        ; so a mention of the symbol in the output could satisfy it even if no
        ; paired load were emitted.
     54 define i64 @ldp_long(i64* %p) nounwind {
     55   %tmp = load i64, i64* %p, align 8
     56   %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
     57   %tmp1 = load i64, i64* %add.ptr, align 8
     58   %add = add nsw i64 %tmp1, %tmp
     59   ret i64 %add
     60 }
     61 
     62 ; CHECK-LABEL: ldp_float:
     63 ; CHECK: ldp     s{{[0-9]+}}, s{{[0-9]+}}, [x0]
        ; Adds two float values loaded from %p and %p+1 (adjacent, align 4).
        ; The pattern names the s-register operands and the [x0] base because the
        ; bare text "ldp" is a substring of this function's own name, so a mention
        ; of the symbol in the output could satisfy it even if no paired load were
        ; emitted.
     64 define float @ldp_float(float* %p) nounwind {
     65   %tmp = load float, float* %p, align 4
     66   %add.ptr = getelementptr inbounds float, float* %p, i64 1
     67   %tmp1 = load float, float* %add.ptr, align 4
     68   %add = fadd float %tmp, %tmp1
     69   ret float %add
     70 }
     71 
     72 ; CHECK-LABEL: ldp_double:
     73 ; CHECK: ldp     d{{[0-9]+}}, d{{[0-9]+}}, [x0]
        ; Adds two double values loaded from %p and %p+1 (adjacent, align 8).
        ; The pattern names the d-register operands and the [x0] base because the
        ; bare text "ldp" is a substring of this function's own name, so a mention
        ; of the symbol in the output could satisfy it even if no paired load were
        ; emitted.
     74 define double @ldp_double(double* %p) nounwind {
     75   %tmp = load double, double* %p, align 8
     76   %add.ptr = getelementptr inbounds double, double* %p, i64 1
     77   %tmp1 = load double, double* %add.ptr, align 8
     78   %add = fadd double %tmp, %tmp1
     79   ret double %add
     80 }
     81 
     82 ; CHECK-LABEL: ldp_doublex2:
     83 ; CHECK: ldp     q{{[0-9]+}}, q{{[0-9]+}}, [x0]
        ; Adds two <2 x double> vectors loaded from %p and %p+1 (adjacent 16-byte
        ; vectors, align 16). The pattern names the q-register operands and the
        ; [x0] base because the bare text "ldp" is a substring of this function's
        ; own name, so a mention of the symbol in the output could satisfy it even
        ; if no paired load were emitted.
     84 define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind {
     85   %tmp = load <2 x double>, <2 x double>* %p, align 16
     86   %add.ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 1
     87   %tmp1 = load <2 x double>, <2 x double>* %add.ptr, align 16
     88   %add = fadd <2 x double> %tmp, %tmp1
     89   ret <2 x double> %add
     90 }
     91 
     92 ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
     93 define i32 @ldur_int(i32* %a) nounwind {
     94 ; CHECK-LABEL: ldur_int
     95 ; CHECK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
     96 ; CHECK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
     97 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -1 and -2 relative to %a (align 2) and
        ; adds them. The expected output is a single ldp based at [x0, #-8], the
        ; lower of the two addresses, followed immediately by the add and the ret.
     98   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
     99   %tmp1 = load i32, i32* %p1, align 2
    100   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
    101   %tmp2 = load i32, i32* %p2, align 2
    102   %tmp3 = add i32 %tmp1, %tmp2
    103   ret i32 %tmp3
    104 }
    105 
    106 define i64 @ldur_sext_int(i32* %a) nounwind {
    107 ; CHECK-LABEL: ldur_sext_int
    108 ; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
    109 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    110 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -1 and -2 relative to %a (align 2),
        ; sign-extends both to i64 and adds them. The expected output is a single
        ; ldpsw based at [x0, #-8], followed immediately by the add and the ret.
    111   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
    112   %tmp1 = load i32, i32* %p1, align 2
    113   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
    114   %tmp2 = load i32, i32* %p2, align 2
    115   %sexttmp1 = sext i32 %tmp1 to i64
    116   %sexttmp2 = sext i32 %tmp2 to i64
    117   %tmp3 = add i64 %sexttmp1, %sexttmp2
    118   ret i64 %tmp3
    119 }
    120 
    121 define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
    122 ; CHECK-LABEL: ldur_half_sext_int_res0
    123 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
    124 ; CHECK: sxtw     x[[DST1]], w[[DST1]]
    125 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    126 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -1 and -2 relative to %a (align 2).
        ; Only the element at the lower address (index -2) is sign-extended; the
        ; other is zero-extended. The expected output is a plain ldp based at
        ; [x0, #-8], an sxtw on the first destination register, then add and ret.
    127   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
    128   %tmp1 = load i32, i32* %p1, align 2
    129   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
    130   %tmp2 = load i32, i32* %p2, align 2
    131   %sexttmp1 = zext i32 %tmp1 to i64 ; zero-extension, despite the value name
    132   %sexttmp2 = sext i32 %tmp2 to i64
    133   %tmp3 = add i64 %sexttmp1, %sexttmp2
    134   ret i64 %tmp3
    135 }
    136 
    137 define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
    138 ; CHECK-LABEL: ldur_half_sext_int_res1
    139 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
    140 ; CHECK: sxtw     x[[DST2]], w[[DST2]]
    141 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    142 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -1 and -2 relative to %a (align 2).
        ; Only the element at the higher address (index -1) is sign-extended; the
        ; other is zero-extended. The expected output is a plain ldp based at
        ; [x0, #-8], an sxtw on the second destination register, then add and ret.
    143   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
    144   %tmp1 = load i32, i32* %p1, align 2
    145   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
    146   %tmp2 = load i32, i32* %p2, align 2
    147   %sexttmp1 = sext i32 %tmp1 to i64
    148   %sexttmp2 = zext i32 %tmp2 to i64 ; zero-extension, despite the value name
    149   %tmp3 = add i64 %sexttmp1, %sexttmp2
    150   ret i64 %tmp3
    151 }
    152 
    153 
    154 define i64 @ldur_long(i64* %a) nounwind ssp {
    155 ; CHECK-LABEL: ldur_long
    156 ; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
    157 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    158 ; CHECK-NEXT: ret
        ; Loads the i64 elements at indices -1 and -2 relative to %a (align 2) and
        ; adds them. The expected output is a single ldp of x registers based at
        ; [x0, #-16], followed immediately by the add and the ret.
    159   %p1 = getelementptr inbounds i64, i64* %a, i64 -1
    160   %tmp1 = load i64, i64* %p1, align 2
    161   %p2 = getelementptr inbounds i64, i64* %a, i64 -2
    162   %tmp2 = load i64, i64* %p2, align 2
    163   %tmp3 = add i64 %tmp1, %tmp2
    164   ret i64 %tmp3
    165 }
    166 
    167 define float @ldur_float(float* %a) {
    168 ; CHECK-LABEL: ldur_float
    169 ; CHECK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
    170 ; CHECK-NEXT: fadd    s{{[0-9]+}}, [[DST2]], [[DST1]]
    171 ; CHECK-NEXT: ret
        ; Loads the float elements at indices -1 and -2 relative to %a (align 2)
        ; and adds them. The expected output is a single ldp of s registers based
        ; at [x0, #-8], followed immediately by the fadd and the ret.
    172   %p1 = getelementptr inbounds float, float* %a, i64 -1
    173   %tmp1 = load float, float* %p1, align 2
    174   %p2 = getelementptr inbounds float, float* %a, i64 -2
    175   %tmp2 = load float, float* %p2, align 2
    176   %tmp3 = fadd float %tmp1, %tmp2
    177   ret float %tmp3
    178 }
    179 
    180 define double @ldur_double(double* %a) {
    181 ; CHECK-LABEL: ldur_double
    182 ; CHECK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
    183 ; CHECK-NEXT: fadd    d{{[0-9]+}}, [[DST2]], [[DST1]]
    184 ; CHECK-NEXT: ret
        ; Loads the double elements at indices -1 and -2 relative to %a (align 2)
        ; and adds them. The expected output is a single ldp of d registers based
        ; at [x0, #-16], followed immediately by the fadd and the ret.
    185   %p1 = getelementptr inbounds double, double* %a, i64 -1
    186   %tmp1 = load double, double* %p1, align 2
    187   %p2 = getelementptr inbounds double, double* %a, i64 -2
    188   %tmp2 = load double, double* %p2, align 2
    189   %tmp3 = fadd double %tmp1, %tmp2
    190   ret double %tmp3
    191 }
    192 
    193 define <2 x double> @ldur_doublex2(<2 x double>* %a) {
    194 ; CHECK-LABEL: ldur_doublex2
    195 ; CHECK: ldp     q[[DST1:[0-9]+]], q[[DST2:[0-9]+]], [x0, #-32]
    196 ; CHECK-NEXT: fadd    v{{[0-9]+}}.2d, v[[DST2]].2d, v[[DST1]].2d
    197 ; CHECK-NEXT: ret
        ; Loads the <2 x double> elements at indices -1 and -2 relative to %a
        ; (align 2) and adds them. The expected output is a single ldp of q
        ; registers based at [x0, #-32], followed by a .2d fadd on the same
        ; register numbers and the ret.
    198   %p1 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -1
    199   %tmp1 = load <2 x double>, <2 x double>* %p1, align 2
    200   %p2 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -2
    201   %tmp2 = load <2 x double>, <2 x double>* %p2, align 2
    202   %tmp3 = fadd <2 x double> %tmp1, %tmp2
    203   ret <2 x double> %tmp3
    204 }
    205 
    206 ; Now check some boundary conditions
    207 define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
    208 ; CHECK-LABEL: pairUpBarelyIn
    209 ; CHECK-NOT: ldur
    210 ; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
    211 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    212 ; CHECK-NEXT: ret
        ; Loads the i64 elements at indices -31 and -32 relative to %a, i.e. byte
        ; offsets -248 and -256. The expected output has no ldur before a single
        ; ldp based at [x0, #-256], so this pair must still be combined.
        ; NOTE(review): presumably -256 is the lowest offset the optimizer will
        ; pair here (compare pairUpBarelyOut) -- confirm against the pass.
    213   %p1 = getelementptr inbounds i64, i64* %a, i64 -31
    214   %tmp1 = load i64, i64* %p1, align 2
    215   %p2 = getelementptr inbounds i64, i64* %a, i64 -32
    216   %tmp2 = load i64, i64* %p2, align 2
    217   %tmp3 = add i64 %tmp1, %tmp2
    218   ret i64 %tmp3
    219 }
    220 
    221 define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
    222 ; CHECK-LABEL: pairUpBarelyInSext
    223 ; CHECK-NOT: ldur
    224 ; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
    225 ; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
    226 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -63 and -64 relative to %a, i.e. byte
        ; offsets -252 and -256, and sign-extends both before adding. The expected
        ; output has no ldur before a single ldpsw based at [x0, #-256].
    227   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
    228   %tmp1 = load i32, i32* %p1, align 2
    229   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
    230   %tmp2 = load i32, i32* %p2, align 2
    231   %sexttmp1 = sext i32 %tmp1 to i64
    232   %sexttmp2 = sext i32 %tmp2 to i64
    233   %tmp3 = add i64 %sexttmp1, %sexttmp2
    234   ret i64 %tmp3
    235 }
    236 
    237 define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
    238 ; CHECK-LABEL: pairUpBarelyInHalfSextRes0
    239 ; CHECK-NOT: ldur
    240 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
    241 ; CHECK: sxtw     x[[DST1]], w[[DST1]]
    242 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    243 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -63 and -64 relative to %a (byte
        ; offsets -252 and -256). Only the element at the lower address (index
        ; -64) is sign-extended; the other is zero-extended. The expected output
        ; has no ldur, a plain ldp based at [x0, #-256], and an sxtw on the first
        ; destination register.
    244   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
    245   %tmp1 = load i32, i32* %p1, align 2
    246   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
    247   %tmp2 = load i32, i32* %p2, align 2
    248   %sexttmp1 = zext i32 %tmp1 to i64 ; zero-extension, despite the value name
    249   %sexttmp2 = sext i32 %tmp2 to i64
    250   %tmp3 = add i64 %sexttmp1, %sexttmp2
    251   ret i64 %tmp3
    252 }
    253 
    254 define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
    255 ; CHECK-LABEL: pairUpBarelyInHalfSextRes1
    256 ; CHECK-NOT: ldur
    257 ; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
    258 ; CHECK: sxtw     x[[DST2]], w[[DST2]]
    259 ; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
    260 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -63 and -64 relative to %a (byte
        ; offsets -252 and -256). Only the element at the higher address (index
        ; -63) is sign-extended; the other is zero-extended. The expected output
        ; has no ldur, a plain ldp based at [x0, #-256], and an sxtw on the second
        ; destination register.
    261   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
    262   %tmp1 = load i32, i32* %p1, align 2
    263   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
    264   %tmp2 = load i32, i32* %p2, align 2
    265   %sexttmp1 = sext i32 %tmp1 to i64
    266   %sexttmp2 = zext i32 %tmp2 to i64 ; zero-extension, despite the value name
    267   %tmp3 = add i64 %sexttmp1, %sexttmp2
    268   ret i64 %tmp3
    269 }
    270 
    271 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
    272 ; CHECK-LABEL: pairUpBarelyOut
    273 ; CHECK-NOT: ldp
    274 ; Don't be fragile about which loads or manipulations of the base register
    275 ; are used---just check that there isn't an ldp before the add
    276 ; CHECK: add
    277 ; CHECK-NEXT: ret
        ; Loads the i64 elements at indices -32 and -33 relative to %a, i.e. byte
        ; offsets -256 and -264, one element lower than pairUpBarelyIn. Here the
        ; expected output must NOT contain an ldp before the add.
    278   %p1 = getelementptr inbounds i64, i64* %a, i64 -32
    279   %tmp1 = load i64, i64* %p1, align 2
    280   %p2 = getelementptr inbounds i64, i64* %a, i64 -33
    281   %tmp2 = load i64, i64* %p2, align 2
    282   %tmp3 = add i64 %tmp1, %tmp2
    283   ret i64 %tmp3
    284 }
    285 
    286 define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
    287 ; CHECK-LABEL: pairUpBarelyOutSext
    288 ; CHECK-NOT: ldp
    289 ; Don't be fragile about which loads or manipulations of the base register
    290 ; are used---just check that there isn't an ldp before the add
    291 ; CHECK: add
    292 ; CHECK-NEXT: ret
        ; Loads the i32 elements at indices -64 and -65 relative to %a, i.e. byte
        ; offsets -256 and -260, one element lower than pairUpBarelyInSext, and
        ; sign-extends both. The expected output must NOT contain an ldp (which
        ; would also match ldpsw) before the add.
    293   %p1 = getelementptr inbounds i32, i32* %a, i64 -64
    294   %tmp1 = load i32, i32* %p1, align 2
    295   %p2 = getelementptr inbounds i32, i32* %a, i64 -65
    296   %tmp2 = load i32, i32* %p2, align 2
    297   %sexttmp1 = sext i32 %tmp1 to i64
    298   %sexttmp2 = sext i32 %tmp2 to i64
    299   %tmp3 = add i64 %sexttmp1, %sexttmp2
    300   ret i64 %tmp3
    301 }
    302 
    303 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
    304 ; CHECK-LABEL: pairUpNotAligned
    305 ; CHECK-NOT: ldp
    306 ; CHECK: ldur
    307 ; CHECK-NEXT: ldur
    308 ; CHECK-NEXT: add
    309 ; CHECK-NEXT: ret
        ; Loads two i64 values that are 8 bytes apart but start one byte past an
        ; element boundary: byte offsets -143 and -135 from %a, neither a multiple
        ; of 8. The expected output keeps two separate ldur instructions and must
        ; not contain an ldp.
        ; First address: element -18, then +1 byte via an i8* view.
    310   %p1 = getelementptr inbounds i64, i64* %a, i64 -18
    311   %bp1 = bitcast i64* %p1 to i8*
    312   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
    313   %dp1 = bitcast i8* %bp1p1 to i64*
    314   %tmp1 = load i64, i64* %dp1, align 1
    315 
        ; Second address: element -17, then +1 byte via an i8* view.
    316   %p2 = getelementptr inbounds i64, i64* %a, i64 -17
    317   %bp2 = bitcast i64* %p2 to i8*
    318   %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
    319   %dp2 = bitcast i8* %bp2p1 to i64*
    320   %tmp2 = load i64, i64* %dp2, align 1
    321 
    322   %tmp3 = add i64 %tmp1, %tmp2
    323   ret i64 %tmp3
    324 }
    325 
    326 define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
    327 ; CHECK-LABEL: pairUpNotAlignedSext
    328 ; CHECK-NOT: ldp
    329 ; CHECK: ldursw
    330 ; CHECK-NEXT: ldursw
    331 ; CHECK-NEXT: add
    332 ; CHECK-NEXT: ret
        ; Loads two i32 values that are 4 bytes apart but start one byte past an
        ; element boundary: byte offsets -71 and -67 from %a, neither a multiple
        ; of 4. Both are sign-extended and added. The expected output keeps two
        ; separate ldursw instructions and must not contain an ldp.
        ; First address: element -18, then +1 byte via an i8* view.
    333   %p1 = getelementptr inbounds i32, i32* %a, i64 -18
    334   %bp1 = bitcast i32* %p1 to i8*
    335   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
    336   %dp1 = bitcast i8* %bp1p1 to i32*
    337   %tmp1 = load i32, i32* %dp1, align 1
    338 
        ; Second address: element -17, then +1 byte via an i8* view.
    339   %p2 = getelementptr inbounds i32, i32* %a, i64 -17
    340   %bp2 = bitcast i32* %p2 to i8*
    341   %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
    342   %dp2 = bitcast i8* %bp2p1 to i32*
    343   %tmp2 = load i32, i32* %dp2, align 1
    344 
    345   %sexttmp1 = sext i32 %tmp1 to i64
    346   %sexttmp2 = sext i32 %tmp2 to i64
    347   %tmp3 = add i64 %sexttmp1, %sexttmp2
    348  ret i64 %tmp3
    349 }
    350 
    351 declare void @use-ptr(i32*) ; external; called by ldp_sext_int_pre/_post with a pointer derived from %p
    352 
    353 ; CHECK-LABEL: ldp_sext_int_pre
    354 ; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
        ; Computes %ptr = %p + 2 elements (8 bytes), passes it to @use-ptr, and
        ; only afterwards loads the two adjacent i32 values at %ptr and %ptr+1,
        ; sign-extending and adding them. The expected output is an ldpsw with an
        ; immediate offset of #8 from some x register; the base register is left
        ; unconstrained by the pattern.
    355 define i64 @ldp_sext_int_pre(i32* %p) nounwind {
    356   %ptr = getelementptr inbounds i32, i32* %p, i64 2
    357   call void @use-ptr(i32* %ptr)
    358   %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
    359   %tmp = load i32, i32* %add.ptr, align 4
    360   %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
    361   %tmp1 = load i32, i32* %add.ptr1, align 4
    362   %sexttmp = sext i32 %tmp to i64
    363   %sexttmp1 = sext i32 %tmp1 to i64
    364   %add = add nsw i64 %sexttmp1, %sexttmp
    365   ret i64 %add
    366 }
    367 
    368 ; CHECK-LABEL: ldp_sext_int_post
    369 ; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
        ; Loads the two adjacent i32 values at %p and %p+1 first, then computes
        ; %ptr = %p + 2 elements (8 bytes) and passes it to @use-ptr before
        ; returning the sum of the sign-extended values. The expected output is an
        ; ldpsw written in the "[x0], #8" form, i.e. the 8-byte pointer advance is
        ; folded into the pair load.
    370 define i64 @ldp_sext_int_post(i32* %p) nounwind {
    371   %tmp = load i32, i32* %p, align 4
    372   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
    373   %tmp1 = load i32, i32* %add.ptr, align 4
    374   %sexttmp = sext i32 %tmp to i64
    375   %sexttmp1 = sext i32 %tmp1 to i64
    376   %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
    377   call void @use-ptr(i32* %ptr)
    378   %add = add nsw i64 %sexttmp1, %sexttmp
    379   ret i64 %add
    380 }
    381 
    382