Home | History | Annotate | Download | only in SystemZ
      1 ; Test 64-bit additions of constants to memory.
      2 ;
      3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
      4 
      5 declare i64 @foo()
      6 
      7 ; Add 1 to a 64-bit value in memory; the carry is returned and ALGSI is expected.
      8 define zeroext i1 @f1(i64 *%ptr) {
      9 ; CHECK-LABEL: f1:
     10 ; CHECK: algsi 0(%r2), 1
     11 ; CHECK: ipm [[REG:%r[0-5]]]
     12 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
     13 ; CHECK: br %r14
     14   %old = load i64, i64 *%ptr                  ; value currently in memory
     15   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
     16   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
     17   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
     18   store i64 %sum, i64 *%ptr                   ; write the sum back in place
     19   ret i1 %carry
     20 }
     21 
     22 ; Add 127: the top of the immediate range, so a single ALGSI is still expected.
     23 define zeroext i1 @f2(i64 *%ptr) {
     24 ; CHECK-LABEL: f2:
     25 ; CHECK: algsi 0(%r2), 127
     26 ; CHECK: ipm [[REG:%r[0-5]]]
     27 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
     28 ; CHECK: br %r14
     29   %old = load i64, i64 *%ptr                  ; value currently in memory
     30   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 127)
     31   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
     32   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
     33   store i64 %sum, i64 *%ptr                   ; write the sum back in place
     34   ret i1 %carry
     35 }
     36 
     37 ; Add 128: one past the immediate range, so expect a load, an ALGFI and a store.
     38 define zeroext i1 @f3(i64 %dummy, i64 *%ptr) {
     39 ; CHECK-LABEL: f3:
     40 ; CHECK: lg [[VAL:%r[0-5]]], 0(%r3)
     41 ; CHECK: algfi [[VAL]], 128
     42 ; CHECK-DAG: stg [[VAL]], 0(%r3)
     43 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
     44 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
     45 ; CHECK: br %r14
     46   %old = load i64, i64 *%ptr                  ; value currently in memory
     47   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 128)
     48   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
     49   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
     50   store i64 %sum, i64 *%ptr                   ; write the sum back in place
     51   ret i1 %carry
     52 }
     53 
     54 ; Add -128: the bottom of the immediate range, so a single ALGSI is still expected.
     55 define zeroext i1 @f4(i64 *%ptr) {
     56 ; CHECK-LABEL: f4:
     57 ; CHECK: algsi 0(%r2), -128
     58 ; CHECK: ipm [[REG:%r[0-5]]]
     59 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
     60 ; CHECK: br %r14
     61   %old = load i64, i64 *%ptr                  ; value currently in memory
     62   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 -128)
     63   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
     64   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
     65   store i64 %sum, i64 *%ptr                   ; write the sum back in place
     66   ret i1 %carry
     67 }
     68 
     69 ; Add -129: one below the immediate range, so expect LG, LGHI, ALGR and a store.
     70 define zeroext i1 @f5(i64 %dummy, i64 *%ptr) {
     71 ; CHECK-LABEL: f5:
     72 ; CHECK: lg [[VAL1:%r[0-5]]], 0(%r3)
     73 ; CHECK: lghi [[VAL2:%r[0-9]+]], -129
     74 ; CHECK: algr [[VAL2]], [[VAL1]]
     75 ; CHECK-DAG: stg [[VAL2]], 0(%r3)
     76 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
     77 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
     78 ; CHECK: br %r14
     79   %old = load i64, i64 *%ptr                  ; value currently in memory
     80   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 -129)
     81   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
     82   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
     83   store i64 %sum, i64 *%ptr                   ; write the sum back in place
     84   ret i1 %carry
     85 }
     86 
     87 ; Displacement 524280 (65535 * 8): the highest aligned offset ALGSI is expected to take.
     88 define zeroext i1 @f6(i64 *%base) {
     89 ; CHECK-LABEL: f6:
     90 ; CHECK: algsi 524280(%r2), 1
     91 ; CHECK: ipm [[REG:%r[0-5]]]
     92 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
     93 ; CHECK: br %r14
     94   %slot = getelementptr i64, i64 *%base, i64 65535   ; element 65535 of %base
     95   %old = load i64, i64 *%slot
     96   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
     97   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
     98   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
     99   store i64 %sum, i64 *%slot                  ; write the sum back in place
    100   ret i1 %carry
    101 }
    102 
    103 ; Check the next doubleword up (offset 524288), which must use separate address logic.
    104 ; Other sequences besides this one would be OK.
    105 define zeroext i1 @f7(i64 *%base) {
    106 ; CHECK-LABEL: f7:
    107 ; CHECK: agfi %r2, 524288
    108 ; CHECK: algsi 0(%r2), 1
    109 ; CHECK: ipm [[REG:%r[0-5]]]
    110 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
    111 ; CHECK: br %r14
    112   %slot = getelementptr i64, i64 *%base, i64 65536   ; element 65536 of %base
    113   %old = load i64, i64 *%slot
    114   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
    115   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
    116   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
    117   store i64 %sum, i64 *%slot                  ; write the sum back in place
    118   ret i1 %carry
    119 }
    120 
    121 ; Displacement -524288 (-65536 * 8): the lowest offset ALGSI is expected to take.
    122 define zeroext i1 @f8(i64 *%base) {
    123 ; CHECK-LABEL: f8:
    124 ; CHECK: algsi -524288(%r2), 1
    125 ; CHECK: ipm [[REG:%r[0-5]]]
    126 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
    127 ; CHECK: br %r14
    128   %slot = getelementptr i64, i64 *%base, i64 -65536  ; element -65536 of %base
    129   %old = load i64, i64 *%slot
    130   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
    131   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
    132   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
    133   store i64 %sum, i64 *%slot                  ; write the sum back in place
    134   ret i1 %carry
    135 }
    136 
    137 ; Check the next doubleword down (offset -524296), which must use separate address logic.
    138 ; Other sequences besides this one would be OK.
    139 define zeroext i1 @f9(i64 *%base) {
    140 ; CHECK-LABEL: f9:
    141 ; CHECK: agfi %r2, -524296
    142 ; CHECK: algsi 0(%r2), 1
    143 ; CHECK: ipm [[REG:%r[0-5]]]
    144 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
    145 ; CHECK: br %r14
    146   %slot = getelementptr i64, i64 *%base, i64 -65537  ; element -65537 of %base
    147   %old = load i64, i64 *%slot
    148   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
    149   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
    150   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
    151   store i64 %sum, i64 *%slot                  ; write the sum back in place
    152   ret i1 %carry
    153 }
    154 
    155 ; ALGSI takes no index register: expect base + index summed by AGR, then ALGSI at 8(%r2).
    156 define zeroext i1 @f10(i64 %base, i64 %index) {
    157 ; CHECK-LABEL: f10:
    158 ; CHECK: agr %r2, %r3
    159 ; CHECK: algsi 8(%r2), 1
    160 ; CHECK: ipm [[REG:%r[0-5]]]
    161 ; CHECK: risbg %r2, [[REG]], 63, 191, 35
    162 ; CHECK: br %r14
    163   %indexed = add i64 %base, %index            ; base + index
    164   %addr = add i64 %indexed, 8                 ; ... plus a constant offset of 8
    165   %slot = inttoptr i64 %addr to i64 *
    166   %old = load i64, i64 *%slot
    167   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
    168   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
    169   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
    170   store i64 %sum, i64 *%slot                  ; write the sum back in place
    171   ret i1 %carry
    172 }
    173 
    174 ; Check that adding 127 to a spilled value can use ALGSI at a %r15-relative address.
    175 define zeroext i1 @f11(i64 *%ptr, i64 %sel) {
    176 ; CHECK-LABEL: f11:
    177 ; CHECK: algsi {{[0-9]+}}(%r15), 127
    178 ; CHECK: br %r14
    179 entry:                                        ; sixteen volatile loads, all used again below
    180   %val0 = load volatile i64, i64 *%ptr
    181   %val1 = load volatile i64, i64 *%ptr
    182   %val2 = load volatile i64, i64 *%ptr
    183   %val3 = load volatile i64, i64 *%ptr
    184   %val4 = load volatile i64, i64 *%ptr
    185   %val5 = load volatile i64, i64 *%ptr
    186   %val6 = load volatile i64, i64 *%ptr
    187   %val7 = load volatile i64, i64 *%ptr
    188   %val8 = load volatile i64, i64 *%ptr
    189   %val9 = load volatile i64, i64 *%ptr
    190   %val10 = load volatile i64, i64 *%ptr
    191   %val11 = load volatile i64, i64 *%ptr
    192   %val12 = load volatile i64, i64 *%ptr
    193   %val13 = load volatile i64, i64 *%ptr
    194   %val14 = load volatile i64, i64 *%ptr
    195   %val15 = load volatile i64, i64 *%ptr
    196 
    197   %test = icmp ne i64 %sel, 0                 ; a nonzero %sel selects the addition path
    198   br i1 %test, label %add, label %store
    199 
    200 add:                                          ; add 127 to every value, OR-ing the overflow bits
    201   %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val0, i64 127)
    202   %add0 = extractvalue {i64, i1} %t0, 0
    203   %obit0 = extractvalue {i64, i1} %t0, 1
    204   %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val1, i64 127)
    205   %add1 = extractvalue {i64, i1} %t1, 0
    206   %obit1 = extractvalue {i64, i1} %t1, 1
    207   %res1 = or i1 %obit0, %obit1
    208   %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val2, i64 127)
    209   %add2 = extractvalue {i64, i1} %t2, 0
    210   %obit2 = extractvalue {i64, i1} %t2, 1
    211   %res2 = or i1 %res1, %obit2
    212   %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val3, i64 127)
    213   %add3 = extractvalue {i64, i1} %t3, 0
    214   %obit3 = extractvalue {i64, i1} %t3, 1
    215   %res3 = or i1 %res2, %obit3
    216   %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val4, i64 127)
    217   %add4 = extractvalue {i64, i1} %t4, 0
    218   %obit4 = extractvalue {i64, i1} %t4, 1
    219   %res4 = or i1 %res3, %obit4
    220   %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val5, i64 127)
    221   %add5 = extractvalue {i64, i1} %t5, 0
    222   %obit5 = extractvalue {i64, i1} %t5, 1
    223   %res5 = or i1 %res4, %obit5
    224   %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val6, i64 127)
    225   %add6 = extractvalue {i64, i1} %t6, 0
    226   %obit6 = extractvalue {i64, i1} %t6, 1
    227   %res6 = or i1 %res5, %obit6
    228   %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val7, i64 127)
    229   %add7 = extractvalue {i64, i1} %t7, 0
    230   %obit7 = extractvalue {i64, i1} %t7, 1
    231   %res7 = or i1 %res6, %obit7
    232   %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val8, i64 127)
    233   %add8 = extractvalue {i64, i1} %t8, 0
    234   %obit8 = extractvalue {i64, i1} %t8, 1
    235   %res8 = or i1 %res7, %obit8
    236   %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val9, i64 127)
    237   %add9 = extractvalue {i64, i1} %t9, 0
    238   %obit9 = extractvalue {i64, i1} %t9, 1
    239   %res9 = or i1 %res8, %obit9
    240   %t10 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val10, i64 127)
    241   %add10 = extractvalue {i64, i1} %t10, 0
    242   %obit10 = extractvalue {i64, i1} %t10, 1
    243   %res10 = or i1 %res9, %obit10
    244   %t11 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val11, i64 127)
    245   %add11 = extractvalue {i64, i1} %t11, 0
    246   %obit11 = extractvalue {i64, i1} %t11, 1
    247   %res11 = or i1 %res10, %obit11
    248   %t12 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val12, i64 127)
    249   %add12 = extractvalue {i64, i1} %t12, 0
    250   %obit12 = extractvalue {i64, i1} %t12, 1
    251   %res12 = or i1 %res11, %obit12
    252   %t13 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val13, i64 127)
    253   %add13 = extractvalue {i64, i1} %t13, 0
    254   %obit13 = extractvalue {i64, i1} %t13, 1
    255   %res13 = or i1 %res12, %obit13
    256   %t14 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val14, i64 127)
    257   %add14 = extractvalue {i64, i1} %t14, 0
    258   %obit14 = extractvalue {i64, i1} %t14, 1
    259   %res14 = or i1 %res13, %obit14
    260   %t15 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val15, i64 127)
    261   %add15 = extractvalue {i64, i1} %t15, 0
    262   %obit15 = extractvalue {i64, i1} %t15, 1
    263   %res15 = or i1 %res14, %obit15
    264 
    265   br label %store
    266 
    267 store:                                        ; merge both paths, then store all sixteen values
    268   %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ]
    269   %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ]
    270   %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ]
    271   %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ]
    272   %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ]
    273   %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ]
    274   %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ]
    275   %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ]
    276   %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ]
    277   %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ]
    278   %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ]
    279   %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ]
    280   %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ]
    281   %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ]
    282   %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ]
    283   %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ]
    284   %res = phi i1 [ 0, %entry ], [ %res15, %add ]   ; false when the additions were skipped
    285 
    286   store volatile i64 %new0, i64 *%ptr
    287   store volatile i64 %new1, i64 *%ptr
    288   store volatile i64 %new2, i64 *%ptr
    289   store volatile i64 %new3, i64 *%ptr
    290   store volatile i64 %new4, i64 *%ptr
    291   store volatile i64 %new5, i64 *%ptr
    292   store volatile i64 %new6, i64 *%ptr
    293   store volatile i64 %new7, i64 *%ptr
    294   store volatile i64 %new8, i64 *%ptr
    295   store volatile i64 %new9, i64 *%ptr
    296   store volatile i64 %new10, i64 *%ptr
    297   store volatile i64 %new11, i64 *%ptr
    298   store volatile i64 %new12, i64 *%ptr
    299   store volatile i64 %new13, i64 *%ptr
    300   store volatile i64 %new14, i64 *%ptr
    301   store volatile i64 %new15, i64 *%ptr
    302 
    303   ret i1 %res                                 ; OR of all overflow bits, or false
    304 }
    305 
    306 ; Branch on the overflow bit: @foo is tail-called only when the addition overflows.
    307 define void @f12(i64 *%ptr) {
    308 ; CHECK-LABEL: f12:
    309 ; CHECK: algsi 0(%r2), 1
    310 ; CHECK: jgnle foo@PLT
    311 ; CHECK: br %r14
    312   %old = load i64, i64 *%ptr                  ; value currently in memory
    313   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
    314   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
    315   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
    316   store i64 %sum, i64 *%ptr                   ; write the sum back in place
    317   br i1 %carry, label %call, label %exit      ; overflow -> %call
    318 
    319 call:
    320   tail call i64 @foo()
    321   br label %exit
    322 
    323 exit:
    324   ret void
    325 }
    326 
    327 ; Inverted branch on the overflow bit: @foo is tail-called only when there is no overflow.
    328 define void @f13(i64 *%ptr) {
    329 ; CHECK-LABEL: f13:
    330 ; CHECK: algsi 0(%r2), 1
    331 ; CHECK: jgle foo@PLT
    332 ; CHECK: br %r14
    333   %old = load i64, i64 *%ptr                  ; value currently in memory
    334   %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %old, i64 1)
    335   %sum = extractvalue {i64, i1} %pair, 0      ; wrapped 64-bit sum
    336   %carry = extractvalue {i64, i1} %pair, 1    ; unsigned-overflow flag
    337   store i64 %sum, i64 *%ptr                   ; write the sum back in place
    338   br i1 %carry, label %exit, label %call      ; overflow -> %exit, skipping the call
    339 
    340 call:
    341   tail call i64 @foo()
    342   br label %exit
    343 
    344 exit:
    345   ret void
    346 }
    347 
    348 declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
    349 
    350