Home | History | Annotate | Download | only in SystemZ
      1 ; Test subtraction of a zero-extended i32 from an i64.
      2 ;
      3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
      4 
      5 declare i64 @foo()
      6 
      7 ; Check SLGFR.
      8 define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
        ; Register form: %b is zero-extended to i64 and subtracted from %a with
        ; llvm.usub.with.overflow.i64.  The difference is stored through %res
        ; and the overflow bit is the return value.
      9 ; CHECK-LABEL: f1:
     10 ; CHECK: slgfr %r3, %r4
     11 ; CHECK-DAG: stg %r3, 0(%r5)
     12 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
     13 ; CHECK-DAG: afi [[REG]], -536870912
     14 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
     15 ; CHECK: br %r14
     16   %wide = zext i32 %b to i64
     17   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
     18   %diff = extractvalue {i64, i1} %pair, 0
     19   %ovf = extractvalue {i64, i1} %pair, 1
     20   store i64 %diff, i64 *%res
     21   ret i1 %ovf
     22 }
     23 
     24 ; Check using the overflow result for a branch.
     25 define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
        ; Same subtraction as a register operand, but the overflow bit drives a
        ; conditional branch: when it is set, control reaches the block that
        ; tail-calls @foo; otherwise the function returns directly.
     26 ; CHECK-LABEL: f2:
     27 ; CHECK: slgfr %r3, %r4
     28 ; CHECK: stg %r3, 0(%r5)
     29 ; CHECK: jgle foo@PLT
     30 ; CHECK: br %r14
     31   %wide = zext i32 %b to i64
     32   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
     33   %diff = extractvalue {i64, i1} %pair, 0
     34   %ovf = extractvalue {i64, i1} %pair, 1
     35   store i64 %diff, i64 *%res
     36   br i1 %ovf, label %invoke, label %done
     37 
     38 invoke:
     39   tail call i64 @foo()
     40   br label %done
     41 
     42 done:
     43   ret void
     44 }
     45 
     46 ; ... and the same with the inverted direction.
     47 define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
        ; Mirror image of the branch test: here the block that tail-calls @foo
        ; is reached when the overflow bit is clear, and a set overflow bit
        ; goes straight to the return.
     48 ; CHECK-LABEL: f3:
     49 ; CHECK: slgfr %r3, %r4
     50 ; CHECK: stg %r3, 0(%r5)
     51 ; CHECK: jgnle foo@PLT
     52 ; CHECK: br %r14
     53   %wide = zext i32 %b to i64
     54   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
     55   %diff = extractvalue {i64, i1} %pair, 0
     56   %ovf = extractvalue {i64, i1} %pair, 1
     57   store i64 %diff, i64 *%res
     58   br i1 %ovf, label %done, label %invoke
     59 
     60 invoke:
     61   tail call i64 @foo()
     62   br label %done
     63 
     64 done:
     65   ret void
     66 }
     67 
     68 ; Check SLGF with no displacement.
     69 define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
        ; Memory form: the i32 subtrahend is loaded directly from %src, so the
        ; expected slgf uses displacement 0 off %r4.
     70 ; CHECK-LABEL: f4:
     71 ; CHECK: slgf %r3, 0(%r4)
     72 ; CHECK-DAG: stg %r3, 0(%r5)
     73 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
     74 ; CHECK-DAG: afi [[REG]], -536870912
     75 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
     76 ; CHECK: br %r14
     77   %narrow = load i32, i32 *%src
     78   %wide = zext i32 %narrow to i64
     79   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
     80   %diff = extractvalue {i64, i1} %pair, 0
     81   %ovf = extractvalue {i64, i1} %pair, 1
     82   store i64 %diff, i64 *%res
     83   ret i1 %ovf
     84 }
     85 
     86 ; Check the high end of the aligned SLGF range.
     87 define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
        ; The subtrahend is the i32 at element index 131071 from %src; the
        ; CHECK line expects that offset to appear as displacement 524284.
     88 ; CHECK-LABEL: f5:
     89 ; CHECK: slgf %r3, 524284(%r4)
     90 ; CHECK-DAG: stg %r3, 0(%r5)
     91 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
     92 ; CHECK-DAG: afi [[REG]], -536870912
     93 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
     94 ; CHECK: br %r14
     95   %addr = getelementptr i32, i32 *%src, i64 131071
     96   %narrow = load i32, i32 *%addr
     97   %wide = zext i32 %narrow to i64
     98   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
     99   %diff = extractvalue {i64, i1} %pair, 0
    100   %ovf = extractvalue {i64, i1} %pair, 1
    101   store i64 %diff, i64 *%res
    102   ret i1 %ovf
    103 }
    104 
    105 ; Check the next word up, which needs separate address logic.
    106 ; Other sequences besides this one would be OK.
    107 define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
        ; The subtrahend is the i32 at element index 131072 from %src.  The
        ; CHECK lines expect 524288 to be added to %r4 with agfi first, then
        ; an slgf at displacement 0.
    108 ; CHECK-LABEL: f6:
    109 ; CHECK: agfi %r4, 524288
    110 ; CHECK: slgf %r3, 0(%r4)
    111 ; CHECK-DAG: stg %r3, 0(%r5)
    112 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
    113 ; CHECK-DAG: afi [[REG]], -536870912
    114 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
    115 ; CHECK: br %r14
    116   %addr = getelementptr i32, i32 *%src, i64 131072
    117   %narrow = load i32, i32 *%addr
    118   %wide = zext i32 %narrow to i64
    119   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
    120   %diff = extractvalue {i64, i1} %pair, 0
    121   %ovf = extractvalue {i64, i1} %pair, 1
    122   store i64 %diff, i64 *%res
    123   ret i1 %ovf
    124 }
    125 
    126 ; Check the high end of the negative aligned SLGF range.
    127 define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
        ; The subtrahend is the i32 one element before %src; the CHECK line
        ; expects displacement -4.
    128 ; CHECK-LABEL: f7:
    129 ; CHECK: slgf %r3, -4(%r4)
    130 ; CHECK-DAG: stg %r3, 0(%r5)
    131 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
    132 ; CHECK-DAG: afi [[REG]], -536870912
    133 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
    134 ; CHECK: br %r14
    135   %addr = getelementptr i32, i32 *%src, i64 -1
    136   %narrow = load i32, i32 *%addr
    137   %wide = zext i32 %narrow to i64
    138   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
    139   %diff = extractvalue {i64, i1} %pair, 0
    140   %ovf = extractvalue {i64, i1} %pair, 1
    141   store i64 %diff, i64 *%res
    142   ret i1 %ovf
    143 }
    144 
    145 ; Check the low end of the SLGF range.
    146 define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
        ; The subtrahend is the i32 at element index -131072 from %src; the
        ; CHECK line expects displacement -524288.
    147 ; CHECK-LABEL: f8:
    148 ; CHECK: slgf %r3, -524288(%r4)
    149 ; CHECK-DAG: stg %r3, 0(%r5)
    150 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
    151 ; CHECK-DAG: afi [[REG]], -536870912
    152 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
    153 ; CHECK: br %r14
    154   %addr = getelementptr i32, i32 *%src, i64 -131072
    155   %narrow = load i32, i32 *%addr
    156   %wide = zext i32 %narrow to i64
    157   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
    158   %diff = extractvalue {i64, i1} %pair, 0
    159   %ovf = extractvalue {i64, i1} %pair, 1
    160   store i64 %diff, i64 *%res
    161   ret i1 %ovf
    162 }
    163 
    164 ; Check the next word down, which needs separate address logic.
    165 ; Other sequences besides this one would be OK.
    166 define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
        ; The subtrahend is the i32 at element index -131073 from %src.  The
        ; CHECK lines expect -524292 to be added to %r4 with agfi first, then
        ; an slgf at displacement 0.
    167 ; CHECK-LABEL: f9:
    168 ; CHECK: agfi %r4, -524292
    169 ; CHECK: slgf %r3, 0(%r4)
    170 ; CHECK-DAG: stg %r3, 0(%r5)
    171 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
    172 ; CHECK-DAG: afi [[REG]], -536870912
    173 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
    174 ; CHECK: br %r14
    175   %addr = getelementptr i32, i32 *%src, i64 -131073
    176   %narrow = load i32, i32 *%addr
    177   %wide = zext i32 %narrow to i64
    178   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
    179   %diff = extractvalue {i64, i1} %pair, 0
    180   %ovf = extractvalue {i64, i1} %pair, 1
    181   store i64 %diff, i64 *%res
    182   ret i1 %ovf
    183 }
    184 
    185 ; Check that SLGF allows an index.
    186 define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) {
        ; The load address is built from integers as %src + %index + 524284 and
        ; converted with inttoptr.  The CHECK line expects all three parts to
        ; fold into one slgf operand and accepts either order for %r2 and %r3.
    187 ; CHECK-LABEL: f10:
    188 ; CHECK: slgf %r4, 524284({{%r3,%r2|%r2,%r3}})
    189 ; CHECK-DAG: stg %r4, 0(%r5)
    190 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
    191 ; CHECK-DAG: afi [[REG]], -536870912
    192 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
    193 ; CHECK: br %r14
    194   %base = add i64 %src, %index
    195   %offs = add i64 %base, 524284
    196   %addr = inttoptr i64 %offs to i32 *
    197   %narrow = load i32, i32 *%addr
    198   %wide = zext i32 %narrow to i64
    199   %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %wide)
    200   %diff = extractvalue {i64, i1} %pair, 0
    201   %ovf = extractvalue {i64, i1} %pair, 1
    202   store i64 %diff, i64 *%res
    203   ret i1 %ovf
    204 }
    205 
    206 ; Check that subtractions of spilled values can use SLGF rather than SLGFR.
    207 define zeroext i1 @f11(i32 *%ptr0) {
        ; Loads ten i32 values from %ptr0 at even element indexes 0..18, adds
        ; 100 to each, stores them back, calls @foo, and then subtracts the ten
        ; zero-extended values one after another from @foo's result with
        ; llvm.usub.with.overflow.i64.  The return value is the OR of all ten
        ; overflow bits.
        ; The CHECK lines only require that, after the call, at least one
        ; subtraction is an slgf reading the stack slot at 160(%r15).
        ; NOTE(review): presumably ten values live across the call are more
        ; than can stay in registers, which is what forces the spill -- confirm
        ; before changing the value count or statement order.
    208 ; CHECK-LABEL: f11:
    209 ; CHECK: brasl %r14, foo@PLT
    210 ; CHECK: slgf %r2, 160(%r15)
    211 ; CHECK: br %r14
        ; Addresses of the other nine elements (every second i32 after %ptr0).
    212   %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
    213   %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
    214   %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
    215   %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
    216   %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
    217   %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
    218   %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
    219   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
    220   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
    221 
        ; Read all ten values.
    222   %val0 = load i32, i32 *%ptr0
    223   %val1 = load i32, i32 *%ptr1
    224   %val2 = load i32, i32 *%ptr2
    225   %val3 = load i32, i32 *%ptr3
    226   %val4 = load i32, i32 *%ptr4
    227   %val5 = load i32, i32 *%ptr5
    228   %val6 = load i32, i32 *%ptr6
    229   %val7 = load i32, i32 *%ptr7
    230   %val8 = load i32, i32 *%ptr8
    231   %val9 = load i32, i32 *%ptr9
    232 
        ; Modify each value so the %frobN results are computed values rather
        ; than plain loads.
    233   %frob0 = add i32 %val0, 100
    234   %frob1 = add i32 %val1, 100
    235   %frob2 = add i32 %val2, 100
    236   %frob3 = add i32 %val3, 100
    237   %frob4 = add i32 %val4, 100
    238   %frob5 = add i32 %val5, 100
    239   %frob6 = add i32 %val6, 100
    240   %frob7 = add i32 %val7, 100
    241   %frob8 = add i32 %val8, 100
    242   %frob9 = add i32 %val9, 100
    243 
        ; Write the modified values back to their original locations.
    244   store i32 %frob0, i32 *%ptr0
    245   store i32 %frob1, i32 *%ptr1
    246   store i32 %frob2, i32 *%ptr2
    247   store i32 %frob3, i32 *%ptr3
    248   store i32 %frob4, i32 *%ptr4
    249   store i32 %frob5, i32 *%ptr5
    250   store i32 %frob6, i32 *%ptr6
    251   store i32 %frob7, i32 *%ptr7
    252   store i32 %frob8, i32 *%ptr8
    253   store i32 %frob9, i32 *%ptr9
    254 
        ; All ten %frobN values are still used after this call.
    255   %ret = call i64 @foo()
    256 
        ; Zero-extend each modified value to i64 for the 64-bit subtractions.
    257   %ext0 = zext i32 %frob0 to i64
    258   %ext1 = zext i32 %frob1 to i64
    259   %ext2 = zext i32 %frob2 to i64
    260   %ext3 = zext i32 %frob3 to i64
    261   %ext4 = zext i32 %frob4 to i64
    262   %ext5 = zext i32 %frob5 to i64
    263   %ext6 = zext i32 %frob6 to i64
    264   %ext7 = zext i32 %frob7 to i64
    265   %ext8 = zext i32 %frob8 to i64
    266   %ext9 = zext i32 %frob9 to i64
    267 
        ; Chain of subtractions starting from %ret.  Despite their names, the
        ; %addN values are the value results of usub.with.overflow (running
        ; differences); %resN accumulates the OR of the overflow bits so far.
    268   %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %ext0)
    269   %add0 = extractvalue {i64, i1} %t0, 0
    270   %obit0 = extractvalue {i64, i1} %t0, 1
    271   %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add0, i64 %ext1)
    272   %add1 = extractvalue {i64, i1} %t1, 0
    273   %obit1 = extractvalue {i64, i1} %t1, 1
    274   %res1 = or i1 %obit0, %obit1
    275   %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add1, i64 %ext2)
    276   %add2 = extractvalue {i64, i1} %t2, 0
    277   %obit2 = extractvalue {i64, i1} %t2, 1
    278   %res2 = or i1 %res1, %obit2
    279   %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add2, i64 %ext3)
    280   %add3 = extractvalue {i64, i1} %t3, 0
    281   %obit3 = extractvalue {i64, i1} %t3, 1
    282   %res3 = or i1 %res2, %obit3
    283   %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add3, i64 %ext4)
    284   %add4 = extractvalue {i64, i1} %t4, 0
    285   %obit4 = extractvalue {i64, i1} %t4, 1
    286   %res4 = or i1 %res3, %obit4
    287   %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add4, i64 %ext5)
    288   %add5 = extractvalue {i64, i1} %t5, 0
    289   %obit5 = extractvalue {i64, i1} %t5, 1
    290   %res5 = or i1 %res4, %obit5
    291   %t6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add5, i64 %ext6)
    292   %add6 = extractvalue {i64, i1} %t6, 0
    293   %obit6 = extractvalue {i64, i1} %t6, 1
    294   %res6 = or i1 %res5, %obit6
    295   %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add6, i64 %ext7)
    296   %add7 = extractvalue {i64, i1} %t7, 0
    297   %obit7 = extractvalue {i64, i1} %t7, 1
    298   %res7 = or i1 %res6, %obit7
    299   %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add7, i64 %ext8)
    300   %add8 = extractvalue {i64, i1} %t8, 0
    301   %obit8 = extractvalue {i64, i1} %t8, 1
    302   %res8 = or i1 %res7, %obit8
    303   %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add8, i64 %ext9)
    304   %add9 = extractvalue {i64, i1} %t9, 0
    305   %obit9 = extractvalue {i64, i1} %t9, 1
    306   %res9 = or i1 %res8, %obit9
    307 
        ; Only the combined overflow flag is returned; the final difference
        ; %add9 is not used.
    308   ret i1 %res9
    309 }
    310 
    311 declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
    312 
    313