Home | History | Annotate | Download | only in SystemZ
      1 ; Test the saving and restoring of GPRs in large frames.
      2 ;
      3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
      4 
      5 ; This is the largest frame size that can use a plain LMG for %r6 and above:
      6 ; the restore offset is 524232 + 48 = 524280 and LMG takes a signed 20-bit
      7 ; displacement.  The frame is big enough to require two emergency spill slots
      8 ; at 160(%r15), so allocate (524232 - 176) / 8 = 65507 extra doublewords.
      9 define void @f1(i32 *%ptr, i64 %x) {
     10 ; CHECK-LABEL: f1:
     11 ; CHECK: stmg %r6, %r15, 48(%r15)
     12 ; CHECK: .cfi_offset %r6, -112
     13 ; CHECK: .cfi_offset %r7, -104
     14 ; CHECK: .cfi_offset %r8, -96
     15 ; CHECK: .cfi_offset %r9, -88
     16 ; CHECK: .cfi_offset %r10, -80
     17 ; CHECK: .cfi_offset %r11, -72
     18 ; CHECK: .cfi_offset %r12, -64
     19 ; CHECK: .cfi_offset %r13, -56
     20 ; CHECK: .cfi_offset %r14, -48
     21 ; CHECK: .cfi_offset %r15, -40
     22 ; CHECK: agfi %r15, -524232
     23 ; CHECK: .cfi_def_cfa_offset 524392
     24 ; ...main function body...
     25 ; CHECK-NOT: ag
     26 ; CHECK: lmg %r6, %r15, 524280(%r15)
     27 ; CHECK: br %r14
     28   %l0 = load volatile i32 , i32 *%ptr  ; 13 volatile loads from %ptr; being volatile, none may be dropped or merged.
     29   %l1 = load volatile i32 , i32 *%ptr  ; NOTE(review): the %lN names presumably mirror the GPR each value should get
     30   %l4 = load volatile i32 , i32 *%ptr  ; (2 and 3 are skipped; %r2/%r3 would carry %ptr/%x) -- confirm.
     31   %l5 = load volatile i32 , i32 *%ptr
     32   %l6 = load volatile i32 , i32 *%ptr
     33   %l7 = load volatile i32 , i32 *%ptr
     34   %l8 = load volatile i32 , i32 *%ptr
     35   %l9 = load volatile i32 , i32 *%ptr
     36   %l10 = load volatile i32 , i32 *%ptr
     37   %l11 = load volatile i32 , i32 *%ptr
     38   %l12 = load volatile i32 , i32 *%ptr
     39   %l13 = load volatile i32 , i32 *%ptr
     40   %l14 = load volatile i32 , i32 *%ptr
     41   %add0 = add i32 %l0, %l0  ; Start of a running sum; note the first term is %l0 + %l0.
     42   %add1 = add i32 %l1, %add0  ; Each later %addN adds one more loaded value to the previous sum.
     43   %add4 = add i32 %l4, %add1
     44   %add5 = add i32 %l5, %add4
     45   %add6 = add i32 %l6, %add5
     46   %add7 = add i32 %l7, %add6
     47   %add8 = add i32 %l8, %add7
     48   %add9 = add i32 %l9, %add8
     49   %add10 = add i32 %l10, %add9
     50   %add11 = add i32 %l11, %add10
     51   %add12 = add i32 %l12, %add11
     52   %add13 = add i32 %l13, %add12
     53   %add14 = add i32 %l14, %add13
     54   store volatile i32 %add0, i32 *%ptr  ; All 13 partial sums are stored only after the last load, so 13 values
     55   store volatile i32 %add1, i32 *%ptr  ; are live together; the checks above expect %r6-%r15 saved by one stmg.
     56   store volatile i32 %add4, i32 *%ptr
     57   store volatile i32 %add5, i32 *%ptr
     58   store volatile i32 %add6, i32 *%ptr
     59   store volatile i32 %add7, i32 *%ptr
     60   store volatile i32 %add8, i32 *%ptr
     61   store volatile i32 %add9, i32 *%ptr
     62   store volatile i32 %add10, i32 *%ptr
     63   store volatile i32 %add11, i32 *%ptr
     64   store volatile i32 %add12, i32 *%ptr
     65   store volatile i32 %add13, i32 *%ptr
     66   store volatile i32 %add14, i32 *%ptr
     67   %y = alloca [65507 x i64], align 8  ; 65507 * 8 = 524056 bytes of locals; + 176 = the 524232-byte frame.
     68   %entry = getelementptr inbounds [65507 x i64], [65507 x i64]* %y, i64 0, i64 0  ; Address of element 0 of %y.
     69   store volatile i64 %x, i64* %entry  ; Volatile store into the array; presumably keeps the alloca in use -- confirm.
     70   ret void  ; Expected epilogue reloads from 48 + 524232 = 524280(%r15) with no preparatory add.
     71 }
     72 
     73 ; This is the largest frame size that can use a plain LMG for %r14 and above:
     74 ; the restore offset is 524168 + 112 = 524280 and LMG takes a signed 20-bit
     75 ; displacement.  The frame is big enough to require two emergency spill slots
     76 ; at 160(%r15), so allocate (524168 - 176) / 8 = 65499 extra doublewords.
     77 define void @f2(i32 *%ptr, i64 %x) {
     78 ; CHECK-LABEL: f2:
     79 ; CHECK: stmg %r14, %r15, 112(%r15)
     80 ; CHECK: .cfi_offset %r14, -48
     81 ; CHECK: .cfi_offset %r15, -40
     82 ; CHECK: agfi %r15, -524168
     83 ; CHECK: .cfi_def_cfa_offset 524328
     84 ; ...main function body...
     85 ; CHECK-NOT: ag
     86 ; CHECK: lmg %r14, %r15, 524280(%r15)
     87 ; CHECK: br %r14
     88   %l0 = load volatile i32 , i32 *%ptr  ; 5 volatile loads from %ptr; being volatile, none may be dropped or merged.
     89   %l1 = load volatile i32 , i32 *%ptr  ; NOTE(review): the %lN names presumably mirror the GPR each value should get
     90   %l4 = load volatile i32 , i32 *%ptr  ; (%r2/%r3 would carry %ptr/%x) -- confirm.
     91   %l5 = load volatile i32 , i32 *%ptr
     92   %l14 = load volatile i32 , i32 *%ptr
     93   %add0 = add i32 %l0, %l0  ; Start of a running sum; note the first term is %l0 + %l0.
     94   %add1 = add i32 %l1, %add0  ; Each later %addN adds one more loaded value to the previous sum.
     95   %add4 = add i32 %l4, %add1
     96   %add5 = add i32 %l5, %add4
     97   %add14 = add i32 %l14, %add5
     98   store volatile i32 %add0, i32 *%ptr  ; All 5 partial sums are stored only after the last load, so 5 values are
     99   store volatile i32 %add1, i32 *%ptr  ; live together; the checks above expect only %r14 and %r15 to be saved.
    100   store volatile i32 %add4, i32 *%ptr
    101   store volatile i32 %add5, i32 *%ptr
    102   store volatile i32 %add14, i32 *%ptr
    103   %y = alloca [65499 x i64], align 8  ; 65499 * 8 = 523992 bytes of locals; + 176 = the 524168-byte frame.
    104   %entry = getelementptr inbounds [65499 x i64], [65499 x i64]* %y, i64 0, i64 0  ; Address of element 0 of %y.
    105   store volatile i64 %x, i64* %entry  ; Volatile store into the array; presumably keeps the alloca in use -- confirm.
    106   ret void  ; Expected epilogue reloads from 112 + 524168 = 524280(%r15) with no preparatory add.
    107 }
    108 
    109 ; Like f1 but with a frame that is 8 bytes bigger.  This is the smallest
    110 ; frame size that needs two instructions to perform the final LMG for
    111 ; %r6 and above.
    112 define void @f3(i32 *%ptr, i64 %x) {
    113 ; CHECK-LABEL: f3:
    114 ; CHECK: stmg %r6, %r15, 48(%r15)
    115 ; CHECK: .cfi_offset %r6, -112
    116 ; CHECK: .cfi_offset %r7, -104
    117 ; CHECK: .cfi_offset %r8, -96
    118 ; CHECK: .cfi_offset %r9, -88
    119 ; CHECK: .cfi_offset %r10, -80
    120 ; CHECK: .cfi_offset %r11, -72
    121 ; CHECK: .cfi_offset %r12, -64
    122 ; CHECK: .cfi_offset %r13, -56
    123 ; CHECK: .cfi_offset %r14, -48
    124 ; CHECK: .cfi_offset %r15, -40
    125 ; CHECK: agfi %r15, -524240
    126 ; CHECK: .cfi_def_cfa_offset 524400
    127 ; ...main function body...
    128 ; CHECK: aghi %r15, 8
    129 ; CHECK: lmg %r6, %r15, 524280(%r15)
    130 ; CHECK: br %r14
    131   %l0 = load volatile i32 , i32 *%ptr  ; 13 volatile loads from %ptr; being volatile, none may be dropped or merged.
    132   %l1 = load volatile i32 , i32 *%ptr  ; NOTE(review): the %lN names presumably mirror the GPR each value should get
    133   %l4 = load volatile i32 , i32 *%ptr  ; (2 and 3 are skipped; %r2/%r3 would carry %ptr/%x) -- confirm.
    134   %l5 = load volatile i32 , i32 *%ptr
    135   %l6 = load volatile i32 , i32 *%ptr
    136   %l7 = load volatile i32 , i32 *%ptr
    137   %l8 = load volatile i32 , i32 *%ptr
    138   %l9 = load volatile i32 , i32 *%ptr
    139   %l10 = load volatile i32 , i32 *%ptr
    140   %l11 = load volatile i32 , i32 *%ptr
    141   %l12 = load volatile i32 , i32 *%ptr
    142   %l13 = load volatile i32 , i32 *%ptr
    143   %l14 = load volatile i32 , i32 *%ptr
    144   %add0 = add i32 %l0, %l0  ; Start of a running sum; note the first term is %l0 + %l0.
    145   %add1 = add i32 %l1, %add0  ; Each later %addN adds one more loaded value to the previous sum.
    146   %add4 = add i32 %l4, %add1
    147   %add5 = add i32 %l5, %add4
    148   %add6 = add i32 %l6, %add5
    149   %add7 = add i32 %l7, %add6
    150   %add8 = add i32 %l8, %add7
    151   %add9 = add i32 %l9, %add8
    152   %add10 = add i32 %l10, %add9
    153   %add11 = add i32 %l11, %add10
    154   %add12 = add i32 %l12, %add11
    155   %add13 = add i32 %l13, %add12
    156   %add14 = add i32 %l14, %add13
    157   store volatile i32 %add0, i32 *%ptr  ; All 13 partial sums are stored only after the last load, so 13 values
    158   store volatile i32 %add1, i32 *%ptr  ; are live together; the checks above expect %r6-%r15 saved by one stmg.
    159   store volatile i32 %add4, i32 *%ptr
    160   store volatile i32 %add5, i32 *%ptr
    161   store volatile i32 %add6, i32 *%ptr
    162   store volatile i32 %add7, i32 *%ptr
    163   store volatile i32 %add8, i32 *%ptr
    164   store volatile i32 %add9, i32 *%ptr
    165   store volatile i32 %add10, i32 *%ptr
    166   store volatile i32 %add11, i32 *%ptr
    167   store volatile i32 %add12, i32 *%ptr
    168   store volatile i32 %add13, i32 *%ptr
    169   store volatile i32 %add14, i32 *%ptr
    170   %y = alloca [65508 x i64], align 8  ; One doubleword more than f1: 65508 * 8 + 176 = 524240-byte frame.
    171   %entry = getelementptr inbounds [65508 x i64], [65508 x i64]* %y, i64 0, i64 0  ; Address of element 0 of %y.
    172   store volatile i64 %x, i64* %entry  ; Volatile store into the array; presumably keeps the alloca in use -- confirm.
    173   ret void  ; Restore offset would be 48 + 524240 = 524288, so %r15 is first bumped by 8 to reuse 524280.
    174 }
    175 
    176 ; Like f2 but with a frame that is 8 bytes bigger.  This is the smallest
    177 ; frame size that needs two instructions to perform the final LMG for
    178 ; %r14 and %r15.
    179 define void @f4(i32 *%ptr, i64 %x) {
    180 ; CHECK-LABEL: f4:
    181 ; CHECK: stmg %r14, %r15, 112(%r15)
    182 ; CHECK: .cfi_offset %r14, -48
    183 ; CHECK: .cfi_offset %r15, -40
    184 ; CHECK: agfi %r15, -524176
    185 ; CHECK: .cfi_def_cfa_offset 524336
    186 ; ...main function body...
    187 ; CHECK: aghi %r15, 8
    188 ; CHECK: lmg %r14, %r15, 524280(%r15)
    189 ; CHECK: br %r14
    190   %l0 = load volatile i32 , i32 *%ptr  ; 5 volatile loads from %ptr; being volatile, none may be dropped or merged.
    191   %l1 = load volatile i32 , i32 *%ptr  ; NOTE(review): the %lN names presumably mirror the GPR each value should get
    192   %l4 = load volatile i32 , i32 *%ptr  ; (%r2/%r3 would carry %ptr/%x) -- confirm.
    193   %l5 = load volatile i32 , i32 *%ptr
    194   %l14 = load volatile i32 , i32 *%ptr
    195   %add0 = add i32 %l0, %l0  ; Start of a running sum; note the first term is %l0 + %l0.
    196   %add1 = add i32 %l1, %add0  ; Each later %addN adds one more loaded value to the previous sum.
    197   %add4 = add i32 %l4, %add1
    198   %add5 = add i32 %l5, %add4
    199   %add14 = add i32 %l14, %add5
    200   store volatile i32 %add0, i32 *%ptr  ; All 5 partial sums are stored only after the last load, so 5 values are
    201   store volatile i32 %add1, i32 *%ptr  ; live together; the checks above expect only %r14 and %r15 to be saved.
    202   store volatile i32 %add4, i32 *%ptr
    203   store volatile i32 %add5, i32 *%ptr
    204   store volatile i32 %add14, i32 *%ptr
    205   %y = alloca [65500 x i64], align 8  ; One doubleword more than f2: 65500 * 8 + 176 = 524176-byte frame.
    206   %entry = getelementptr inbounds [65500 x i64], [65500 x i64]* %y, i64 0, i64 0  ; Address of element 0 of %y.
    207   store volatile i64 %x, i64* %entry  ; Volatile store into the array; presumably keeps the alloca in use -- confirm.
    208   ret void  ; Restore offset would be 112 + 524176 = 524288, so %r15 is first bumped by 8 to reuse 524280.
    209 }
    210 
    211 ; This is the largest frame size for which the preparatory increment for
    212 ; "lmg %r14, %r15, ..." can be done using AGHI.
    213 define void @f5(i32 *%ptr, i64 %x) {
    214 ; CHECK-LABEL: f5:
    215 ; CHECK: stmg %r14, %r15, 112(%r15)
    216 ; CHECK: .cfi_offset %r14, -48
    217 ; CHECK: .cfi_offset %r15, -40
    218 ; CHECK: agfi %r15, -556928
    219 ; CHECK: .cfi_def_cfa_offset 557088
    220 ; ...main function body...
    221 ; CHECK: aghi %r15, 32760
    222 ; CHECK: lmg %r14, %r15, 524280(%r15)
    223 ; CHECK: br %r14
    224   %l0 = load volatile i32 , i32 *%ptr  ; 5 volatile loads from %ptr; being volatile, none may be dropped or merged.
    225   %l1 = load volatile i32 , i32 *%ptr  ; NOTE(review): the %lN names presumably mirror the GPR each value should get
    226   %l4 = load volatile i32 , i32 *%ptr  ; (%r2/%r3 would carry %ptr/%x) -- confirm.
    227   %l5 = load volatile i32 , i32 *%ptr
    228   %l14 = load volatile i32 , i32 *%ptr
    229   %add0 = add i32 %l0, %l0  ; Start of a running sum; note the first term is %l0 + %l0.
    230   %add1 = add i32 %l1, %add0  ; Each later %addN adds one more loaded value to the previous sum.
    231   %add4 = add i32 %l4, %add1
    232   %add5 = add i32 %l5, %add4
    233   %add14 = add i32 %l14, %add5
    234   store volatile i32 %add0, i32 *%ptr  ; All 5 partial sums are stored only after the last load, so 5 values are
    235   store volatile i32 %add1, i32 *%ptr  ; live together; the checks above expect only %r14 and %r15 to be saved.
    236   store volatile i32 %add4, i32 *%ptr
    237   store volatile i32 %add5, i32 *%ptr
    238   store volatile i32 %add14, i32 *%ptr
    239   %y = alloca [69594 x i64], align 8  ; 69594 * 8 = 556752 bytes of locals; + 176 = the 556928-byte frame.
    240   %entry = getelementptr inbounds [69594 x i64], [69594 x i64]* %y, i64 0, i64 0  ; Address of element 0 of %y.
    241   store volatile i64 %x, i64* %entry  ; Volatile store into the array; presumably keeps the alloca in use -- confirm.
    242   ret void  ; 112 + 556928 - 524280 = 32760: the largest multiple of 8 that fits AGHI's signed 16-bit immediate.
    243 }
    244 
    245 ; This is the smallest frame size for which the preparatory increment for
    246 ; "lmg %r14, %r15, ..." needs to be done using AGFI.
    247 define void @f6(i32 *%ptr, i64 %x) {
    248 ; CHECK-LABEL: f6:
    249 ; CHECK: stmg %r14, %r15, 112(%r15)
    250 ; CHECK: .cfi_offset %r14, -48
    251 ; CHECK: .cfi_offset %r15, -40
    252 ; CHECK: agfi %r15, -556936
    253 ; CHECK: .cfi_def_cfa_offset 557096
    254 ; ...main function body...
    255 ; CHECK: agfi %r15, 32768
    256 ; CHECK: lmg %r14, %r15, 524280(%r15)
    257 ; CHECK: br %r14
    258   %l0 = load volatile i32 , i32 *%ptr  ; 5 volatile loads from %ptr; being volatile, none may be dropped or merged.
    259   %l1 = load volatile i32 , i32 *%ptr  ; NOTE(review): the %lN names presumably mirror the GPR each value should get
    260   %l4 = load volatile i32 , i32 *%ptr  ; (%r2/%r3 would carry %ptr/%x) -- confirm.
    261   %l5 = load volatile i32 , i32 *%ptr
    262   %l14 = load volatile i32 , i32 *%ptr
    263   %add0 = add i32 %l0, %l0  ; Start of a running sum; note the first term is %l0 + %l0.
    264   %add1 = add i32 %l1, %add0  ; Each later %addN adds one more loaded value to the previous sum.
    265   %add4 = add i32 %l4, %add1
    266   %add5 = add i32 %l5, %add4
    267   %add14 = add i32 %l14, %add5
    268   store volatile i32 %add0, i32 *%ptr  ; All 5 partial sums are stored only after the last load, so 5 values are
    269   store volatile i32 %add1, i32 *%ptr  ; live together; the checks above expect only %r14 and %r15 to be saved.
    270   store volatile i32 %add4, i32 *%ptr
    271   store volatile i32 %add5, i32 *%ptr
    272   store volatile i32 %add14, i32 *%ptr
    273   %y = alloca [69595 x i64], align 8  ; One doubleword more than f5: 69595 * 8 + 176 = 556936-byte frame.
    274   %entry = getelementptr inbounds [69595 x i64], [69595 x i64]* %y, i64 0, i64 0  ; Address of element 0 of %y.
    275   store volatile i64 %x, i64* %entry  ; Volatile store into the array; presumably keeps the alloca in use -- confirm.
    276   ret void  ; 112 + 556936 - 524280 = 32768, one past AGHI's signed 16-bit maximum (32767), hence AGFI.
    277 }
    278