Home | History | Annotate | Download | only in SystemZ
      1 ; Test the saving and restoring of FPRs in large frames.
      2 ;
      3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
      4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
      5 
      6 ; Test a frame size that requires some FPRs to be saved and loaded using
      7 ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
      8 ; The frame is big enough to require two emergency spill slots at 160(%r15),
      9 ; as well as the 8 FPR save slots.  Get a frame of size 4128 by allocating
     10 ; (4128 - 176 - 8 * 8) / 8 = 486 extra doublewords.
     11 define void @f1(double *%ptr, i64 %x) {
        ; f1: 4128-byte frame, where the FPR save slots straddle the 4095-byte
        ; limit of a 12-bit displacement.
        ;
        ; Expectations for the run without a frame pointer (slots addressed
        ; from %r15): %f8-%f11 are saved at 4096..4120, which is above 4095, so
        ; the 20-bit STDY/LDY forms are expected; %f12-%f15 at 4064..4088 still
        ; fit and are expected to use STD/LD.  The CFA offset checked below is
        ; the frame size plus 160 (4128 + 160 = 4288).
     12 ; CHECK-NOFP-LABEL: f1:
     13 ; CHECK-NOFP: aghi %r15, -4128
     14 ; CHECK-NOFP: .cfi_def_cfa_offset 4288
     15 ; CHECK-NOFP: stdy %f8, 4120(%r15)
     16 ; CHECK-NOFP: stdy %f9, 4112(%r15)
     17 ; CHECK-NOFP: stdy %f10, 4104(%r15)
     18 ; CHECK-NOFP: stdy %f11, 4096(%r15)
     19 ; CHECK-NOFP: std %f12, 4088(%r15)
     20 ; CHECK-NOFP: std %f13, 4080(%r15)
     21 ; CHECK-NOFP: std %f14, 4072(%r15)
     22 ; CHECK-NOFP: std %f15, 4064(%r15)
     23 ; CHECK-NOFP: .cfi_offset %f8, -168
     24 ; CHECK-NOFP: .cfi_offset %f9, -176
     25 ; CHECK-NOFP: .cfi_offset %f10, -184
     26 ; CHECK-NOFP: .cfi_offset %f11, -192
     27 ; CHECK-NOFP: .cfi_offset %f12, -200
     28 ; CHECK-NOFP: .cfi_offset %f13, -208
     29 ; CHECK-NOFP: .cfi_offset %f14, -216
     30 ; CHECK-NOFP: .cfi_offset %f15, -224
     31 ; ...main function body...
     32 ; CHECK-NOFP: ldy %f8, 4120(%r15)
     33 ; CHECK-NOFP: ldy %f9, 4112(%r15)
     34 ; CHECK-NOFP: ldy %f10, 4104(%r15)
     35 ; CHECK-NOFP: ldy %f11, 4096(%r15)
     36 ; CHECK-NOFP: ld %f12, 4088(%r15)
     37 ; CHECK-NOFP: ld %f13, 4080(%r15)
     38 ; CHECK-NOFP: ld %f14, 4072(%r15)
     39 ; CHECK-NOFP: ld %f15, 4064(%r15)
     40 ; CHECK-NOFP: aghi %r15, 4128
     41 ; CHECK-NOFP: br %r14
     42 ;
        ; Expectations for the run with -disable-fp-elim: the same slots and
        ; the same STDY/STD split, but addressed from %r11 once it has been
        ; copied from %r15.  The closing LMG reloads %r11-%r15 from the slot
        ; written by the STMG at 88(%r15); seen from the lowered frame that is
        ; 4128 + 88 = 4216.
     43 ; CHECK-FP-LABEL: f1:
     44 ; CHECK-FP: stmg %r11, %r15, 88(%r15)
     45 ; CHECK-FP: aghi %r15, -4128
     46 ; CHECK-FP: .cfi_def_cfa_offset 4288
     47 ; CHECK-FP: lgr %r11, %r15
     48 ; CHECK-FP: .cfi_def_cfa_register %r11
     49 ; CHECK-FP: stdy %f8, 4120(%r11)
     50 ; CHECK-FP: stdy %f9, 4112(%r11)
     51 ; CHECK-FP: stdy %f10, 4104(%r11)
     52 ; CHECK-FP: stdy %f11, 4096(%r11)
     53 ; CHECK-FP: std %f12, 4088(%r11)
     54 ; CHECK-FP: std %f13, 4080(%r11)
     55 ; CHECK-FP: std %f14, 4072(%r11)
     56 ; CHECK-FP: std %f15, 4064(%r11)
     57 ; ...main function body...
     58 ; CHECK-FP: ldy %f8, 4120(%r11)
     59 ; CHECK-FP: ldy %f9, 4112(%r11)
     60 ; CHECK-FP: ldy %f10, 4104(%r11)
     61 ; CHECK-FP: ldy %f11, 4096(%r11)
     62 ; CHECK-FP: ld %f12, 4088(%r11)
     63 ; CHECK-FP: ld %f13, 4080(%r11)
     64 ; CHECK-FP: ld %f14, 4072(%r11)
     65 ; CHECK-FP: ld %f15, 4064(%r11)
     66 ; CHECK-FP: lmg %r11, %r15, 4216(%r11)
     67 ; CHECK-FP: br %r14
        ; Frame padding: 486 doublewords of local storage.  The volatile store
        ; of %x into element 0 gives the array a use (presumably so that the
        ; allocation is not removed).
     68   %y = alloca [486 x i64], align 8
     69   %elem = getelementptr inbounds [486 x i64], [486 x i64]* %y, i64 0, i64 0
     70   store volatile i64 %x, i64* %elem
        ; Sixteen volatile loads from %ptr.  Each loaded value feeds one add in
        ; the chain below.
     71   %l0 = load volatile double , double *%ptr
     72   %l1 = load volatile double , double *%ptr
     73   %l2 = load volatile double , double *%ptr
     74   %l3 = load volatile double , double *%ptr
     75   %l4 = load volatile double , double *%ptr
     76   %l5 = load volatile double , double *%ptr
     77   %l6 = load volatile double , double *%ptr
     78   %l7 = load volatile double , double *%ptr
     79   %l8 = load volatile double , double *%ptr
     80   %l9 = load volatile double , double *%ptr
     81   %l10 = load volatile double , double *%ptr
     82   %l11 = load volatile double , double *%ptr
     83   %l12 = load volatile double , double *%ptr
     84   %l13 = load volatile double , double *%ptr
     85   %l14 = load volatile double , double *%ptr
     86   %l15 = load volatile double , double *%ptr
        ; Running sum: %addN = %lN + %add(N-1), seeded with %l0 + %l0.  Every
        ; partial sum is stored further down, so sixteen doubles are live at
        ; the same time; that is what makes the function use %f8-%f15 and
        ; therefore save and restore them.
     87   %add0 = fadd double %l0, %l0
     88   %add1 = fadd double %l1, %add0
     89   %add2 = fadd double %l2, %add1
     90   %add3 = fadd double %l3, %add2
     91   %add4 = fadd double %l4, %add3
     92   %add5 = fadd double %l5, %add4
     93   %add6 = fadd double %l6, %add5
     94   %add7 = fadd double %l7, %add6
     95   %add8 = fadd double %l8, %add7
     96   %add9 = fadd double %l9, %add8
     97   %add10 = fadd double %l10, %add9
     98   %add11 = fadd double %l11, %add10
     99   %add12 = fadd double %l12, %add11
    100   %add13 = fadd double %l13, %add12
    101   %add14 = fadd double %l14, %add13
    102   %add15 = fadd double %l15, %add14
        ; Volatile stores of all sixteen partial sums back to %ptr.
    103   store volatile double %add0, double *%ptr
    104   store volatile double %add1, double *%ptr
    105   store volatile double %add2, double *%ptr
    106   store volatile double %add3, double *%ptr
    107   store volatile double %add4, double *%ptr
    108   store volatile double %add5, double *%ptr
    109   store volatile double %add6, double *%ptr
    110   store volatile double %add7, double *%ptr
    111   store volatile double %add8, double *%ptr
    112   store volatile double %add9, double *%ptr
    113   store volatile double %add10, double *%ptr
    114   store volatile double %add11, double *%ptr
    115   store volatile double %add12, double *%ptr
    116   store volatile double %add13, double *%ptr
    117   store volatile double %add14, double *%ptr
    118   store volatile double %add15, double *%ptr
    119   ret void
    120 }
    121 
    122 ; Test a frame size that requires some FPRs to be saved and loaded using
    123 ; an indexed STD and LD while others can use the 20-bit STDY and LDY.
    124 ; The index can be any call-clobbered GPR except %r0.
    125 ;
    126 ; Don't require the accesses to share the same LLILH; that would be a
    127 ; good optimisation but is really a different test.
    128 ;
    129 ; As above, get a frame of size 524320 by allocating
    130 ; (524320 - 176 - 8 * 8) / 8 = 65510 extra doublewords.
    131 define void @f2(double *%ptr, i64 %x) {
        ; f2: 524320-byte frame, where the FPR save slots straddle the
        ; 524287-byte limit of a 20-bit displacement.
        ;
        ; Expectations for the run without a frame pointer (slots addressed
        ; from %r15): %f8-%f11 live at 524288..524312, which is out of range
        ; even for STDY/LDY, so an index register is expected to be loaded with
        ; LLILH ..., 8 (8 << 16 = 524288) and combined with displacements
        ; 0..24 on an indexed STD/LD.  Only the first store is required to use
        ; the register captured as INDEX; the remaining accesses accept any of
        ; %r1-%r5.  %f12-%f15 at 524256..524280 are within the 20-bit range and
        ; are expected to use STDY/LDY.  The CFA offset checked below is the
        ; frame size plus 160 (524320 + 160 = 524480).
    132 ; CHECK-NOFP-LABEL: f2:
    133 ; CHECK-NOFP: agfi %r15, -524320
    134 ; CHECK-NOFP: .cfi_def_cfa_offset 524480
    135 ; CHECK-NOFP: llilh [[INDEX:%r[1-5]]], 8
    136 ; CHECK-NOFP: std %f8, 24([[INDEX]],%r15)
    137 ; CHECK-NOFP: std %f9, 16({{%r[1-5]}},%r15)
    138 ; CHECK-NOFP: std %f10, 8({{%r[1-5]}},%r15)
    139 ; CHECK-NOFP: std %f11, 0({{%r[1-5]}},%r15)
    140 ; CHECK-NOFP: stdy %f12, 524280(%r15)
    141 ; CHECK-NOFP: stdy %f13, 524272(%r15)
    142 ; CHECK-NOFP: stdy %f14, 524264(%r15)
    143 ; CHECK-NOFP: stdy %f15, 524256(%r15)
    144 ; CHECK-NOFP: .cfi_offset %f8, -168
    145 ; CHECK-NOFP: .cfi_offset %f9, -176
    146 ; CHECK-NOFP: .cfi_offset %f10, -184
    147 ; CHECK-NOFP: .cfi_offset %f11, -192
    148 ; CHECK-NOFP: .cfi_offset %f12, -200
    149 ; CHECK-NOFP: .cfi_offset %f13, -208
    150 ; CHECK-NOFP: .cfi_offset %f14, -216
    151 ; CHECK-NOFP: .cfi_offset %f15, -224
    152 ; ...main function body...
    153 ; CHECK-NOFP: ld %f8, 24({{%r[1-5]}},%r15)
    154 ; CHECK-NOFP: ld %f9, 16({{%r[1-5]}},%r15)
    155 ; CHECK-NOFP: ld %f10, 8({{%r[1-5]}},%r15)
    156 ; CHECK-NOFP: ld %f11, 0({{%r[1-5]}},%r15)
    157 ; CHECK-NOFP: ldy %f12, 524280(%r15)
    158 ; CHECK-NOFP: ldy %f13, 524272(%r15)
    159 ; CHECK-NOFP: ldy %f14, 524264(%r15)
    160 ; CHECK-NOFP: ldy %f15, 524256(%r15)
    161 ; CHECK-NOFP: agfi %r15, 524320
    162 ; CHECK-NOFP: br %r14
    163 ;
        ; Expectations for the run with -disable-fp-elim: the same slots and
        ; the same indexed-STD/STDY split, but addressed from %r11.  The GPR
        ; reload slot sits at 524320 + 88 = 524408, which is beyond the 20-bit
        ; displacement range, so %r11 is expected to be bumped by 128 first and
        ; the LMG then uses 524280 (128 + 524280 = 524408).
    164 ; CHECK-FP-LABEL: f2:
    165 ; CHECK-FP: stmg %r11, %r15, 88(%r15)
    166 ; CHECK-FP: agfi %r15, -524320
    167 ; CHECK-FP: .cfi_def_cfa_offset 524480
    168 ; CHECK-FP: llilh [[INDEX:%r[1-5]]], 8
    169 ; CHECK-FP: std %f8, 24([[INDEX]],%r11)
    170 ; CHECK-FP: std %f9, 16({{%r[1-5]}},%r11)
    171 ; CHECK-FP: std %f10, 8({{%r[1-5]}},%r11)
    172 ; CHECK-FP: std %f11, 0({{%r[1-5]}},%r11)
    173 ; CHECK-FP: stdy %f12, 524280(%r11)
    174 ; CHECK-FP: stdy %f13, 524272(%r11)
    175 ; CHECK-FP: stdy %f14, 524264(%r11)
    176 ; CHECK-FP: stdy %f15, 524256(%r11)
    177 ; CHECK-FP: .cfi_offset %f8, -168
    178 ; CHECK-FP: .cfi_offset %f9, -176
    179 ; CHECK-FP: .cfi_offset %f10, -184
    180 ; CHECK-FP: .cfi_offset %f11, -192
    181 ; CHECK-FP: .cfi_offset %f12, -200
    182 ; CHECK-FP: .cfi_offset %f13, -208
    183 ; CHECK-FP: .cfi_offset %f14, -216
    184 ; CHECK-FP: .cfi_offset %f15, -224
    185 ; ...main function body...
    186 ; CHECK-FP: ld %f8, 24({{%r[1-5]}},%r11)
    187 ; CHECK-FP: ld %f9, 16({{%r[1-5]}},%r11)
    188 ; CHECK-FP: ld %f10, 8({{%r[1-5]}},%r11)
    189 ; CHECK-FP: ld %f11, 0({{%r[1-5]}},%r11)
    190 ; CHECK-FP: ldy %f12, 524280(%r11)
    191 ; CHECK-FP: ldy %f13, 524272(%r11)
    192 ; CHECK-FP: ldy %f14, 524264(%r11)
    193 ; CHECK-FP: ldy %f15, 524256(%r11)
    194 ; CHECK-FP: aghi %r11, 128
    195 ; CHECK-FP: lmg %r11, %r15, 524280(%r11)
    196 ; CHECK-FP: br %r14
        ; Frame padding: 65510 doublewords of local storage.  The volatile
        ; store of %x into element 0 gives the array a use (presumably so that
        ; the allocation is not removed).
    197   %y = alloca [65510 x i64], align 8
    198   %elem = getelementptr inbounds [65510 x i64], [65510 x i64]* %y, i64 0, i64 0
    199   store volatile i64 %x, i64* %elem
        ; Sixteen volatile loads from %ptr.  Each loaded value feeds one add in
        ; the chain below.
    200   %l0 = load volatile double , double *%ptr
    201   %l1 = load volatile double , double *%ptr
    202   %l2 = load volatile double , double *%ptr
    203   %l3 = load volatile double , double *%ptr
    204   %l4 = load volatile double , double *%ptr
    205   %l5 = load volatile double , double *%ptr
    206   %l6 = load volatile double , double *%ptr
    207   %l7 = load volatile double , double *%ptr
    208   %l8 = load volatile double , double *%ptr
    209   %l9 = load volatile double , double *%ptr
    210   %l10 = load volatile double , double *%ptr
    211   %l11 = load volatile double , double *%ptr
    212   %l12 = load volatile double , double *%ptr
    213   %l13 = load volatile double , double *%ptr
    214   %l14 = load volatile double , double *%ptr
    215   %l15 = load volatile double , double *%ptr
        ; Running sum: %addN = %lN + %add(N-1), seeded with %l0 + %l0.  Every
        ; partial sum is stored further down, so sixteen doubles are live at
        ; the same time; that is what makes the function use %f8-%f15 and
        ; therefore save and restore them.
    216   %add0 = fadd double %l0, %l0
    217   %add1 = fadd double %l1, %add0
    218   %add2 = fadd double %l2, %add1
    219   %add3 = fadd double %l3, %add2
    220   %add4 = fadd double %l4, %add3
    221   %add5 = fadd double %l5, %add4
    222   %add6 = fadd double %l6, %add5
    223   %add7 = fadd double %l7, %add6
    224   %add8 = fadd double %l8, %add7
    225   %add9 = fadd double %l9, %add8
    226   %add10 = fadd double %l10, %add9
    227   %add11 = fadd double %l11, %add10
    228   %add12 = fadd double %l12, %add11
    229   %add13 = fadd double %l13, %add12
    230   %add14 = fadd double %l14, %add13
    231   %add15 = fadd double %l15, %add14
        ; Volatile stores of all sixteen partial sums back to %ptr.
    232   store volatile double %add0, double *%ptr
    233   store volatile double %add1, double *%ptr
    234   store volatile double %add2, double *%ptr
    235   store volatile double %add3, double *%ptr
    236   store volatile double %add4, double *%ptr
    237   store volatile double %add5, double *%ptr
    238   store volatile double %add6, double *%ptr
    239   store volatile double %add7, double *%ptr
    240   store volatile double %add8, double *%ptr
    241   store volatile double %add9, double *%ptr
    242   store volatile double %add10, double *%ptr
    243   store volatile double %add11, double *%ptr
    244   store volatile double %add12, double *%ptr
    245   store volatile double %add13, double *%ptr
    246   store volatile double %add14, double *%ptr
    247   store volatile double %add15, double *%ptr
    248   ret void
    249 }
    250