; Test the saving and restoring of FPRs in large frames.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s

; Test a frame size that requires some FPRs to be saved and loaded using
; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
; The frame is big enough to require two emergency spill slots at 160(%r15),
; as well as the 8 FPR save slots. Get a frame of size 4128 by allocating
; (4128 - 176 - 8 * 8) / 8 = 486 extra doublewords.
;
; Numbers used by the checks below:
;   - CFA offset 4288 = 4128 + 160.
;   - The FPR slots sit at the top of the frame, 4120 down to 4064; relative
;     to the CFA that is 4120 - 4288 = -168 down to 4064 - 4288 = -224.
;   - Offsets 4096 and above are expected to use STDY/LDY; offsets 4088 and
;     below are expected to use STD/LD.
;   - With a frame pointer, the GPR restore reads from 4216 = 4128 + 88,
;     matching the 88(%r15) used by the STMG before the frame is allocated.
define void @f1(double *%ptr, i64 %x) {
; CHECK-NOFP-LABEL: f1:
; CHECK-NOFP: aghi %r15, -4128
; CHECK-NOFP: .cfi_def_cfa_offset 4288
; CHECK-NOFP: stdy %f8, 4120(%r15)
; CHECK-NOFP: stdy %f9, 4112(%r15)
; CHECK-NOFP: stdy %f10, 4104(%r15)
; CHECK-NOFP: stdy %f11, 4096(%r15)
; CHECK-NOFP: std %f12, 4088(%r15)
; CHECK-NOFP: std %f13, 4080(%r15)
; CHECK-NOFP: std %f14, 4072(%r15)
; CHECK-NOFP: std %f15, 4064(%r15)
; CHECK-NOFP: .cfi_offset %f8, -168
; CHECK-NOFP: .cfi_offset %f9, -176
; CHECK-NOFP: .cfi_offset %f10, -184
; CHECK-NOFP: .cfi_offset %f11, -192
; CHECK-NOFP: .cfi_offset %f12, -200
; CHECK-NOFP: .cfi_offset %f13, -208
; CHECK-NOFP: .cfi_offset %f14, -216
; CHECK-NOFP: .cfi_offset %f15, -224
; ...main function body...
; CHECK-NOFP: ldy %f8, 4120(%r15)
; CHECK-NOFP: ldy %f9, 4112(%r15)
; CHECK-NOFP: ldy %f10, 4104(%r15)
; CHECK-NOFP: ldy %f11, 4096(%r15)
; CHECK-NOFP: ld %f12, 4088(%r15)
; CHECK-NOFP: ld %f13, 4080(%r15)
; CHECK-NOFP: ld %f14, 4072(%r15)
; CHECK-NOFP: ld %f15, 4064(%r15)
; CHECK-NOFP: aghi %r15, 4128
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f1:
; CHECK-FP: stmg %r11, %r15, 88(%r15)
; CHECK-FP: aghi %r15, -4128
; CHECK-FP: .cfi_def_cfa_offset 4288
; CHECK-FP: lgr %r11, %r15
; CHECK-FP: .cfi_def_cfa_register %r11
; CHECK-FP: stdy %f8, 4120(%r11)
; CHECK-FP: stdy %f9, 4112(%r11)
; CHECK-FP: stdy %f10, 4104(%r11)
; CHECK-FP: stdy %f11, 4096(%r11)
; CHECK-FP: std %f12, 4088(%r11)
; CHECK-FP: std %f13, 4080(%r11)
; CHECK-FP: std %f14, 4072(%r11)
; CHECK-FP: std %f15, 4064(%r11)
; The unwind offsets are CFA-relative, so they are the same values as in the
; run without a frame pointer; check them here too so that both
; configurations cover the FPR unwind information.
; CHECK-FP: .cfi_offset %f8, -168
; CHECK-FP: .cfi_offset %f9, -176
; CHECK-FP: .cfi_offset %f10, -184
; CHECK-FP: .cfi_offset %f11, -192
; CHECK-FP: .cfi_offset %f12, -200
; CHECK-FP: .cfi_offset %f13, -208
; CHECK-FP: .cfi_offset %f14, -216
; CHECK-FP: .cfi_offset %f15, -224
; ...main function body...
; CHECK-FP: ldy %f8, 4120(%r11)
; CHECK-FP: ldy %f9, 4112(%r11)
; CHECK-FP: ldy %f10, 4104(%r11)
; CHECK-FP: ldy %f11, 4096(%r11)
; CHECK-FP: ld %f12, 4088(%r11)
; CHECK-FP: ld %f13, 4080(%r11)
; CHECK-FP: ld %f14, 4072(%r11)
; CHECK-FP: ld %f15, 4064(%r11)
; CHECK-FP: lmg %r11, %r15, 4216(%r11)
; CHECK-FP: br %r14
  ; 486 doublewords of local storage pad the frame out to 4128 bytes; the
  ; volatile store keeps the allocation from being discarded.
  %y = alloca [486 x i64], align 8
  %elem = getelementptr inbounds [486 x i64], [486 x i64]* %y, i64 0, i64 0
  store volatile i64 %x, i64* %elem
  ; Sixteen volatile loads are issued before any value is consumed; the
  ; checks above expect all of %f8-%f15 to be saved and restored as a result.
  %l0 = load volatile double , double *%ptr
  %l1 = load volatile double , double *%ptr
  %l2 = load volatile double , double *%ptr
  %l3 = load volatile double , double *%ptr
  %l4 = load volatile double , double *%ptr
  %l5 = load volatile double , double *%ptr
  %l6 = load volatile double , double *%ptr
  %l7 = load volatile double , double *%ptr
  %l8 = load volatile double , double *%ptr
  %l9 = load volatile double , double *%ptr
  %l10 = load volatile double , double *%ptr
  %l11 = load volatile double , double *%ptr
  %l12 = load volatile double , double *%ptr
  %l13 = load volatile double , double *%ptr
  %l14 = load volatile double , double *%ptr
  %l15 = load volatile double , double *%ptr
  ; Each sum depends on the previous one, and every sum is stored below.
  %add0 = fadd double %l0, %l0
  %add1 = fadd double %l1, %add0
  %add2 = fadd double %l2, %add1
  %add3 = fadd double %l3, %add2
  %add4 = fadd double %l4, %add3
  %add5 = fadd double %l5, %add4
  %add6 = fadd double %l6, %add5
  %add7 = fadd double %l7, %add6
  %add8 = fadd double %l8, %add7
  %add9 = fadd double %l9, %add8
  %add10 = fadd double %l10, %add9
  %add11 = fadd double %l11, %add10
  %add12 = fadd double %l12, %add11
  %add13 = fadd double %l13, %add12
  %add14 = fadd double %l14, %add13
  %add15 = fadd double %l15, %add14
  store volatile double %add0, double *%ptr
  store volatile double %add1, double *%ptr
  store volatile double %add2, double *%ptr
  store volatile double %add3, double *%ptr
  store volatile double %add4, double *%ptr
  store volatile double %add5, double *%ptr
  store volatile double %add6, double *%ptr
  store volatile double %add7, double *%ptr
  store volatile double %add8, double *%ptr
  store volatile double %add9, double *%ptr
  store volatile double %add10, double *%ptr
  store volatile double %add11, double *%ptr
  store volatile double %add12, double *%ptr
  store volatile double %add13, double *%ptr
  store volatile double %add14, double *%ptr
  store volatile double %add15, double *%ptr
  ret void
}

; Test a frame size that requires some FPRs to be saved and loaded using
; an indexed STD and LD while others can use the 20-bit STDY and LDY.
; The index can be any call-clobbered GPR except %r0.
;
; Don't require the accesses to share the same LLILH; that would be a
; good optimisation but is really a different test.
;
; As above, get a frame of size 524320 by allocating
; (524320 - 176 - 8 * 8) / 8 = 65510 extra doublewords.
define void @f2(double *%ptr, i64 %x) {
; Numbers used by the checks below:
;   - CFA offset 524480 = 524320 + 160.
;   - LLILH with immediate 8 produces 8 << 16 = 524288, so the indexed
;     accesses 24/16/8/0(index,base) address 524312 down to 524288. Those
;     are the four FPR slots at or above 524288; the remaining four
;     (524280 down to 524256) are expected to use STDY/LDY directly.
;   - Only the first indexed store must use the register captured as INDEX;
;     the other indexed accesses accept any of %r1-%r5, since sharing one
;     LLILH is not required by this test.
;   - With a frame pointer, the epilogue first adds 128 to %r11 and then
;     restores the GPRs from 524280(%r11): 128 + 524280 = 524320 + 88.
;
; CHECK-NOFP-LABEL: f2:
; CHECK-NOFP: agfi %r15, -524320
; CHECK-NOFP: .cfi_def_cfa_offset 524480
; CHECK-NOFP: llilh [[INDEX:%r[1-5]]], 8
; CHECK-NOFP: std %f8, 24([[INDEX]],%r15)
; CHECK-NOFP: std %f9, 16({{%r[1-5]}},%r15)
; CHECK-NOFP: std %f10, 8({{%r[1-5]}},%r15)
; CHECK-NOFP: std %f11, 0({{%r[1-5]}},%r15)
; CHECK-NOFP: stdy %f12, 524280(%r15)
; CHECK-NOFP: stdy %f13, 524272(%r15)
; CHECK-NOFP: stdy %f14, 524264(%r15)
; CHECK-NOFP: stdy %f15, 524256(%r15)
; CHECK-NOFP: .cfi_offset %f8, -168
; CHECK-NOFP: .cfi_offset %f9, -176
; CHECK-NOFP: .cfi_offset %f10, -184
; CHECK-NOFP: .cfi_offset %f11, -192
; CHECK-NOFP: .cfi_offset %f12, -200
; CHECK-NOFP: .cfi_offset %f13, -208
; CHECK-NOFP: .cfi_offset %f14, -216
; CHECK-NOFP: .cfi_offset %f15, -224
; ...main function body...
; CHECK-NOFP: ld %f8, 24({{%r[1-5]}},%r15)
; CHECK-NOFP: ld %f9, 16({{%r[1-5]}},%r15)
; CHECK-NOFP: ld %f10, 8({{%r[1-5]}},%r15)
; CHECK-NOFP: ld %f11, 0({{%r[1-5]}},%r15)
; CHECK-NOFP: ldy %f12, 524280(%r15)
; CHECK-NOFP: ldy %f13, 524272(%r15)
; CHECK-NOFP: ldy %f14, 524264(%r15)
; CHECK-NOFP: ldy %f15, 524256(%r15)
; CHECK-NOFP: agfi %r15, 524320
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f2:
; CHECK-FP: stmg %r11, %r15, 88(%r15)
; CHECK-FP: agfi %r15, -524320
; CHECK-FP: .cfi_def_cfa_offset 524480
; Every FPR access below is based on %r11, so check that the frame pointer
; is established first, in the same way the f1 frame-pointer run does.
; CHECK-FP: lgr %r11, %r15
; CHECK-FP: .cfi_def_cfa_register %r11
; CHECK-FP: llilh [[INDEX:%r[1-5]]], 8
; CHECK-FP: std %f8, 24([[INDEX]],%r11)
; CHECK-FP: std %f9, 16({{%r[1-5]}},%r11)
; CHECK-FP: std %f10, 8({{%r[1-5]}},%r11)
; CHECK-FP: std %f11, 0({{%r[1-5]}},%r11)
; CHECK-FP: stdy %f12, 524280(%r11)
; CHECK-FP: stdy %f13, 524272(%r11)
; CHECK-FP: stdy %f14, 524264(%r11)
; CHECK-FP: stdy %f15, 524256(%r11)
; CHECK-FP: .cfi_offset %f8, -168
; CHECK-FP: .cfi_offset %f9, -176
; CHECK-FP: .cfi_offset %f10, -184
; CHECK-FP: .cfi_offset %f11, -192
; CHECK-FP: .cfi_offset %f12, -200
; CHECK-FP: .cfi_offset %f13, -208
; CHECK-FP: .cfi_offset %f14, -216
; CHECK-FP: .cfi_offset %f15, -224
; ...main function body...
; CHECK-FP: ld %f8, 24({{%r[1-5]}},%r11)
; CHECK-FP: ld %f9, 16({{%r[1-5]}},%r11)
; CHECK-FP: ld %f10, 8({{%r[1-5]}},%r11)
; CHECK-FP: ld %f11, 0({{%r[1-5]}},%r11)
; CHECK-FP: ldy %f12, 524280(%r11)
; CHECK-FP: ldy %f13, 524272(%r11)
; CHECK-FP: ldy %f14, 524264(%r11)
; CHECK-FP: ldy %f15, 524256(%r11)
; CHECK-FP: aghi %r11, 128
; CHECK-FP: lmg %r11, %r15, 524280(%r11)
; CHECK-FP: br %r14
  ; 65510 doublewords of local storage pad the frame out to 524320 bytes;
  ; the volatile store keeps the allocation from being discarded.
  %y = alloca [65510 x i64], align 8
  %elem = getelementptr inbounds [65510 x i64], [65510 x i64]* %y, i64 0, i64 0
  store volatile i64 %x, i64* %elem
  ; Sixteen volatile loads are issued before any value is consumed; the
  ; checks above expect all of %f8-%f15 to be saved and restored as a result.
  %l0 = load volatile double , double *%ptr
  %l1 = load volatile double , double *%ptr
  %l2 = load volatile double , double *%ptr
  %l3 = load volatile double , double *%ptr
  %l4 = load volatile double , double *%ptr
  %l5 = load volatile double , double *%ptr
  %l6 = load volatile double , double *%ptr
  %l7 = load volatile double , double *%ptr
  %l8 = load volatile double , double *%ptr
  %l9 = load volatile double , double *%ptr
  %l10 = load volatile double , double *%ptr
  %l11 = load volatile double , double *%ptr
  %l12 = load volatile double , double *%ptr
  %l13 = load volatile double , double *%ptr
  %l14 = load volatile double , double *%ptr
  %l15 = load volatile double , double *%ptr
  ; Each sum depends on the previous one, and every sum is stored below.
  %add0 = fadd double %l0, %l0
  %add1 = fadd double %l1, %add0
  %add2 = fadd double %l2, %add1
  %add3 = fadd double %l3, %add2
  %add4 = fadd double %l4, %add3
  %add5 = fadd double %l5, %add4
  %add6 = fadd double %l6, %add5
  %add7 = fadd double %l7, %add6
  %add8 = fadd double %l8, %add7
  %add9 = fadd double %l9, %add8
  %add10 = fadd double %l10, %add9
  %add11 = fadd double %l11, %add10
  %add12 = fadd double %l12, %add11
  %add13 = fadd double %l13, %add12
  %add14 = fadd double %l14, %add13
  %add15 = fadd double %l15, %add14
  store volatile double %add0, double *%ptr
  store volatile double %add1, double *%ptr
  store volatile double %add2, double *%ptr
  store volatile double %add3, double *%ptr
  store volatile double %add4, double *%ptr
  store volatile double %add5, double *%ptr
  store volatile double %add6, double *%ptr
  store volatile double %add7, double *%ptr
  store volatile double %add8, double *%ptr
  store volatile double %add9, double *%ptr
  store volatile double %add10, double *%ptr
  store volatile double %add11, double *%ptr
  store volatile double %add12, double *%ptr
  store volatile double %add13, double *%ptr
  store volatile double %add14, double *%ptr
  store volatile double %add15, double *%ptr
  ret void
}