// Inferno utils/6l/pass.c
// https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/pass.c
//
// Copyright 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright 1997-1999 Vita Nuova Limited
// Portions Copyright 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright 2004,2006 Bruce Ellis
// Portions Copyright 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package x86

import (
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/sys"
	"math"
	"strings"
)

func CanUse1InsnTLS(ctxt *obj.Link) bool {
	if isAndroid {
		// For android, we use a disgusting hack that assumes
		// the thread-local storage slot for g is allocated
		// using pthread_key_create with a fixed offset
		// (see src/runtime/cgo/gcc_android_amd64.c).
		// This makes access to the TLS storage (for g) doable
		// with 1 instruction.
		return true
	}

	if ctxt.Arch.Family == sys.I386 {
		switch ctxt.Headtype {
		case objabi.Hlinux,
			objabi.Hnacl,
			objabi.Hplan9,
			objabi.Hwindows:
			return false
		}

		return true
	}

	switch ctxt.Headtype {
	case objabi.Hplan9, objabi.Hwindows:
		return false
	case objabi.Hlinux:
		return !ctxt.Flag_shared
	}

	return true
}

func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//	MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//	MOVQ TLS, AX
	//	MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//	MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only works because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// TODO: Remove.
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		if p.GetFrom3() != nil {
			nacladdr(ctxt, p, p.GetFrom3())
		}
		nacladdr(ctxt, p, &p.From)
		nacladdr(ctxt, p, &p.To)
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		if p.From.Type == obj.TYPE_FCONST {
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}

// Rewrite p, if necessary, to access global data via the global offset table.
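// On AMD64 the scratch register is R15; on 386 it is CX (or, for LEAL, the
// destination register, so CX is not clobbered). For example,
//	$MOV $sym, Rx     becomes  $MOV sym@GOT, Rx
//	MOVx sym, Ry      becomes  $MOV sym@GOT, R15; MOVx (R15), Ry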
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//	ADUFFxxx $offset
		// becomes
		//	$MOV runtime.duffxxx@GOT, $reg
		//	$LEA $offset($reg), $reg
		//	CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so).
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.Lookup("runtime.duffzero")
		} else {
			sym = ctxt.Lookup("runtime.duffcopy")
		}
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		q := p
		if p.From.Offset != 0 {
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		if p.RestArgs != nil {
			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
		}
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		obj.Nopout(p)
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	obj.Nopout(p)
}

func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// RegTo2 is set on the instructions we insert here so they don't get
	// processed twice.
	if p.RegTo2 != 0 {
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	// Any Prog (aside from the above special cases) with an Addr with Name ==
	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
	// inserted before it.
	isName := func(a *obj.Addr) bool {
		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
			return false
		}
		if a.Sym.Type == objabi.STLSBSS {
			return false
		}
		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
	}

	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
		// respectively.
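		// The rewritten "MOVL $sym, CX" still names sym, so the code further
		// below still inserts the __x86.get_pc_thunk call in front of it.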
		if p.To.Type != obj.TYPE_REG {
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
			q.To = p.To
			p.As = AMOVL
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
	}

	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
		return
	}
	var dst int16 = REG_CX
	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
		dst = p.To.Reg
		// Why? See the comment near the top of rewriteToUseGot above.
		// AMOVLs might be introduced by the GOT rewrites.
	}
	q := obj.Appendp(p, newprog)
	q.RegTo2 = 1
	r := obj.Appendp(q, newprog)
	r.RegTo2 = 1
	q.As = obj.ACALL
	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
	q.To.Type = obj.TYPE_MEM
	q.To.Name = obj.NAME_EXTERN
	r.As = p.As
	r.Scond = p.Scond
	r.From = p.From
	r.RestArgs = p.RestArgs
	r.Reg = p.Reg
	r.To = p.To
	if isName(&p.From) {
		r.From.Reg = dst
	}
	if isName(&p.To) {
		r.To.Reg = dst
	}
	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
		r.GetFrom3().Reg = dst
	}
	obj.Nopout(p)
}

func nacladdr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if p.As == ALEAL || p.As == ALEAQ {
		return
	}

	if a.Reg == REG_BP {
		ctxt.Diag("invalid address: %v", p)
		return
	}

	if a.Reg == REG_TLS {
		a.Reg = REG_BP
	}
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		// all ok
		case REG_BP, REG_SP, REG_R15:
			break

		default:
			if a.Index != REG_NONE {
				ctxt.Diag("invalid address %v", p)
			}
			a.Index = a.Reg
			if a.Index != REG_NONE {
				a.Scale = 1
			}
			a.Reg = REG_R15
		}
	}
}

func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if cursym.Func.Text == nil || cursym.Func.Text.Link == nil {
		return
	}

	p := cursym.Func.Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	hasCall := false
	for q := p; q != nil; q = q.Link {
		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
			hasCall = true
			break
		}
	}

	var bpsize int
	if ctxt.Arch.Family == sys.AMD64 && ctxt.Framepointer_enabled &&
		!p.From.Sym.NoFrame() && // (1) below
		!(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
		!(autoffset == 0 && !hasCall) { // (3) below
		// Make room to save a base pointer.
		// There are 2 cases we must avoid:
		// 1) If noframe is set (which we do for functions which tail call).
		// 2) Scary runtime internals which would be all messed up by frame pointers.
		//    We detect these using a heuristic: frameless nosplit functions.
		// TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
		// For performance, we also want to avoid:
		// 3) Frameless leaf functions
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
	}

	textarg := int64(p.To.Val.(int32))
	cursym.Func.Args = int32(textarg)
	cursym.Func.Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 {
		cursym.Func.Locals = 0
	}

	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
	if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
		leaf := true
	LeafSearch:
		for q := p; q != nil; q = q.Link {
			switch q.As {
			case obj.ACALL:
				// Treat common runtime calls that take no arguments
				// the same as duffcopy and duffzero.
				if !isZeroArgRuntimeCall(q.To.Sym) {
					leaf = false
					break LeafSearch
				}
				fallthrough
			case obj.ADUFFCOPY, obj.ADUFFZERO:
				if autoffset >= objabi.StackSmall-8 {
					leaf = false
					break LeafSearch
				}
			}
		}

		if leaf {
			p.From.Sym.Set(obj.AttrNoSplit, true)
		}
	}

	if !p.From.Sym.NoSplit() || p.From.Sym.Wrapper() {
		p = obj.Appendp(p, newprog)
		p = load_g_cx(ctxt, p, newprog) // load g into CX
	}

	if !cursym.Func.Text.From.Sym.NoSplit() {
		p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check
	}

	if autoffset != 0 {
		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
			ctxt.Diag("unaligned stack size %d", autoffset)
		}
		p = obj.Appendp(p, newprog)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(autoffset)
		p.Spadj = autoffset
	}

	deltasp := autoffset

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(p, newprog)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_SP
		p.To.Scale = 1
		p.To.Offset = int64(autoffset) - int64(bpsize)

		// Move current frame to BP
		p = obj.Appendp(p, newprog)

		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Scale = 1
		p.From.Offset = int64(autoffset) - int64(bpsize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if cursym.Func.Text.From.Sym.Wrapper() {
		// if g._panic != nil && g._panic.argp == FP {
		//   g._panic.argp = bottom-of-frame
		// }
		//
		//	MOVQ g_panic(CX), BX
		//	TESTQ BX, BX
		//	JNE checkargp
		// end:
		//	NOP
		//	... rest of function ...
		// checkargp:
		//	LEAQ (autoffset+8)(SP), DI
		//	CMPQ panic_argp(BX), DI
		//	JNE end
		//	MOVQ SP, panic_argp(BX)
		//	JMP end
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
		//
		// The layout is chosen to help static branch prediction:
		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
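		//
		// Note: the g_panic load below uses a hard-coded offset (4*PtrSize)
		// into the runtime's g struct, just as stacksplit uses 2*PtrSize and
		// 3*PtrSize for stackguard0 and stackguard1; these offsets must stay
		// in sync with the runtime's layout of g.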

		// MOVQ g_panic(CX), BX
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_CX
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = AMOVL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_CX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// TESTQ BX, BX
		p = obj.Appendp(p, newprog)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 {
			p.As = ATESTL
		}

		// JNE checkargp (checkargp to be resolved later)
		jne := obj.Appendp(p, newprog)
		jne.As = AJNE
		jne.To.Type = obj.TYPE_BRANCH

		// end:
		//	NOP
		end := obj.Appendp(jne, newprog)
		end.As = obj.ANOP

		// Fast forward to end of function.
		var last *obj.Prog
		for last = end; last.Link != nil; last = last.Link {
		}

		// LEAQ (autoffset+8)(SP), DI
		p = obj.Appendp(last, newprog)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 {
			p.As = ALEAL
		}

		// Set jne branch target.
		jne.Pcond = p

		// CMPQ panic_argp(BX), DI
		p = obj.Appendp(p, newprog)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_BX
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = ACMPL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_BX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = ACMPL
		}

		// JNE end
		p = obj.Appendp(p, newprog)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p.Pcond = end

		// MOVQ SP, panic_argp(BX)
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_BX
		p.To.Offset = 0 // Panic.argp
		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
			p.As = AMOVL
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = REG_R15
			p.To.Scale = 1
			p.To.Index = REG_BX
		}
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// JMP end
		p = obj.Appendp(p, newprog)
		p.As = obj.AJMP
		p.To.Type = obj.TYPE_BRANCH
		p.Pcond = end

		// Reset p for following code.
		p = end
	}

	for ; p != nil; p = p.Link {
		pcsize := ctxt.Arch.RegSize
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.GetFrom3() != nil {
			switch p.GetFrom3().Name {
			case obj.NAME_AUTO:
				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
			case obj.NAME_PARAM:
				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
			}
		}
		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		switch p.As {
		default:
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case obj.ARET:
			// do nothing
		}

		if autoffset != deltasp {
			ctxt.Diag("unbalanced PUSH/POP")
		}

		if autoffset != 0 {
			if bpsize > 0 {
				// Restore caller's BP
				p.As = AMOVQ

				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_SP
				p.From.Scale = 1
				p.From.Offset = int64(autoffset) - int64(bpsize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p = obj.Appendp(p, newprog)
			}

			p.As = AADJSP
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = int64(-autoffset)
			p.Spadj = -autoffset
			p = obj.Appendp(p, newprog)
			p.As = obj.ARET

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}

func isZeroArgRuntimeCall(s *obj.LSym) bool {
	if s == nil {
		return false
	}
	switch s.Name {
	case "runtime.panicindex", "runtime.panicslice", "runtime.panicdivide", "runtime.panicwrap":
		return true
	}
	return false
}

func indir_cx(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		a.Type = obj.TYPE_MEM
		a.Reg = REG_R15
		a.Index = REG_CX
		a.Scale = 1
		return
	}

	a.Type = obj.TYPE_MEM
	a.Reg = REG_CX
}

// Append code to p to load g into cx.
// Overwrites p with the first instruction (no first appendp).
// Overwriting p is unusual but it lets us use this in both the
// prologue (caller must call appendp first) and in the epilogue.
// Returns last new instruction.
func load_g_cx(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) *obj.Prog {
	p.As = AMOVQ
	if ctxt.Arch.PtrSize == 4 {
		p.As = AMOVL
	}
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REG_TLS
	p.From.Offset = 0
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_CX

	next := p.Link
	progedit(ctxt, p, newprog)
	for p.Link != next {
		p = p.Link
	}

	if p.From.Index == REG_TLS {
		p.From.Scale = 2
	}

	return p
}

// Append code to p to check for stack split.
// Appends to (does not overwrite) p.
// Assumes g is in CX.
// Returns last new instruction.
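// Depending on framesize, the check compares SP directly against the guard
// (frames up to StackSmall), compares SP-(framesize-StackSmall) against it
// (frames up to StackBig), or uses a longer wraparound-safe sequence that
// also checks for StackPreempt (larger frames).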
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ

	if ctxt.Headtype == objabi.Hnacl || ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
	}

	var q1 *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), AX
		//	CMPQ AX, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	MOVQ stackguard, SI
		//	CMPQ SI, $StackPreempt
		//	JEQ label-of-call-to-morestack
		//	LEAQ StackGuard(SP), AX
		//	SUBQ SI, AX
		//	CMPQ AX, $(framesize+(StackGuard-StackSmall))

		p = obj.Appendp(p, newprog)

		p.As = mov
		indir_cx(ctxt, p, &p.From)
		p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_SI

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = objabi.StackPreempt
		if ctxt.Arch.Family == sys.I386 {
			p.To.Offset = int64(uint32(objabi.StackPreempt & (1<<32 - 1)))
		}

		p = obj.Appendp(p, newprog)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = objabi.StackGuard
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = int64(framesize) + (objabi.StackGuard - objabi.StackSmall)
	}

	// common
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	var last *obj.Prog
	for last = cursym.Func.Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := obj.Appendp(spfix, newprog)
	pcdata.Pos = cursym.Func.Text.Pos
	pcdata.As = obj.APCDATA
	pcdata.From.Type = obj.TYPE_CONST
	pcdata.From.Offset = objabi.PCDATA_StackMapIndex
	pcdata.To.Type = obj.TYPE_CONST
	pcdata.To.Offset = -1 // pcdata starts at -1 at function entry

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func.Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func.Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
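	// The loop below runs progedit over the CALL and over anything progedit
	// itself appends, leaving callend at the last instruction of the
	// possibly-expanded call sequence.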
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	jmp := obj.Appendp(callend, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.Pcond = cursym.Func.Text.Link
	jmp.Spadj = +framesize

	jls.Pcond = call
	if q1 != nil {
		q1.Pcond = call
	}

	return jls
}

var unaryDst = map[obj.As]bool{
	ABSWAPL:    true,
	ABSWAPQ:    true,
	ACLFLUSH:   true,
	ACMPXCHG8B: true,
	ADECB:      true,
	ADECL:      true,
	ADECQ:      true,
	ADECW:      true,
	AINCB:      true,
	AINCL:      true,
	AINCQ:      true,
	AINCW:      true,
	ANEGB:      true,
	ANEGL:      true,
	ANEGQ:      true,
	ANEGW:      true,
	ANOTB:      true,
	ANOTL:      true,
	ANOTQ:      true,
	ANOTW:      true,
	APOPL:      true,
	APOPQ:      true,
	APOPW:      true,
	ASETCC:     true,
	ASETCS:     true,
	ASETEQ:     true,
	ASETGE:     true,
	ASETGT:     true,
	ASETHI:     true,
	ASETLE:     true,
	ASETLS:     true,
	ASETLT:     true,
	ASETMI:     true,
	ASETNE:     true,
	ASETOC:     true,
	ASETOS:     true,
	ASETPC:     true,
	ASETPL:     true,
	ASETPS:     true,
	AFFREE:     true,
	AFLDENV:    true,
	AFSAVE:     true,
	AFSTCW:     true,
	AFSTENV:    true,
	AFSTSW:     true,
	AFXSAVE:    true,
	AFXSAVE64:  true,
	ASTMXCSR:   true,
}

var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}

var Linkamd64p32 = obj.LinkArch{
	Arch:           sys.ArchAMD64P32,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}

var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}