// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)
	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
		n = l.N
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatal("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}
		if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}

func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := uint32(nl.Type.Width)

	// Avoid taking the address for simple enough types.
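	// Componentgen with a nil source zeroes nl component by component
	// and reports whether it was able to handle the type.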
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 4 // bytes
	q := w / 4 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSL.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSL loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Regalloc(&n1, gc.Types[gc.Tptr], nil)

		gc.Agen(nl, &n1)
		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for {
			tmp14 := q
			q--
			if tmp14 <= 0 {
				break
			}
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for {
			tmp15 := c
			c--
			if tmp15 <= 0 {
				break
			}
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI)
	gc.Agen(nl, &n1)
	gconreg(x86.AMOVL, 0, x86.REG_AX)

	if q > 128 || (q >= 4 && gc.Nacl) {
		gconreg(x86.AMOVL, int64(q), x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+
	} else if q >= 4 {
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
		p.To.Offset = 1 * (128 - int64(q))
	} else {
		for q > 0 {
			gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+
			q--
		}
	}

	for c > 0 {
		gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+
		c--
	}
}

var panicdiv *gc.Node

/*
 * generate division.
 * caller must set:
 *	ax = allocated AX register
 *	dx = allocated DX register
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := 0
	if gc.Issigned[t.Etype] {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -1<<uint64(t.Width*8-1) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = 0
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = 0
	}

	var t1 gc.Node
	gc.Tempname(&t1, t)
	var t2 gc.Node
	gc.Tempname(&t2, t)
	if t0 != t {
		var t3 gc.Node
		gc.Tempname(&t3, t0)
		var t4 gc.Node
		gc.Tempname(&t4, t0)
		gc.Cgen(nl, &t3)
		gc.Cgen(nr, &t4)

		// Convert.
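		// Widen the operands from the original narrow type t0
		// into the 32-bit temporaries t1 and t2.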
		gmove(&t3, &t1)

		gmove(&t4, &t2)
	} else {
		gc.Cgen(nl, &t1)
		gc.Cgen(nr, &t2)
	}

	var n1 gc.Node
	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
		gc.Regalloc(&n1, t, res)
	} else {
		gc.Regalloc(&n1, t, nil)
	}
	gmove(&t2, &n1)
	gmove(&t1, ax)
	var p2 *obj.Prog
	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	if check != 0 {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, ax)

			gmove(ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	if !gc.Issigned[t.Etype] {
		var nz gc.Node
		gc.Nodconst(&nz, t, 0)
		gmove(&nz, dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(optoas(op, t), &n1, nil)
	gc.Regfree(&n1)

	if op == gc.ODIV {
		gmove(ax, res)
	} else {
		gmove(dx, res)
	}
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
}

func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := gc.GetReg(dr)
	gc.Nodreg(x, gc.Types[gc.TINT32], dr)

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	if r > 0 && !gc.Samereg(x, res) {
		gc.Tempname(oldx, gc.Types[gc.TINT32])
		gmove(x, oldx)
	}

	gc.Regalloc(x, t, x)
}

func restx(x *gc.Node, oldx *gc.Node) {
	gc.Regfree(x)

	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT32]
		gmove(oldx, x)
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
func cgen_div(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if gc.Is64(nl.Type) {
		gc.Fatal("cgen_div %v", nl.Type)
	}

	var t *gc.Type
	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	} else {
		t = gc.Types[gc.TUINT32]
	}
	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, t)
	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	dodiv(op, nl, nr, res, &ax, &dx)
	restx(&dx, &olddx)
	restx(&ax, &oldax)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Type.Width > 4 {
		gc.Fatal("cgen_shift %v", nl.Type)
	}

	w := int(nl.Type.Width * 8)

	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n2 gc.Node
		gc.Tempname(&n2, nl.Type)
		gc.Cgen(nl, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&n2, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(uint32(w)-1), &n1)

			gins(a, ncon(uint32(w)-1), &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	var oldcx gc.Node
	var cx gc.Node
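	// A variable shift count must live in CX. If CX is already in use
	// and is not the destination, save it in a temporary and restore it
	// after the shift.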
	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
	if gc.GetReg(x86.REG_CX) > 1 && !gc.Samereg(&cx, res) {
		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
		gmove(&cx, &oldcx)
	}

	var n1 gc.Node
	var nt gc.Node
	if nr.Type.Width > 4 {
		gc.Tempname(&nt, nr.Type)
		n1 = nt
	} else {
		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	}

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
	} else {
		gc.Cgen(nr, &n1)
		gc.Cgen(nl, &n2)
	}

	// test and fix up large shifts
	if bounded {
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			splitclean()
		}
	} else {
		var p1 *obj.Prog
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
			splitclean()
			gc.Patch(p2, gc.Pc)
		} else {
			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		}

		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gins(a, ncon(uint32(w)-1), &n2)
		} else {
			gmove(ncon(0), &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		gmove(&oldcx, &cx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// copy from byte to full registers
	t := gc.Types[gc.TUINT32]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	var nt gc.Node
	gc.Tempname(&nt, nl.Type)
	gc.Cgen(nl, &nt)
	var n1 gc.Node
	gc.Regalloc(&n1, t, res)
	gc.Cgen(nr, &n1)
	var n2 gc.Node
	gc.Regalloc(&n2, t, nil)
	gmove(&nt, &n2)
	a := optoas(op, t)
	gins(a, &n2, &n1)
	gc.Regfree(&n2)
	gmove(&n1, res)
	gc.Regfree(&n1)

	return true
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node
	var ax gc.Node
	var dx gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)

	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)

	gc.Cgen(nr, &n2)

	// multiply.
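	// The one-operand multiply leaves the low half of the product in AX
	// and the high half in DX; byte multiply is special-cased below.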
	gc.Nodreg(&ax, t, x86.REG_AX)

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}

/*
 * generate floating-point operation.
 */
func cgen_float(n *gc.Node, res *gc.Node) {
	nl := n.Left
	switch n.Op {
	case gc.OEQ,
		gc.ONE,
		gc.OLT,
		gc.OLE,
		gc.OGE:
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Pc
		gmove(gc.Nodbool(true), res)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
		gc.Bgen(n, true, 0, p2)
		gmove(gc.Nodbool(false), res)
		gc.Patch(p3, gc.Pc)
		return

	case gc.OPLUS:
		gc.Cgen(nl, res)
		return

	case gc.OCONV:
		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
			gc.Cgen(nl, res)
			return
		}

		var n2 gc.Node
		gc.Tempname(&n2, n.Type)
		var n1 gc.Node
		gc.Mgen(nl, &n1, res)
		gmove(&n1, &n2)
		gmove(&n2, res)
		gc.Mfree(&n1)
		return
	}

	if gc.Thearch.Use387 {
		cgen_float387(n, res)
	} else {
		cgen_floatsse(n, res)
	}
}

// floating-point. 387 (not SSE2)
func cgen_float387(n *gc.Node, res *gc.Node) {
	var f0 gc.Node
	var f1 gc.Node

	nl := n.Left
	nr := n.Right
	gc.Nodreg(&f0, nl.Type, x86.REG_F0)
	gc.Nodreg(&f1, n.Type, x86.REG_F0+1)
	if nr != nil {
		// binary
		if nl.Ullman >= nr.Ullman {
			gc.Cgen(nl, &f0)
			if nr.Addable {
				gins(foptoas(int(n.Op), n.Type, 0), nr, &f0)
			} else {
				gc.Cgen(nr, &f0)
				gins(foptoas(int(n.Op), n.Type, Fpop), &f0, &f1)
			}
		} else {
			gc.Cgen(nr, &f0)
			if nl.Addable {
				gins(foptoas(int(n.Op), n.Type, Frev), nl, &f0)
			} else {
				gc.Cgen(nl, &f0)
				gins(foptoas(int(n.Op), n.Type, Frev|Fpop), &f0, &f1)
			}
		}

		gmove(&f0, res)
		return
	}

	// unary
	gc.Cgen(nl, &f0)

	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
		gins(foptoas(int(n.Op), n.Type, 0), nil, nil)
	}
	gmove(&f0, res)
	return
}

func cgen_floatsse(n *gc.Node, res *gc.Node) {
	var a int

	nl := n.Left
	nr := n.Right
	switch n.Op {
	default:
		gc.Dump("cgen_floatsse", n)
		gc.Fatal("cgen_floatsse %v", gc.Oconv(int(n.Op), 0))
		return

	case gc.OMINUS,
		gc.OCOM:
		nr = gc.Nodintconst(-1)
		gc.Convlit(&nr, n.Type)
		a = foptoas(gc.OMUL, nl.Type, 0)
		goto sbop

	// symmetric binary
	case gc.OADD,
		gc.OMUL:
		a = foptoas(int(n.Op), nl.Type, 0)

		goto sbop

	// asymmetric binary
	case gc.OSUB,
		gc.OMOD,
		gc.ODIV:
		a = foptoas(int(n.Op), nl.Type, 0)

		goto abop
	}

sbop: // symmetric binary
	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
		r := nl
		nl = nr
		nr = r
	}

abop: // asymmetric binary
	if nl.Ullman >= nr.Ullman {
		var nt gc.Node
		gc.Tempname(&nt, nl.Type)
		gc.Cgen(nl, &nt)
		var n2 gc.Node
		gc.Mgen(nr, &n2, nil)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&nt, &n1)
		gins(a, &n2, &n1)
		gmove(&n1, res)
		gc.Regfree(&n1)
		gc.Mfree(&n2)
	} else {
		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, res)
		gc.Cgen(nr, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, nil)
		gc.Cgen(nl, &n1)
		gins(a, &n2, &n1)
		gc.Regfree(&n2)
		gmove(&n1, res)
		gc.Regfree(&n1)
	}

	return
}

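// bgen_float generates a branch to `to` when the floating-point
// comparison n evaluates to wantTrue.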
func bgen_float(n *gc.Node, wantTrue bool, likely int, to *obj.Prog) {
	nl := n.Left
	nr := n.Right
	a := int(n.Op)
	if !wantTrue {
		// brcom is not valid on floats when NaN is involved.
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// No need to avoid re-genning ninit.
		bgen_float(n, true, -likely, p2)

		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p2, gc.Pc)
		return
	}

	if gc.Thearch.Use387 {
		a = gc.Brrev(a) // because the args are stacked
		if a == gc.OGE || a == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			nl, nr = nr, nl
			a = gc.Brrev(a)
		}

		var ax, n2, tmp gc.Node
		gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
		gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
		if gc.Simsimtype(nr.Type) == gc.TFLOAT64 {
			if nl.Ullman > nr.Ullman {
				gc.Cgen(nl, &tmp)
				gc.Cgen(nr, &tmp)
				gins(x86.AFXCHD, &tmp, &n2)
			} else {
				gc.Cgen(nr, &tmp)
				gc.Cgen(nl, &tmp)
			}

			gins(x86.AFUCOMIP, &tmp, &n2)
			gins(x86.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
		} else {
			// TODO(rsc): The moves back and forth to memory
			// here are for truncating the value to 32 bits.
			// This handles 32-bit comparison but presumably
			// all the other ops have the same problem.
			// We need to figure out what the right general
			// solution is, besides telling people to use float64.
			var t1 gc.Node
			gc.Tempname(&t1, gc.Types[gc.TFLOAT32])

			var t2 gc.Node
			gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
			gc.Cgen(nr, &t1)
			gc.Cgen(nl, &t2)
			gmove(&t2, &tmp)
			gins(x86.AFCOMFP, &t1, &tmp)
			gins(x86.AFSTSW, nil, &ax)
			gins(x86.ASAHF, nil, nil)
		}
	} else {
		// Not 387
		if !nl.Addable {
			nl = gc.CgenTemp(nl)
		}
		if !nr.Addable {
			nr = gc.CgenTemp(nr)
		}

		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, nil)
		gmove(nr, &n2)
		nr = &n2

		if nl.Op != gc.OREGISTER {
			var n3 gc.Node
			gc.Regalloc(&n3, nl.Type, nil)
			gmove(nl, &n3)
			nl = &n3
		}

		if a == gc.OGE || a == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			nl, nr = nr, nl
			a = gc.Brrev(a)
		}

		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
		if nl.Op == gc.OREGISTER {
			gc.Regfree(nl)
		}
		gc.Regfree(nr)
	}

	switch a {
	case gc.OEQ:
		// neither NE nor P
		p1 := gc.Gbranch(x86.AJNE, nil, -likely)
		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p1, gc.Pc)
		gc.Patch(p2, gc.Pc)
	case gc.ONE:
		// either NE or P
		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)
		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
	default:
		gc.Patch(gc.Gbranch(optoas(a, nr.Type), nil, likely), to)
	}
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
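// The expansion compares the pointer against zero and, when it is nil,
// faults by storing to address 0.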
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = x86.ACMPL
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
		}
		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAL, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}