// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

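// defframe records the function's argument and frame sizes in its TEXT
// instruction and emits code to zero ambiguously live stack variables so
// the garbage collector never sees uninitialized pointer words.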
func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)
	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
		n = l.N
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatal("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}
		if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}

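// zerorange zeroes the stack words in [frame+lo, frame+hi), appending the
// new instructions after p. Small ranges get individual MOVL stores,
// medium ranges (when not targeting NaCl) call duffzero, and large ranges
// use REP STOSL. *ax records whether AX has already been zeroed so it is
// only cleared once per function.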
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

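// appendpp allocates a new instruction with the given opcode and operands,
// links it into the instruction list immediately after p, and returns it.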
func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

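// clearfat generates code to zero the fat (multiword) value nl in place.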
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := uint32(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 4 // bytes
	q := w / 4 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSL.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSL loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Regalloc(&n1, gc.Types[gc.Tptr], nil)

		gc.Agen(nl, &n1)
		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for {
			tmp14 := q
			q--
			if tmp14 <= 0 {
				break
			}
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for {
			tmp15 := c
			c--
			if tmp15 <= 0 {
				break
			}
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI)
	gc.Agen(nl, &n1)
	gconreg(x86.AMOVL, 0, x86.REG_AX)

	if q > 128 || (q >= 4 && gc.Nacl) {
		gconreg(x86.AMOVL, int64(q), x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSL, nil, nil) // STOSL AX,*(DI)+
	} else if q >= 4 {
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
		p.To.Offset = 1 * (128 - int64(q))
	} else {
		for q > 0 {
			gins(x86.ASTOSL, nil, nil) // STOSL AX,*(DI)+
			q--
		}
	}

	for c > 0 {
		gins(x86.ASTOSB, nil, nil) // STOSB AL,*(DI)+
		c--
	}
}

var panicdiv *gc.Node

/*
 * generate division.
 * caller must set:
 *	ax = allocated AX register
 *	dx = allocated DX register
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := 0
	if gc.Issigned[t.Etype] {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -1<<uint64(t.Width*8-1) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = 0
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = 0
	}

	var t1 gc.Node
	gc.Tempname(&t1, t)
	var t2 gc.Node
	gc.Tempname(&t2, t)
	if t0 != t {
		var t3 gc.Node
		gc.Tempname(&t3, t0)
		var t4 gc.Node
		gc.Tempname(&t4, t0)
		gc.Cgen(nl, &t3)
		gc.Cgen(nr, &t4)

		// Convert.
		gmove(&t3, &t1)

		gmove(&t4, &t2)
	} else {
		gc.Cgen(nl, &t1)
		gc.Cgen(nr, &t2)
	}

	var n1 gc.Node
	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
		gc.Regalloc(&n1, t, res)
	} else {
		gc.Regalloc(&n1, t, nil)
	}
	gmove(&t2, &n1)
	gmove(&t1, ax)
	var p2 *obj.Prog
	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	if check != 0 {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, ax)

			gmove(ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	if !gc.Issigned[t.Etype] {
		var nz gc.Node
		gc.Nodconst(&nz, t, 0)
		gmove(&nz, dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(optoas(op, t), &n1, nil)
	gc.Regfree(&n1)

	if op == gc.ODIV {
		gmove(ax, res)
	} else {
		gmove(dx, res)
	}
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
}

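// savex prepares the fixed register dr (AX or DX) for use by dodiv: if the
// register is live and is not the destination res, its current value is
// spilled to a temporary recorded in oldx, and the register is then
// allocated into x with type t.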
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := gc.GetReg(dr)
	gc.Nodreg(x, gc.Types[gc.TINT32], dr)

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	if r > 0 && !gc.Samereg(x, res) {
		gc.Tempname(oldx, gc.Types[gc.TINT32])
		gmove(x, oldx)
	}

	gc.Regalloc(x, t, x)
}

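// restx frees the register in x and, if savex spilled its previous value
// into oldx, restores that value.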
func restx(x *gc.Node, oldx *gc.Node) {
	gc.Regfree(x)

	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT32]
		gmove(oldx, x)
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
func cgen_div(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if gc.Is64(nl.Type) {
		gc.Fatal("cgen_div %v", nl.Type)
	}

	var t *gc.Type
	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	} else {
		t = gc.Types[gc.TUINT32]
	}
	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, t)
	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	dodiv(op, nl, nr, res, &ax, &dx)
	restx(&dx, &olddx)
	restx(&ax, &oldax)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Type.Width > 4 {
		gc.Fatal("cgen_shift %v", nl.Type)
	}

	w := int(nl.Type.Width * 8)

	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n2 gc.Node
		gc.Tempname(&n2, nl.Type)
		gc.Cgen(nl, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&n2, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(uint32(w)-1), &n1)

			gins(a, ncon(uint32(w)-1), &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	var oldcx gc.Node
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
	if gc.GetReg(x86.REG_CX) > 1 && !gc.Samereg(&cx, res) {
		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
		gmove(&cx, &oldcx)
	}

	var n1 gc.Node
	var nt gc.Node
	if nr.Type.Width > 4 {
		gc.Tempname(&nt, nr.Type)
		n1 = nt
	} else {
		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	}

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
	} else {
		gc.Cgen(nr, &n1)
		gc.Cgen(nl, &n2)
	}

	// test and fix up large shifts
	if bounded {
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			splitclean()
		}
	} else {
		var p1 *obj.Prog
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
			splitclean()
			gc.Patch(p2, gc.Pc)
		} else {
			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		}

		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gins(a, ncon(uint32(w)-1), &n2)
		} else {
			gmove(ncon(0), &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		gmove(&oldcx, &cx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// copy from byte to full registers
	t := gc.Types[gc.TUINT32]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	var nt gc.Node
	gc.Tempname(&nt, nl.Type)
	gc.Cgen(nl, &nt)
	var n1 gc.Node
	gc.Regalloc(&n1, t, res)
	gc.Cgen(nr, &n1)
	var n2 gc.Node
	gc.Regalloc(&n2, t, nil)
	gmove(&nt, &n2)
	a := optoas(op, t)
	gins(a, &n2, &n1)
	gc.Regfree(&n2)
	gmove(&n1, res)
	gc.Regfree(&n1)

	return true
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node
	var ax gc.Node
	var dx gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)

	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)

	gc.Cgen(nr, &n2)

	// multiply.
	gc.Nodreg(&ax, t, x86.REG_AX)

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}

/*
 * generate floating-point operation.
 */
func cgen_float(n *gc.Node, res *gc.Node) {
	nl := n.Left
	switch n.Op {
	case gc.OEQ,
		gc.ONE,
		gc.OLT,
		gc.OLE,
		gc.OGE:
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Pc
		gmove(gc.Nodbool(true), res)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
		gc.Bgen(n, true, 0, p2)
		gmove(gc.Nodbool(false), res)
		gc.Patch(p3, gc.Pc)
		return

	case gc.OPLUS:
		gc.Cgen(nl, res)
		return

	case gc.OCONV:
		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
			gc.Cgen(nl, res)
			return
		}

		var n2 gc.Node
		gc.Tempname(&n2, n.Type)
		var n1 gc.Node
		gc.Mgen(nl, &n1, res)
		gmove(&n1, &n2)
		gmove(&n2, res)
		gc.Mfree(&n1)
		return
	}

	if gc.Thearch.Use387 {
		cgen_float387(n, res)
	} else {
		cgen_floatsse(n, res)
	}
}

// floating-point.  387 (not SSE2)
func cgen_float387(n *gc.Node, res *gc.Node) {
	var f0 gc.Node
	var f1 gc.Node

	nl := n.Left
	nr := n.Right
	gc.Nodreg(&f0, nl.Type, x86.REG_F0)
	gc.Nodreg(&f1, n.Type, x86.REG_F0+1)
	if nr != nil {
		// binary
		if nl.Ullman >= nr.Ullman {
			gc.Cgen(nl, &f0)
			if nr.Addable {
				gins(foptoas(int(n.Op), n.Type, 0), nr, &f0)
			} else {
				gc.Cgen(nr, &f0)
				gins(foptoas(int(n.Op), n.Type, Fpop), &f0, &f1)
			}
		} else {
			gc.Cgen(nr, &f0)
			if nl.Addable {
				gins(foptoas(int(n.Op), n.Type, Frev), nl, &f0)
			} else {
				gc.Cgen(nl, &f0)
				gins(foptoas(int(n.Op), n.Type, Frev|Fpop), &f0, &f1)
			}
		}

		gmove(&f0, res)
		return
	}

	// unary
	gc.Cgen(nl, &f0)

	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
		gins(foptoas(int(n.Op), n.Type, 0), nil, nil)
	}
	gmove(&f0, res)
	return
}

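// cgen_floatsse generates SSE-based code for the floating-point
// operation n, leaving the result in res.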
func cgen_floatsse(n *gc.Node, res *gc.Node) {
	var a int

	nl := n.Left
	nr := n.Right
	switch n.Op {
	default:
		gc.Dump("cgen_floatsse", n)
		gc.Fatal("cgen_floatsse %v", gc.Oconv(int(n.Op), 0))
		return

	case gc.OMINUS,
		gc.OCOM:
		nr = gc.Nodintconst(-1)
		gc.Convlit(&nr, n.Type)
		a = foptoas(gc.OMUL, nl.Type, 0)
		goto sbop

		// symmetric binary
	case gc.OADD,
		gc.OMUL:
		a = foptoas(int(n.Op), nl.Type, 0)

		goto sbop

		// asymmetric binary
	case gc.OSUB,
		gc.OMOD,
		gc.ODIV:
		a = foptoas(int(n.Op), nl.Type, 0)

		goto abop
	}

sbop: // symmetric binary
	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
		r := nl
		nl = nr
		nr = r
	}

abop: // asymmetric binary
	if nl.Ullman >= nr.Ullman {
		var nt gc.Node
		gc.Tempname(&nt, nl.Type)
		gc.Cgen(nl, &nt)
		var n2 gc.Node
		gc.Mgen(nr, &n2, nil)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&nt, &n1)
		gins(a, &n2, &n1)
		gmove(&n1, res)
		gc.Regfree(&n1)
		gc.Mfree(&n2)
	} else {
		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, res)
		gc.Cgen(nr, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, nil)
		gc.Cgen(nl, &n1)
		gins(a, &n2, &n1)
		gc.Regfree(&n2)
		gmove(&n1, res)
		gc.Regfree(&n1)
	}

	return
}

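// bgen_float generates a branch to "to" when the floating-point comparison
// n evaluates to wantTrue, handling NaN carefully on both the 387 and SSE
// code paths.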
func bgen_float(n *gc.Node, wantTrue bool, likely int, to *obj.Prog) {
	nl := n.Left
	nr := n.Right
	a := int(n.Op)
	if !wantTrue {
		// brcom is not valid on floats when NaN is involved.
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// No need to avoid re-genning ninit.
		bgen_float(n, true, -likely, p2)

		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p2, gc.Pc)
		return
	}

	if gc.Thearch.Use387 {
		a = gc.Brrev(a) // because the args are stacked
		if a == gc.OGE || a == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			nl, nr = nr, nl
			a = gc.Brrev(a)
		}

		var ax, n2, tmp gc.Node
		gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
		gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
		if gc.Simsimtype(nr.Type) == gc.TFLOAT64 {
			if nl.Ullman > nr.Ullman {
				gc.Cgen(nl, &tmp)
				gc.Cgen(nr, &tmp)
				gins(x86.AFXCHD, &tmp, &n2)
			} else {
				gc.Cgen(nr, &tmp)
				gc.Cgen(nl, &tmp)
			}

			gins(x86.AFUCOMIP, &tmp, &n2)
			gins(x86.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
		} else {
			// TODO(rsc): The moves back and forth to memory
			// here are for truncating the value to 32 bits.
			// This handles 32-bit comparison but presumably
			// all the other ops have the same problem.
			// We need to figure out what the right general
			// solution is, besides telling people to use float64.
			var t1 gc.Node
			gc.Tempname(&t1, gc.Types[gc.TFLOAT32])

			var t2 gc.Node
			gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
			gc.Cgen(nr, &t1)
			gc.Cgen(nl, &t2)
			gmove(&t2, &tmp)
			gins(x86.AFCOMFP, &t1, &tmp)
			gins(x86.AFSTSW, nil, &ax)
			gins(x86.ASAHF, nil, nil)
		}
	} else {
		// Not 387
		if !nl.Addable {
			nl = gc.CgenTemp(nl)
		}
		if !nr.Addable {
			nr = gc.CgenTemp(nr)
		}

		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, nil)
		gmove(nr, &n2)
		nr = &n2

		if nl.Op != gc.OREGISTER {
			var n3 gc.Node
			gc.Regalloc(&n3, nl.Type, nil)
			gmove(nl, &n3)
			nl = &n3
		}

		if a == gc.OGE || a == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			nl, nr = nr, nl
			a = gc.Brrev(a)
		}

		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
		if nl.Op == gc.OREGISTER {
			gc.Regfree(nl)
		}
		gc.Regfree(nr)
	}

	switch a {
	case gc.OEQ:
		// neither NE nor P
		p1 := gc.Gbranch(x86.AJNE, nil, -likely)
		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p1, gc.Pc)
		gc.Patch(p2, gc.Pc)
	case gc.ONE:
		// either NE or P
		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)
		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
	default:
		gc.Patch(gc.Gbranch(optoas(a, nr.Type), nil, likely), to)
	}
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = x86.ACMPL
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
		}
		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAL, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}
    941