// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
		n = l.N
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatal("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

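		// Merge with the previous range when the gap between them is at most
		// 2*Widthreg: zeroing a couple of extra words is cheaper than
		// emitting a separate zerorange sequence.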
		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}

// DUFFZERO consists of repeated blocks of 4 MOVs + ADD,
// with 4 STOSQs at the very end.
// The trailing STOSQs prevent the need for a DI preadjustment
// for small numbers of words to clear.
// See runtime/mkduff.go.
const (
	dzBlocks    = 31 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 19 // size of instructions in a single block
	dzMovSize   = 4  // size of single MOV instruction w/ offset
	dzAddSize   = 4  // size of single ADD instruction
	dzDIStep    = 8  // number of bytes cleared by each MOV instruction

	dzTailLen  = 4 // number of final STOSQ instructions
	dzTailSize = 2 // size of single STOSQ instruction

	dzSize = dzBlocks*dzBlockSize + dzTailLen*dzTailSize // total size of DUFFZERO routine
)

// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
// q is the number of words to zero.
func dzDI(q int64) int64 {
	if q < dzTailLen {
		return 0
	}
	q -= dzTailLen
	if q%dzBlockLen == 0 {
		return 0
	}
	return -dzDIStep * (dzBlockLen - q%dzBlockLen)
}

// dzOff returns the offset for a jump into DUFFZERO.
// q is the number of words to zero.
func dzOff(q int64) int64 {
	off := int64(dzSize)
	if q < dzTailLen {
		return off - q*dzTailSize
	}
	off -= dzTailLen * dzTailSize
	q -= dzTailLen
	blocks, steps := q/dzBlockLen, q%dzBlockLen
	off -= dzBlockSize * blocks
	if steps > 0 {
		off -= dzAddSize + dzMovSize*steps
	}
	return off
}
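
// For example, clearing q = 7 words: dzOff(7) = dzSize - dzTailLen*dzTailSize -
// (dzAddSize + 3*dzMovSize) = 597 - 8 - 16 = 573, so the call enters at the last
// 3 MOVs of the final block, and dzDI(7) = -8 backs DI up one word so those MOVs
// land on the first 3 words to clear; the block's ADD then advances DI and the
// 4 trailing STOSQs clear the remaining 4 words.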

func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt%int64(gc.Widthreg) != 0 {
		// should only happen with nacl
		if cnt%int64(gc.Widthptr) != 0 {
			gc.Fatal("zerorange count not a multiple of widthptr %d", cnt)
		}
		p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
		lo += int64(gc.Widthptr)
		cnt -= int64(gc.Widthptr)
	}

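	// Strategy: up to 4 words, emit individual MOVQs; up to 128 words
	// (and not on NaCl), jump into the runtime's DUFFZERO; otherwise
	// fall back to REP STOSQ.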
	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && (cnt <= int64(128*gc.Widthreg)) {
		q := cnt / int64(gc.Widthreg)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(q), obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(q))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

var panicdiv *gc.Node

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := 0
	if gc.Issigned[t.Etype] {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = 0
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = 0
	}

	a := optoas(op, t)

	var n3 gc.Node
	gc.Regalloc(&n3, t0, nil)
	var ax gc.Node
	var oldax gc.Node
	if nl.Ullman >= nr.Ullman {
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
		gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen
		gc.Cgen(nr, &n3)
		gc.Regfree(&ax)
	} else {
		gc.Cgen(nr, &n3)
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
	}

	if t != t0 {
		// Convert
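		// The operands were computed at the original narrow type t0 (int8/int16);
		// retype ax and n3 to the widened type t and move the narrow-typed copies
		// back in, so the values are sign- or zero-extended before the 32-bit divide.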
		ax1 := ax

		n31 := n3
		ax.Type = t
		n3.Type = t
		gmove(&ax1, &ax)
		gmove(&n31, &n3)
	}

	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	var p2 *obj.Prog
	if check != 0 {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &ax)

			gmove(&ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	if !gc.Issigned[t.Etype] {
		gc.Nodconst(&n4, t, 0)
		gmove(&n4, &dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(a, &n3, nil)
	gc.Regfree(&n3)
	if op == gc.ODIV {
		gmove(&ax, res)
	} else {
		gmove(&dx, res)
	}
	restx(&dx, &olddx)
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
	restx(&ax, &oldax)
}

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := uint8(gc.GetReg(dr))

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		oldx.Etype = r // squirrel away old r value
		gc.SetReg(dr, 1)
	}
}

func restx(x *gc.Node, oldx *gc.Node) {
	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT64]
		gc.SetReg(int(x.Reg), int(oldx.Etype))
		gmove(oldx, x)
		gc.Regfree(oldx)
	}
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type
	a := optoas(gc.OHMUL, t)
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	var ax gc.Node
	gc.Nodreg(&ax, t, x86.REG_AX)
	gmove(&n1, &ax)
	gins(a, &n2, nil)
	gc.Regfree(&n2)
	gc.Regfree(&n1)

	var dx gc.Node
	if t.Width == 1 {
		// byte multiply behaves differently.
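		// The 8-bit multiply leaves the high half of the product in AH
		// rather than DX, so copy AH into DX before the common move below.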
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

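	// A variable shift count must live in CL on x86, so reserve CX for it
	// (saving any live value via oldcx below).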
	rcx := gc.GetReg(x86.REG_CX)
	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	var oldcx gc.Node
	if rcx > 0 && !gc.Samereg(&cx, res) {
		gc.Regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
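	// (The hardware masks the shift count, but Go requires a shift by >= the
	// operand width to yield 0, or all sign bits for a signed right shift.)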
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		gc.Regfree(&oldcx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	gc.Regalloc(&n1b, nl.Type, res)

	gc.Cgen(nl, &n1b)
	var n2b gc.Node
	gc.Regalloc(&n2b, nr.Type, nil)
	gc.Cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	gc.Regfree(&n1b)
	gc.Regfree(&n2b)
	return true
}

func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := nl.Type.Width

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 8 // bytes
	q := w / 8 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSQ.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSQ loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Agenr(nl, &n1, nil)

		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for ; q > 0; q-- {
			n1.Type = z.Type
			gins(x86.AMOVQ, &z, &n1)
			n1.Xoffset += 8
		}

		if c >= 4 {
			gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
			c -= 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for ; c > 0; c-- {
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var oldn1 gc.Node
	var n1 gc.Node
	savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
	gc.Agen(nl, &n1)

	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
	gconreg(x86.AMOVL, 0, x86.REG_AX)

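	// Clear with REP STOSQ when the block is large (q > 128) or on NaCl;
	// otherwise jump into DUFFZERO at the offset computed by dzOff.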
	if q > 128 || gc.Nacl {
		gconreg(movptr, q, x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSQ, nil, nil) // STOSQ AX,*(DI)+
	} else {
		if di := dzDI(q); di != 0 {
			gconreg(addptr, di, x86.REG_DI)
		}
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = dzOff(q)
	}

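	// Clear any trailing bytes. When at least 8 bytes were already zeroed
	// (w >= 8) and 4-7 bytes remain, a single 8-byte store at offset c-8
	// overlaps the zeroed region and finishes the job in one instruction.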
	z := ax
	di := n1
	if w >= 8 && c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT64]
		di.Type = z.Type
		p := gins(x86.AMOVQ, &z, &di)
		p.To.Scale = 1
		p.To.Offset = c - 8
	} else if c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT32]
		di.Type = z.Type
		gins(x86.AMOVL, &z, &di)
		if c > 4 {
			p := gins(x86.AMOVL, &z, &di)
			p.To.Scale = 1
			p.To.Offset = c - 4
		}
	} else {
		for c > 0 {
			gins(x86.ASTOSB, nil, nil) // STOSB AL,*(DI)+
			c--
		}
	}

	restx(&n1, &oldn1)
	restx(&ax, &oldax)
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
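		// The MOV faults when arg is nil; the runtime's fault handler
		// turns that into the nil-pointer panic.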
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = int16(cmpptr)
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = x86.REG_NONE
		}

		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
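		// These are the only scale factors the x86 scaled-index addressing
		// mode supports, so LEA can fold addr + index*width into one instruction.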
		p1 := gins(x86.ALEAQ, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
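	// Two-instruction TLS load: the first MOV materializes the TLS base in n1
	// (rewritten by the obj back end for the target platform), the second
	// loads g from (n1)(TLS*1).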
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}