Home | History | Annotate | Download | only in gc
      1 // Derived from Inferno utils/6c/reg.c
      2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
      3 //
      4 //	Copyright  1994-1999 Lucent Technologies Inc.  All rights reserved.
      5 //	Portions Copyright  1995-1997 C H Forsyth (forsyth (a] terzarima.net)
      6 //	Portions Copyright  1997-1999 Vita Nuova Limited
      7 //	Portions Copyright  2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
      8 //	Portions Copyright  2004,2006 Bruce Ellis
      9 //	Portions Copyright  2005-2007 C H Forsyth (forsyth (a] terzarima.net)
     10 //	Revisions Copyright  2000-2007 Lucent Technologies Inc. and others
     11 //	Portions Copyright  2009 The Go Authors.  All rights reserved.
     12 //
     13 // Permission is hereby granted, free of charge, to any person obtaining a copy
     14 // of this software and associated documentation files (the "Software"), to deal
     15 // in the Software without restriction, including without limitation the rights
     16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     17 // copies of the Software, and to permit persons to whom the Software is
     18 // furnished to do so, subject to the following conditions:
     19 //
     20 // The above copyright notice and this permission notice shall be included in
     21 // all copies or substantial portions of the Software.
     22 //
     23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
     26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     29 // THE SOFTWARE.
     30 
     31 package gc
     32 
     33 import (
     34 	"bytes"
     35 	"cmd/internal/obj"
     36 	"fmt"
     37 	"sort"
     38 	"strings"
     39 )
     40 
     41 // A Var represents a single variable that may be stored in a register.
     42 // That variable may itself correspond to a hardware register,
     43 // to represent the use of registers in the unoptimized instruction stream.
     44 type Var struct {
     45 	offset     int64
     46 	node       *Node
     47 	nextinnode *Var
     48 	width      int
     49 	id         int // index in vars
     50 	name       int8
     51 	etype      int8
     52 	addr       int8
     53 }
     54 
     55 // Bits represents a set of Vars, stored as a bit set of var numbers
     56 // (the index in vars, or equivalently v.id).
     57 type Bits struct {
     58 	b [BITS]uint64
     59 }
     60 
     61 const (
     62 	BITS = 3
     63 	NVAR = BITS * 64
     64 )
     65 
     66 var (
     67 	vars [NVAR]Var // variables under consideration
     68 	nvar int       // number of vars
     69 
     70 	regbits uint64 // bits for hardware registers
     71 
     72 	zbits   Bits // zero
     73 	externs Bits // global variables
     74 	params  Bits // function parameters and results
     75 	ivar    Bits // function parameters (inputs)
     76 	ovar    Bits // function results (outputs)
     77 	consts  Bits // constant values
     78 	addrs   Bits // variables with address taken
     79 )
     80 
     81 // A Reg is a wrapper around a single Prog (one instruction) that holds
     82 // register optimization information while the optimizer runs.
     83 // r->prog is the instruction.
     84 type Reg struct {
     85 	set  Bits // regopt variables written by this instruction.
     86 	use1 Bits // regopt variables read by prog->from.
     87 	use2 Bits // regopt variables read by prog->to.
     88 
     89 	// refahead/refbehind are the regopt variables whose current
     90 	// value may be used in the following/preceding instructions
     91 	// up to a CALL (or the value is clobbered).
     92 	refbehind Bits
     93 	refahead  Bits
     94 
     95 	// calahead/calbehind are similar, but for variables in
     96 	// instructions that are reachable after hitting at least one
     97 	// CALL.
     98 	calbehind Bits
     99 	calahead  Bits
    100 
    101 	regdiff Bits
    102 	act     Bits
    103 	regu    uint64 // register used bitmap
    104 }
    105 
    106 // A Rgn represents a single regopt variable over a region of code
    107 // where a register could potentially be dedicated to that variable.
    108 // The code encompassed by a Rgn is defined by the flow graph,
    109 // starting at enter, flood-filling forward while varno is refahead
    110 // and backward while varno is refbehind, and following branches.
    111 // A single variable may be represented by multiple disjoint Rgns and
    112 // each Rgn may choose a different register for that variable.
    113 // Registers are allocated to regions greedily in order of descending
    114 // cost.
    115 type Rgn struct {
    116 	enter *Flow
    117 	cost  int16
    118 	varno int16
    119 	regno int16
    120 }
    121 
    122 // The Plan 9 C compilers used a limit of 600 regions,
    123 // but the yacc-generated parser in y.go has 3100 regions.
    124 // We set MaxRgn large enough to handle that.
    125 // There's not a huge cost to having too many regions:
    126 // the main processing traces the live area for each variable,
    127 // which is limited by the number of variables times the area,
    128 // not the raw region count. If there are many regions, they
    129 // are almost certainly small and easy to trace.
    130 // The only operation that scales with region count is the
    131 // sorting by cost, which uses sort.Sort and is therefore
    132 // guaranteed n log n.
    133 const MaxRgn = 6000
    134 
    135 var (
    136 	region  []Rgn
    137 	nregion int
    138 )
    139 
    140 type rcmp []Rgn
    141 
    142 func (x rcmp) Len() int {
    143 	return len(x)
    144 }
    145 
    146 func (x rcmp) Swap(i, j int) {
    147 	x[i], x[j] = x[j], x[i]
    148 }
    149 
    150 func (x rcmp) Less(i, j int) bool {
    151 	p1 := &x[i]
    152 	p2 := &x[j]
    153 	if p1.cost != p2.cost {
    154 		return int(p2.cost)-int(p1.cost) < 0
    155 	}
    156 	if p1.varno != p2.varno {
    157 		return int(p2.varno)-int(p1.varno) < 0
    158 	}
    159 	if p1.enter != p2.enter {
    160 		return int(p2.enter.Id-p1.enter.Id) < 0
    161 	}
    162 	return false
    163 }
    164 
    165 func setaddrs(bit Bits) {
    166 	var i int
    167 	var n int
    168 	var v *Var
    169 	var node *Node
    170 
    171 	for bany(&bit) {
    172 		// convert each bit to a variable
    173 		i = bnum(bit)
    174 
    175 		node = vars[i].node
    176 		n = int(vars[i].name)
    177 		biclr(&bit, uint(i))
    178 
    179 		// disable all pieces of that variable
    180 		for i = 0; i < nvar; i++ {
    181 			v = &vars[i]
    182 			if v.node == node && int(v.name) == n {
    183 				v.addr = 2
    184 			}
    185 		}
    186 	}
    187 }
    188 
    189 var regnodes [64]*Node
    190 
    191 func walkvardef(n *Node, f *Flow, active int) {
    192 	var f1 *Flow
    193 	var bn int
    194 	var v *Var
    195 
    196 	for f1 = f; f1 != nil; f1 = f1.S1 {
    197 		if f1.Active == int32(active) {
    198 			break
    199 		}
    200 		f1.Active = int32(active)
    201 		if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
    202 			break
    203 		}
    204 		for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
    205 			bn = v.id
    206 			biset(&(f1.Data.(*Reg)).act, uint(bn))
    207 		}
    208 
    209 		if f1.Prog.As == obj.ACALL {
    210 			break
    211 		}
    212 	}
    213 
    214 	for f2 := f; f2 != f1; f2 = f2.S1 {
    215 		if f2.S2 != nil {
    216 			walkvardef(n, f2.S2, active)
    217 		}
    218 	}
    219 }
    220 
    221 /*
    222  * add mov b,rn
    223  * just after r
    224  */
    225 func addmove(r *Flow, bn int, rn int, f int) {
    226 	p1 := Ctxt.NewProg()
    227 	Clearp(p1)
    228 	p1.Pc = 9999
    229 
    230 	p := r.Prog
    231 	p1.Link = p.Link
    232 	p.Link = p1
    233 	p1.Lineno = p.Lineno
    234 
    235 	v := &vars[bn]
    236 
    237 	a := &p1.To
    238 	a.Offset = v.offset
    239 	a.Etype = uint8(v.etype)
    240 	a.Type = obj.TYPE_MEM
    241 	a.Name = v.name
    242 	a.Node = v.node
    243 	a.Sym = Linksym(v.node.Sym)
    244 
    245 	/* NOTE(rsc): 9g did
    246 	if(a->etype == TARRAY)
    247 		a->type = TYPE_ADDR;
    248 	else if(a->sym == nil)
    249 		a->type = TYPE_CONST;
    250 	*/
    251 	p1.As = int16(Thearch.Optoas(OAS, Types[uint8(v.etype)]))
    252 
    253 	// TODO(rsc): Remove special case here.
    254 	if (Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9') && v.etype == TBOOL {
    255 		p1.As = int16(Thearch.Optoas(OAS, Types[TUINT8]))
    256 	}
    257 	p1.From.Type = obj.TYPE_REG
    258 	p1.From.Reg = int16(rn)
    259 	p1.From.Name = obj.NAME_NONE
    260 	if f == 0 {
    261 		p1.From = *a
    262 		*a = obj.Addr{}
    263 		a.Type = obj.TYPE_REG
    264 		a.Reg = int16(rn)
    265 	}
    266 
    267 	if Debug['R'] != 0 && Debug['v'] != 0 {
    268 		fmt.Printf("%v ===add=== %v\n", p, p1)
    269 	}
    270 	Ostats.Nspill++
    271 }
    272 
    273 func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
    274 	t1 := o1 + int64(w1)
    275 	t2 := o2 + int64(w2)
    276 
    277 	if t1 <= o2 || t2 <= o1 {
    278 		return false
    279 	}
    280 
    281 	return true
    282 }
    283 
    284 func mkvar(f *Flow, a *obj.Addr) Bits {
    285 	/*
    286 	 * mark registers used
    287 	 */
    288 	if a.Type == obj.TYPE_NONE {
    289 		return zbits
    290 	}
    291 
    292 	r := f.Data.(*Reg)
    293 	r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB
    294 
    295 	var n int
    296 	switch a.Type {
    297 	default:
    298 		regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
    299 		if regu == 0 {
    300 			return zbits
    301 		}
    302 		bit := zbits
    303 		bit.b[0] = regu
    304 		return bit
    305 
    306 		// TODO(rsc): Remove special case here.
    307 	case obj.TYPE_ADDR:
    308 		var bit Bits
    309 		if Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9' {
    310 			goto memcase
    311 		}
    312 		a.Type = obj.TYPE_MEM
    313 		bit = mkvar(f, a)
    314 		setaddrs(bit)
    315 		a.Type = obj.TYPE_ADDR
    316 		Ostats.Naddr++
    317 		return zbits
    318 
    319 	memcase:
    320 		fallthrough
    321 
    322 	case obj.TYPE_MEM:
    323 		if r != nil {
    324 			r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
    325 		}
    326 
    327 		/* NOTE: 5g did
    328 		if(r->f.prog->scond & (C_PBIT|C_WBIT))
    329 			r->set.b[0] |= RtoB(a->reg);
    330 		*/
    331 		switch a.Name {
    332 		default:
    333 			// Note: This case handles NAME_EXTERN and NAME_STATIC.
    334 			// We treat these as requiring eager writes to memory, due to
    335 			// the possibility of a fault handler looking at them, so there is
    336 			// not much point in registerizing the loads.
    337 			// If we later choose the set of candidate variables from a
    338 			// larger list, these cases could be deprioritized instead of
    339 			// removed entirely.
    340 			return zbits
    341 
    342 		case obj.NAME_PARAM,
    343 			obj.NAME_AUTO:
    344 			n = int(a.Name)
    345 		}
    346 	}
    347 
    348 	node, _ := a.Node.(*Node)
    349 	if node == nil || node.Op != ONAME || node.Orig == nil {
    350 		return zbits
    351 	}
    352 	node = node.Orig
    353 	if node.Orig != node {
    354 		Fatal("%v: bad node", Ctxt.Dconv(a))
    355 	}
    356 	if node.Sym == nil || node.Sym.Name[0] == '.' {
    357 		return zbits
    358 	}
    359 	et := int(a.Etype)
    360 	o := a.Offset
    361 	w := a.Width
    362 	if w < 0 {
    363 		Fatal("bad width %d for %v", w, Ctxt.Dconv(a))
    364 	}
    365 
    366 	flag := 0
    367 	var v *Var
    368 	for i := 0; i < nvar; i++ {
    369 		v = &vars[i]
    370 		if v.node == node && int(v.name) == n {
    371 			if v.offset == o {
    372 				if int(v.etype) == et {
    373 					if int64(v.width) == w {
    374 						// TODO(rsc): Remove special case for arm here.
    375 						if flag == 0 || Thearch.Thechar != '5' {
    376 							return blsh(uint(i))
    377 						}
    378 					}
    379 				}
    380 			}
    381 
    382 			// if they overlap, disable both
    383 			if overlap_reg(v.offset, v.width, o, int(w)) {
    384 				//				print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
    385 				v.addr = 1
    386 
    387 				flag = 1
    388 			}
    389 		}
    390 	}
    391 
    392 	switch et {
    393 	case 0, TFUNC:
    394 		return zbits
    395 	}
    396 
    397 	if nvar >= NVAR {
    398 		if Debug['w'] > 1 && node != nil {
    399 			Fatal("variable not optimized: %v", Nconv(node, obj.FmtSharp))
    400 		}
    401 		if Debug['v'] > 0 {
    402 			Warn("variable not optimized: %v", Nconv(node, obj.FmtSharp))
    403 		}
    404 
    405 		// If we're not tracking a word in a variable, mark the rest as
    406 		// having its address taken, so that we keep the whole thing
    407 		// live at all calls. otherwise we might optimize away part of
    408 		// a variable but not all of it.
    409 		var v *Var
    410 		for i := 0; i < nvar; i++ {
    411 			v = &vars[i]
    412 			if v.node == node {
    413 				v.addr = 1
    414 			}
    415 		}
    416 
    417 		return zbits
    418 	}
    419 
    420 	i := nvar
    421 	nvar++
    422 	v = &vars[i]
    423 	v.id = i
    424 	v.offset = o
    425 	v.name = int8(n)
    426 	v.etype = int8(et)
    427 	v.width = int(w)
    428 	v.addr = int8(flag) // funny punning
    429 	v.node = node
    430 
    431 	// node->opt is the head of a linked list
    432 	// of Vars within the given Node, so that
    433 	// we can start at a Var and find all the other
    434 	// Vars in the same Go variable.
    435 	v.nextinnode, _ = node.Opt().(*Var)
    436 
    437 	node.SetOpt(v)
    438 
    439 	bit := blsh(uint(i))
    440 	if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
    441 		for z := 0; z < BITS; z++ {
    442 			externs.b[z] |= bit.b[z]
    443 		}
    444 	}
    445 	if n == obj.NAME_PARAM {
    446 		for z := 0; z < BITS; z++ {
    447 			params.b[z] |= bit.b[z]
    448 		}
    449 	}
    450 
    451 	if node.Class == PPARAM {
    452 		for z := 0; z < BITS; z++ {
    453 			ivar.b[z] |= bit.b[z]
    454 		}
    455 	}
    456 	if node.Class == PPARAMOUT {
    457 		for z := 0; z < BITS; z++ {
    458 			ovar.b[z] |= bit.b[z]
    459 		}
    460 	}
    461 
    462 	// Treat values with their address taken as live at calls,
    463 	// because the garbage collector's liveness analysis in ../gc/plive.c does.
    464 	// These must be consistent or else we will elide stores and the garbage
    465 	// collector will see uninitialized data.
    466 	// The typical case where our own analysis is out of sync is when the
    467 	// node appears to have its address taken but that code doesn't actually
    468 	// get generated and therefore doesn't show up as an address being
    469 	// taken when we analyze the instruction stream.
    470 	// One instance of this case is when a closure uses the same name as
    471 	// an outer variable for one of its own variables declared with :=.
    472 	// The parser flags the outer variable as possibly shared, and therefore
    473 	// sets addrtaken, even though it ends up not being actually shared.
    474 	// If we were better about _ elision, _ = &x would suffice too.
    475 	// The broader := in a closure problem is mentioned in a comment in
    476 	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
    477 	if node.Addrtaken {
    478 		v.addr = 1
    479 	}
    480 
    481 	// Disable registerization for globals, because:
    482 	// (1) we might panic at any time and we want the recovery code
    483 	// to see the latest values (issue 1304).
    484 	// (2) we don't know what pointers might point at them and we want
    485 	// loads via those pointers to see updated values and vice versa (issue 7995).
    486 	//
    487 	// Disable registerization for results if using defer, because the deferred func
    488 	// might recover and return, causing the current values to be used.
    489 	if node.Class == PEXTERN || (Hasdefer != 0 && node.Class == PPARAMOUT) {
    490 		v.addr = 1
    491 	}
    492 
    493 	if Debug['R'] != 0 {
    494 		fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, Econv(int(et), 0), o, w, Nconv(node, obj.FmtSharp), Ctxt.Dconv(a), v.addr)
    495 	}
    496 	Ostats.Nvar++
    497 
    498 	return bit
    499 }
    500 
    501 var change int
    502 
    503 func prop(f *Flow, ref Bits, cal Bits) {
    504 	var f1 *Flow
    505 	var r1 *Reg
    506 	var z int
    507 	var i int
    508 	var v *Var
    509 	var v1 *Var
    510 
    511 	for f1 = f; f1 != nil; f1 = f1.P1 {
    512 		r1 = f1.Data.(*Reg)
    513 		for z = 0; z < BITS; z++ {
    514 			ref.b[z] |= r1.refahead.b[z]
    515 			if ref.b[z] != r1.refahead.b[z] {
    516 				r1.refahead.b[z] = ref.b[z]
    517 				change = 1
    518 			}
    519 
    520 			cal.b[z] |= r1.calahead.b[z]
    521 			if cal.b[z] != r1.calahead.b[z] {
    522 				r1.calahead.b[z] = cal.b[z]
    523 				change = 1
    524 			}
    525 		}
    526 
    527 		switch f1.Prog.As {
    528 		case obj.ACALL:
    529 			if Noreturn(f1.Prog) {
    530 				break
    531 			}
    532 
    533 			// Mark all input variables (ivar) as used, because that's what the
    534 			// liveness bitmaps say. The liveness bitmaps say that so that a
    535 			// panic will not show stale values in the parameter dump.
    536 			// Mark variables with a recent VARDEF (r1->act) as used,
    537 			// so that the optimizer flushes initializations to memory,
    538 			// so that if a garbage collection happens during this CALL,
    539 			// the collector will see initialized memory. Again this is to
    540 			// match what the liveness bitmaps say.
    541 			for z = 0; z < BITS; z++ {
    542 				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
    543 				ref.b[z] = 0
    544 			}
    545 
    546 			// cal.b is the current approximation of what's live across the call.
    547 			// Every bit in cal.b is a single stack word. For each such word,
    548 			// find all the other tracked stack words in the same Go variable
    549 			// (struct/slice/string/interface) and mark them live too.
    550 			// This is necessary because the liveness analysis for the garbage
    551 			// collector works at variable granularity, not at word granularity.
    552 			// It is fundamental for slice/string/interface: the garbage collector
    553 			// needs the whole value, not just some of the words, in order to
    554 			// interpret the other bits correctly. Specifically, slice needs a consistent
    555 			// ptr and cap, string needs a consistent ptr and len, and interface
    556 			// needs a consistent type word and data word.
    557 			for z = 0; z < BITS; z++ {
    558 				if cal.b[z] == 0 {
    559 					continue
    560 				}
    561 				for i = 0; i < 64; i++ {
    562 					if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
    563 						continue
    564 					}
    565 					v = &vars[z*64+i]
    566 					if v.node.Opt() == nil { // v represents fixed register, not Go variable
    567 						continue
    568 					}
    569 
    570 					// v->node->opt is the head of a linked list of Vars
    571 					// corresponding to tracked words from the Go variable v->node.
    572 					// Walk the list and set all the bits.
    573 					// For a large struct this could end up being quadratic:
    574 					// after the first setting, the outer loop (for z, i) would see a 1 bit
    575 					// for all of the remaining words in the struct, and for each such
    576 					// word would go through and turn on all the bits again.
    577 					// To avoid the quadratic behavior, we only turn on the bits if
    578 					// v is the head of the list or if the head's bit is not yet turned on.
    579 					// This will set the bits at most twice, keeping the overall loop linear.
    580 					v1, _ = v.node.Opt().(*Var)
    581 
    582 					if v == v1 || !btest(&cal, uint(v1.id)) {
    583 						for ; v1 != nil; v1 = v1.nextinnode {
    584 							biset(&cal, uint(v1.id))
    585 						}
    586 					}
    587 				}
    588 			}
    589 
    590 		case obj.ATEXT:
    591 			for z = 0; z < BITS; z++ {
    592 				cal.b[z] = 0
    593 				ref.b[z] = 0
    594 			}
    595 
    596 		case obj.ARET:
    597 			for z = 0; z < BITS; z++ {
    598 				cal.b[z] = externs.b[z] | ovar.b[z]
    599 				ref.b[z] = 0
    600 			}
    601 		}
    602 
    603 		for z = 0; z < BITS; z++ {
    604 			ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
    605 			cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
    606 			r1.refbehind.b[z] = ref.b[z]
    607 			r1.calbehind.b[z] = cal.b[z]
    608 		}
    609 
    610 		if f1.Active != 0 {
    611 			break
    612 		}
    613 		f1.Active = 1
    614 	}
    615 
    616 	var r *Reg
    617 	var f2 *Flow
    618 	for ; f != f1; f = f.P1 {
    619 		r = f.Data.(*Reg)
    620 		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
    621 			prop(f2, r.refbehind, r.calbehind)
    622 		}
    623 	}
    624 }
    625 
    626 func synch(f *Flow, dif Bits) {
    627 	var r1 *Reg
    628 	var z int
    629 
    630 	for f1 := f; f1 != nil; f1 = f1.S1 {
    631 		r1 = f1.Data.(*Reg)
    632 		for z = 0; z < BITS; z++ {
    633 			dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
    634 			if dif.b[z] != r1.regdiff.b[z] {
    635 				r1.regdiff.b[z] = dif.b[z]
    636 				change = 1
    637 			}
    638 		}
    639 
    640 		if f1.Active != 0 {
    641 			break
    642 		}
    643 		f1.Active = 1
    644 		for z = 0; z < BITS; z++ {
    645 			dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
    646 		}
    647 		if f1.S2 != nil {
    648 			synch(f1.S2, dif)
    649 		}
    650 	}
    651 }
    652 
    653 func allreg(b uint64, r *Rgn) uint64 {
    654 	v := &vars[r.varno]
    655 	r.regno = 0
    656 	switch v.etype {
    657 	default:
    658 		Fatal("unknown etype %d/%v", Bitno(b), Econv(int(v.etype), 0))
    659 
    660 	case TINT8,
    661 		TUINT8,
    662 		TINT16,
    663 		TUINT16,
    664 		TINT32,
    665 		TUINT32,
    666 		TINT64,
    667 		TUINT64,
    668 		TINT,
    669 		TUINT,
    670 		TUINTPTR,
    671 		TBOOL,
    672 		TPTR32,
    673 		TPTR64:
    674 		i := Thearch.BtoR(^b)
    675 		if i != 0 && r.cost > 0 {
    676 			r.regno = int16(i)
    677 			return Thearch.RtoB(i)
    678 		}
    679 
    680 	case TFLOAT32, TFLOAT64:
    681 		i := Thearch.BtoF(^b)
    682 		if i != 0 && r.cost > 0 {
    683 			r.regno = int16(i)
    684 			return Thearch.FtoB(i)
    685 		}
    686 	}
    687 
    688 	return 0
    689 }
    690 
    691 func LOAD(r *Reg, z int) uint64 {
    692 	return ^r.refbehind.b[z] & r.refahead.b[z]
    693 }
    694 
    695 func STORE(r *Reg, z int) uint64 {
    696 	return ^r.calbehind.b[z] & r.calahead.b[z]
    697 }
    698 
    699 // Cost parameters
    700 const (
    701 	CLOAD = 5 // cost of load
    702 	CREF  = 5 // cost of reference if not registerized
    703 	LOOP  = 3 // loop execution count (applied in popt.go)
    704 )
    705 
    706 func paint1(f *Flow, bn int) {
    707 	z := bn / 64
    708 	bb := uint64(1 << uint(bn%64))
    709 	r := f.Data.(*Reg)
    710 	if r.act.b[z]&bb != 0 {
    711 		return
    712 	}
    713 	var f1 *Flow
    714 	var r1 *Reg
    715 	for {
    716 		if r.refbehind.b[z]&bb == 0 {
    717 			break
    718 		}
    719 		f1 = f.P1
    720 		if f1 == nil {
    721 			break
    722 		}
    723 		r1 = f1.Data.(*Reg)
    724 		if r1.refahead.b[z]&bb == 0 {
    725 			break
    726 		}
    727 		if r1.act.b[z]&bb != 0 {
    728 			break
    729 		}
    730 		f = f1
    731 		r = r1
    732 	}
    733 
    734 	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
    735 		change -= CLOAD * int(f.Loop)
    736 	}
    737 
    738 	for {
    739 		r.act.b[z] |= bb
    740 
    741 		if f.Prog.As != obj.ANOP { // don't give credit for NOPs
    742 			if r.use1.b[z]&bb != 0 {
    743 				change += CREF * int(f.Loop)
    744 			}
    745 			if (r.use2.b[z]|r.set.b[z])&bb != 0 {
    746 				change += CREF * int(f.Loop)
    747 			}
    748 		}
    749 
    750 		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
    751 			change -= CLOAD * int(f.Loop)
    752 		}
    753 
    754 		if r.refbehind.b[z]&bb != 0 {
    755 			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
    756 				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
    757 					paint1(f1, bn)
    758 				}
    759 			}
    760 		}
    761 
    762 		if r.refahead.b[z]&bb == 0 {
    763 			break
    764 		}
    765 		f1 = f.S2
    766 		if f1 != nil {
    767 			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
    768 				paint1(f1, bn)
    769 			}
    770 		}
    771 		f = f.S1
    772 		if f == nil {
    773 			break
    774 		}
    775 		r = f.Data.(*Reg)
    776 		if r.act.b[z]&bb != 0 {
    777 			break
    778 		}
    779 		if r.refbehind.b[z]&bb == 0 {
    780 			break
    781 		}
    782 	}
    783 }
    784 
    785 func paint2(f *Flow, bn int, depth int) uint64 {
    786 	z := bn / 64
    787 	bb := uint64(1 << uint(bn%64))
    788 	vreg := regbits
    789 	r := f.Data.(*Reg)
    790 	if r.act.b[z]&bb == 0 {
    791 		return vreg
    792 	}
    793 	var r1 *Reg
    794 	var f1 *Flow
    795 	for {
    796 		if r.refbehind.b[z]&bb == 0 {
    797 			break
    798 		}
    799 		f1 = f.P1
    800 		if f1 == nil {
    801 			break
    802 		}
    803 		r1 = f1.Data.(*Reg)
    804 		if r1.refahead.b[z]&bb == 0 {
    805 			break
    806 		}
    807 		if r1.act.b[z]&bb == 0 {
    808 			break
    809 		}
    810 		f = f1
    811 		r = r1
    812 	}
    813 
    814 	for {
    815 		if Debug['R'] != 0 && Debug['v'] != 0 {
    816 			fmt.Printf("  paint2 %d %v\n", depth, f.Prog)
    817 		}
    818 
    819 		r.act.b[z] &^= bb
    820 
    821 		vreg |= r.regu
    822 
    823 		if r.refbehind.b[z]&bb != 0 {
    824 			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
    825 				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
    826 					vreg |= paint2(f1, bn, depth+1)
    827 				}
    828 			}
    829 		}
    830 
    831 		if r.refahead.b[z]&bb == 0 {
    832 			break
    833 		}
    834 		f1 = f.S2
    835 		if f1 != nil {
    836 			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
    837 				vreg |= paint2(f1, bn, depth+1)
    838 			}
    839 		}
    840 		f = f.S1
    841 		if f == nil {
    842 			break
    843 		}
    844 		r = f.Data.(*Reg)
    845 		if r.act.b[z]&bb == 0 {
    846 			break
    847 		}
    848 		if r.refbehind.b[z]&bb == 0 {
    849 			break
    850 		}
    851 	}
    852 
    853 	return vreg
    854 }
    855 
    856 func paint3(f *Flow, bn int, rb uint64, rn int) {
    857 	z := bn / 64
    858 	bb := uint64(1 << uint(bn%64))
    859 	r := f.Data.(*Reg)
    860 	if r.act.b[z]&bb != 0 {
    861 		return
    862 	}
    863 	var r1 *Reg
    864 	var f1 *Flow
    865 	for {
    866 		if r.refbehind.b[z]&bb == 0 {
    867 			break
    868 		}
    869 		f1 = f.P1
    870 		if f1 == nil {
    871 			break
    872 		}
    873 		r1 = f1.Data.(*Reg)
    874 		if r1.refahead.b[z]&bb == 0 {
    875 			break
    876 		}
    877 		if r1.act.b[z]&bb != 0 {
    878 			break
    879 		}
    880 		f = f1
    881 		r = r1
    882 	}
    883 
    884 	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
    885 		addmove(f, bn, rn, 0)
    886 	}
    887 	var p *obj.Prog
    888 	for {
    889 		r.act.b[z] |= bb
    890 		p = f.Prog
    891 
    892 		if r.use1.b[z]&bb != 0 {
    893 			if Debug['R'] != 0 && Debug['v'] != 0 {
    894 				fmt.Printf("%v", p)
    895 			}
    896 			addreg(&p.From, rn)
    897 			if Debug['R'] != 0 && Debug['v'] != 0 {
    898 				fmt.Printf(" ===change== %v\n", p)
    899 			}
    900 		}
    901 
    902 		if (r.use2.b[z]|r.set.b[z])&bb != 0 {
    903 			if Debug['R'] != 0 && Debug['v'] != 0 {
    904 				fmt.Printf("%v", p)
    905 			}
    906 			addreg(&p.To, rn)
    907 			if Debug['R'] != 0 && Debug['v'] != 0 {
    908 				fmt.Printf(" ===change== %v\n", p)
    909 			}
    910 		}
    911 
    912 		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
    913 			addmove(f, bn, rn, 1)
    914 		}
    915 		r.regu |= rb
    916 
    917 		if r.refbehind.b[z]&bb != 0 {
    918 			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
    919 				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
    920 					paint3(f1, bn, rb, rn)
    921 				}
    922 			}
    923 		}
    924 
    925 		if r.refahead.b[z]&bb == 0 {
    926 			break
    927 		}
    928 		f1 = f.S2
    929 		if f1 != nil {
    930 			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
    931 				paint3(f1, bn, rb, rn)
    932 			}
    933 		}
    934 		f = f.S1
    935 		if f == nil {
    936 			break
    937 		}
    938 		r = f.Data.(*Reg)
    939 		if r.act.b[z]&bb != 0 {
    940 			break
    941 		}
    942 		if r.refbehind.b[z]&bb == 0 {
    943 			break
    944 		}
    945 	}
    946 }
    947 
    948 func addreg(a *obj.Addr, rn int) {
    949 	a.Sym = nil
    950 	a.Node = nil
    951 	a.Offset = 0
    952 	a.Type = obj.TYPE_REG
    953 	a.Reg = int16(rn)
    954 	a.Name = 0
    955 
    956 	Ostats.Ncvtreg++
    957 }
    958 
    959 func dumpone(f *Flow, isreg int) {
    960 	fmt.Printf("%d:%v", f.Loop, f.Prog)
    961 	if isreg != 0 {
    962 		r := f.Data.(*Reg)
    963 		var bit Bits
    964 		for z := 0; z < BITS; z++ {
    965 			bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
    966 		}
    967 		if bany(&bit) {
    968 			fmt.Printf("\t")
    969 			if bany(&r.set) {
    970 				fmt.Printf(" s:%v", &r.set)
    971 			}
    972 			if bany(&r.use1) {
    973 				fmt.Printf(" u1:%v", &r.use1)
    974 			}
    975 			if bany(&r.use2) {
    976 				fmt.Printf(" u2:%v", &r.use2)
    977 			}
    978 			if bany(&r.refbehind) {
    979 				fmt.Printf(" rb:%v ", &r.refbehind)
    980 			}
    981 			if bany(&r.refahead) {
    982 				fmt.Printf(" ra:%v ", &r.refahead)
    983 			}
    984 			if bany(&r.calbehind) {
    985 				fmt.Printf(" cb:%v ", &r.calbehind)
    986 			}
    987 			if bany(&r.calahead) {
    988 				fmt.Printf(" ca:%v ", &r.calahead)
    989 			}
    990 			if bany(&r.regdiff) {
    991 				fmt.Printf(" d:%v ", &r.regdiff)
    992 			}
    993 			if bany(&r.act) {
    994 				fmt.Printf(" a:%v ", &r.act)
    995 			}
    996 		}
    997 	}
    998 
    999 	fmt.Printf("\n")
   1000 }
   1001 
   1002 func Dumpit(str string, r0 *Flow, isreg int) {
   1003 	var r1 *Flow
   1004 
   1005 	fmt.Printf("\n%s\n", str)
   1006 	for r := r0; r != nil; r = r.Link {
   1007 		dumpone(r, isreg)
   1008 		r1 = r.P2
   1009 		if r1 != nil {
   1010 			fmt.Printf("\tpred:")
   1011 			for ; r1 != nil; r1 = r1.P2link {
   1012 				fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
   1013 			}
   1014 			if r.P1 != nil {
   1015 				fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
   1016 			} else {
   1017 				fmt.Printf(" (only)")
   1018 			}
   1019 			fmt.Printf("\n")
   1020 		}
   1021 
   1022 		// Print successors if it's not just the next one
   1023 		if r.S1 != r.Link || r.S2 != nil {
   1024 			fmt.Printf("\tsucc:")
   1025 			if r.S1 != nil {
   1026 				fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
   1027 			}
   1028 			if r.S2 != nil {
   1029 				fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
   1030 			}
   1031 			fmt.Printf("\n")
   1032 		}
   1033 	}
   1034 }
   1035 
   1036 func regopt(firstp *obj.Prog) {
   1037 	mergetemp(firstp)
   1038 
   1039 	/*
   1040 	 * control flow is more complicated in generated go code
   1041 	 * than in generated c code.  define pseudo-variables for
   1042 	 * registers, so we have complete register usage information.
   1043 	 */
   1044 	var nreg int
   1045 	regnames := Thearch.Regnames(&nreg)
   1046 
   1047 	nvar = nreg
   1048 	for i := 0; i < nreg; i++ {
   1049 		vars[i] = Var{}
   1050 	}
   1051 	for i := 0; i < nreg; i++ {
   1052 		if regnodes[i] == nil {
   1053 			regnodes[i] = newname(Lookup(regnames[i]))
   1054 		}
   1055 		vars[i].node = regnodes[i]
   1056 	}
   1057 
   1058 	regbits = Thearch.Excludedregs()
   1059 	externs = zbits
   1060 	params = zbits
   1061 	consts = zbits
   1062 	addrs = zbits
   1063 	ivar = zbits
   1064 	ovar = zbits
   1065 
   1066 	/*
   1067 	 * pass 1
   1068 	 * build aux data structure
   1069 	 * allocate pcs
   1070 	 * find use and set of variables
   1071 	 */
   1072 	g := Flowstart(firstp, func() interface{} { return new(Reg) })
   1073 	if g == nil {
   1074 		for i := 0; i < nvar; i++ {
   1075 			vars[i].node.SetOpt(nil)
   1076 		}
   1077 		return
   1078 	}
   1079 
   1080 	firstf := g.Start
   1081 
   1082 	for f := firstf; f != nil; f = f.Link {
   1083 		p := f.Prog
   1084 		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
   1085 			continue
   1086 		}
   1087 
   1088 		// Avoid making variables for direct-called functions.
   1089 		if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
   1090 			continue
   1091 		}
   1092 
   1093 		// from vs to doesn't matter for registers.
   1094 		r := f.Data.(*Reg)
   1095 		r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
   1096 		r.set.b[0] |= p.Info.Regset
   1097 
   1098 		bit := mkvar(f, &p.From)
   1099 		if bany(&bit) {
   1100 			if p.Info.Flags&LeftAddr != 0 {
   1101 				setaddrs(bit)
   1102 			}
   1103 			if p.Info.Flags&LeftRead != 0 {
   1104 				for z := 0; z < BITS; z++ {
   1105 					r.use1.b[z] |= bit.b[z]
   1106 				}
   1107 			}
   1108 			if p.Info.Flags&LeftWrite != 0 {
   1109 				for z := 0; z < BITS; z++ {
   1110 					r.set.b[z] |= bit.b[z]
   1111 				}
   1112 			}
   1113 		}
   1114 
   1115 		// Compute used register for reg
   1116 		if p.Info.Flags&RegRead != 0 {
   1117 			r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
   1118 		}
   1119 
   1120 		// Currently we never generate three register forms.
   1121 		// If we do, this will need to change.
   1122 		if p.From3Type() != obj.TYPE_NONE {
   1123 			Fatal("regopt not implemented for from3")
   1124 		}
   1125 
   1126 		bit = mkvar(f, &p.To)
   1127 		if bany(&bit) {
   1128 			if p.Info.Flags&RightAddr != 0 {
   1129 				setaddrs(bit)
   1130 			}
   1131 			if p.Info.Flags&RightRead != 0 {
   1132 				for z := 0; z < BITS; z++ {
   1133 					r.use2.b[z] |= bit.b[z]
   1134 				}
   1135 			}
   1136 			if p.Info.Flags&RightWrite != 0 {
   1137 				for z := 0; z < BITS; z++ {
   1138 					r.set.b[z] |= bit.b[z]
   1139 				}
   1140 			}
   1141 		}
   1142 	}
   1143 
   1144 	for i := 0; i < nvar; i++ {
   1145 		v := &vars[i]
   1146 		if v.addr != 0 {
   1147 			bit := blsh(uint(i))
   1148 			for z := 0; z < BITS; z++ {
   1149 				addrs.b[z] |= bit.b[z]
   1150 			}
   1151 		}
   1152 
   1153 		if Debug['R'] != 0 && Debug['v'] != 0 {
   1154 			fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, Econv(int(v.etype), 0), v.width, v.node, v.offset)
   1155 		}
   1156 	}
   1157 
   1158 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1159 		Dumpit("pass1", firstf, 1)
   1160 	}
   1161 
   1162 	/*
   1163 	 * pass 2
   1164 	 * find looping structure
   1165 	 */
   1166 	flowrpo(g)
   1167 
   1168 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1169 		Dumpit("pass2", firstf, 1)
   1170 	}
   1171 
   1172 	/*
   1173 	 * pass 2.5
   1174 	 * iterate propagating fat vardef covering forward
   1175 	 * r->act records vars with a VARDEF since the last CALL.
   1176 	 * (r->act will be reused in pass 5 for something else,
   1177 	 * but we'll be done with it by then.)
   1178 	 */
   1179 	active := 0
   1180 
   1181 	for f := firstf; f != nil; f = f.Link {
   1182 		f.Active = 0
   1183 		r := f.Data.(*Reg)
   1184 		r.act = zbits
   1185 	}
   1186 
   1187 	for f := firstf; f != nil; f = f.Link {
   1188 		p := f.Prog
   1189 		if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
   1190 			active++
   1191 			walkvardef(p.To.Node.(*Node), f, active)
   1192 		}
   1193 	}
   1194 
   1195 	/*
   1196 	 * pass 3
   1197 	 * iterate propagating usage
   1198 	 * 	back until flow graph is complete
   1199 	 */
   1200 	var f1 *Flow
   1201 	var i int
   1202 	var f *Flow
   1203 loop1:
   1204 	change = 0
   1205 
   1206 	for f = firstf; f != nil; f = f.Link {
   1207 		f.Active = 0
   1208 	}
   1209 	for f = firstf; f != nil; f = f.Link {
   1210 		if f.Prog.As == obj.ARET {
   1211 			prop(f, zbits, zbits)
   1212 		}
   1213 	}
   1214 
   1215 	/* pick up unreachable code */
   1216 loop11:
   1217 	i = 0
   1218 
   1219 	for f = firstf; f != nil; f = f1 {
   1220 		f1 = f.Link
   1221 		if f1 != nil && f1.Active != 0 && f.Active == 0 {
   1222 			prop(f, zbits, zbits)
   1223 			i = 1
   1224 		}
   1225 	}
   1226 
   1227 	if i != 0 {
   1228 		goto loop11
   1229 	}
   1230 	if change != 0 {
   1231 		goto loop1
   1232 	}
   1233 
   1234 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1235 		Dumpit("pass3", firstf, 1)
   1236 	}
   1237 
   1238 	/*
   1239 	 * pass 4
   1240 	 * iterate propagating register/variable synchrony
   1241 	 * 	forward until graph is complete
   1242 	 */
   1243 loop2:
   1244 	change = 0
   1245 
   1246 	for f = firstf; f != nil; f = f.Link {
   1247 		f.Active = 0
   1248 	}
   1249 	synch(firstf, zbits)
   1250 	if change != 0 {
   1251 		goto loop2
   1252 	}
   1253 
   1254 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1255 		Dumpit("pass4", firstf, 1)
   1256 	}
   1257 
   1258 	/*
   1259 	 * pass 4.5
   1260 	 * move register pseudo-variables into regu.
   1261 	 */
   1262 	mask := uint64((1 << uint(nreg)) - 1)
   1263 	for f := firstf; f != nil; f = f.Link {
   1264 		r := f.Data.(*Reg)
   1265 		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
   1266 		r.set.b[0] &^= mask
   1267 		r.use1.b[0] &^= mask
   1268 		r.use2.b[0] &^= mask
   1269 		r.refbehind.b[0] &^= mask
   1270 		r.refahead.b[0] &^= mask
   1271 		r.calbehind.b[0] &^= mask
   1272 		r.calahead.b[0] &^= mask
   1273 		r.regdiff.b[0] &^= mask
   1274 		r.act.b[0] &^= mask
   1275 	}
   1276 
   1277 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1278 		Dumpit("pass4.5", firstf, 1)
   1279 	}
   1280 
   1281 	/*
   1282 	 * pass 5
   1283 	 * isolate regions
   1284 	 * calculate costs (paint1)
   1285 	 */
   1286 	var bit Bits
   1287 	if f := firstf; f != nil {
   1288 		r := f.Data.(*Reg)
   1289 		for z := 0; z < BITS; z++ {
   1290 			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
   1291 		}
   1292 		if bany(&bit) && f.Refset == 0 {
   1293 			// should never happen - all variables are preset
   1294 			if Debug['w'] != 0 {
   1295 				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
   1296 			}
   1297 			f.Refset = 1
   1298 		}
   1299 	}
   1300 
   1301 	for f := firstf; f != nil; f = f.Link {
   1302 		(f.Data.(*Reg)).act = zbits
   1303 	}
   1304 	nregion = 0
   1305 	region = region[:0]
   1306 	var rgp *Rgn
   1307 	for f := firstf; f != nil; f = f.Link {
   1308 		r := f.Data.(*Reg)
   1309 		for z := 0; z < BITS; z++ {
   1310 			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
   1311 		}
   1312 		if bany(&bit) && f.Refset == 0 {
   1313 			if Debug['w'] != 0 {
   1314 				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
   1315 			}
   1316 			f.Refset = 1
   1317 			Thearch.Excise(f)
   1318 		}
   1319 
   1320 		for z := 0; z < BITS; z++ {
   1321 			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
   1322 		}
   1323 		for bany(&bit) {
   1324 			i = bnum(bit)
   1325 			change = 0
   1326 			paint1(f, i)
   1327 			biclr(&bit, uint(i))
   1328 			if change <= 0 {
   1329 				continue
   1330 			}
   1331 			if nregion >= MaxRgn {
   1332 				nregion++
   1333 				continue
   1334 			}
   1335 
   1336 			region = append(region, Rgn{
   1337 				enter: f,
   1338 				cost:  int16(change),
   1339 				varno: int16(i),
   1340 			})
   1341 			nregion++
   1342 		}
   1343 	}
   1344 
   1345 	if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
   1346 		Warn("regions: %d\n", nregion)
   1347 	}
   1348 	if nregion >= MaxRgn {
   1349 		if Debug['v'] != 0 {
   1350 			Warn("too many regions: %d\n", nregion)
   1351 		}
   1352 		nregion = MaxRgn
   1353 	}
   1354 
   1355 	sort.Sort(rcmp(region[:nregion]))
   1356 
   1357 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1358 		Dumpit("pass5", firstf, 1)
   1359 	}
   1360 
   1361 	/*
   1362 	 * pass 6
   1363 	 * determine used registers (paint2)
   1364 	 * replace code (paint3)
   1365 	 */
   1366 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1367 		fmt.Printf("\nregisterizing\n")
   1368 	}
   1369 	var usedreg uint64
   1370 	var vreg uint64
   1371 	for i := 0; i < nregion; i++ {
   1372 		rgp = &region[i]
   1373 		if Debug['R'] != 0 && Debug['v'] != 0 {
   1374 			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
   1375 		}
   1376 		bit = blsh(uint(rgp.varno))
   1377 		usedreg = paint2(rgp.enter, int(rgp.varno), 0)
   1378 		vreg = allreg(usedreg, rgp)
   1379 		if rgp.regno != 0 {
   1380 			if Debug['R'] != 0 && Debug['v'] != 0 {
   1381 				v := &vars[rgp.varno]
   1382 				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, Econv(int(v.etype), 0), obj.Rconv(int(rgp.regno)), usedreg, vreg)
   1383 			}
   1384 
   1385 			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
   1386 		}
   1387 	}
   1388 
   1389 	/*
   1390 	 * free aux structures. peep allocates new ones.
   1391 	 */
   1392 	for i := 0; i < nvar; i++ {
   1393 		vars[i].node.SetOpt(nil)
   1394 	}
   1395 	Flowend(g)
   1396 	firstf = nil
   1397 
   1398 	if Debug['R'] != 0 && Debug['v'] != 0 {
   1399 		// Rebuild flow graph, since we inserted instructions
   1400 		g := Flowstart(firstp, nil)
   1401 		firstf = g.Start
   1402 		Dumpit("pass6", firstf, 0)
   1403 		Flowend(g)
   1404 		firstf = nil
   1405 	}
   1406 
   1407 	/*
   1408 	 * pass 7
   1409 	 * peep-hole on basic block
   1410 	 */
   1411 	if Debug['R'] == 0 || Debug['P'] != 0 {
   1412 		Thearch.Peep(firstp)
   1413 	}
   1414 
   1415 	/*
   1416 	 * eliminate nops
   1417 	 */
   1418 	for p := firstp; p != nil; p = p.Link {
   1419 		for p.Link != nil && p.Link.As == obj.ANOP {
   1420 			p.Link = p.Link.Link
   1421 		}
   1422 		if p.To.Type == obj.TYPE_BRANCH {
   1423 			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
   1424 				p.To.Val = p.To.Val.(*obj.Prog).Link
   1425 			}
   1426 		}
   1427 	}
   1428 
   1429 	if Debug['R'] != 0 {
   1430 		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
   1431 			fmt.Printf("\nstats\n")
   1432 		}
   1433 
   1434 		if Ostats.Ncvtreg != 0 {
   1435 			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
   1436 		}
   1437 		if Ostats.Nspill != 0 {
   1438 			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
   1439 		}
   1440 		if Ostats.Nreload != 0 {
   1441 			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
   1442 		}
   1443 		if Ostats.Ndelmov != 0 {
   1444 			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
   1445 		}
   1446 		if Ostats.Nvar != 0 {
   1447 			fmt.Printf("\t%4d var\n", Ostats.Nvar)
   1448 		}
   1449 		if Ostats.Naddr != 0 {
   1450 			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
   1451 		}
   1452 
   1453 		Ostats = OptStats{}
   1454 	}
   1455 }
   1456 
   1457 // bany reports whether any bits in a are set.
   1458 func bany(a *Bits) bool {
   1459 	for _, x := range &a.b { // & to avoid making a copy of a.b
   1460 		if x != 0 {
   1461 			return true
   1462 		}
   1463 	}
   1464 	return false
   1465 }
   1466 
   1467 // bnum reports the lowest index of a 1 bit in a.
   1468 func bnum(a Bits) int {
   1469 	for i, x := range &a.b { // & to avoid making a copy of a.b
   1470 		if x != 0 {
   1471 			return 64*i + Bitno(x)
   1472 		}
   1473 	}
   1474 
   1475 	Fatal("bad in bnum")
   1476 	return 0
   1477 }
   1478 
   1479 // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
   1480 func blsh(n uint) Bits {
   1481 	c := zbits
   1482 	c.b[n/64] = 1 << (n % 64)
   1483 	return c
   1484 }
   1485 
   1486 // btest reports whether bit n is 1.
   1487 func btest(a *Bits, n uint) bool {
   1488 	return a.b[n/64]&(1<<(n%64)) != 0
   1489 }
   1490 
   1491 // biset sets bit n to 1.
   1492 func biset(a *Bits, n uint) {
   1493 	a.b[n/64] |= 1 << (n % 64)
   1494 }
   1495 
   1496 // biclr sets bit n to 0.
   1497 func biclr(a *Bits, n uint) {
   1498 	a.b[n/64] &^= (1 << (n % 64))
   1499 }
   1500 
   1501 // Bitno reports the lowest index of a 1 bit in b.
   1502 // It calls Fatal if there is no 1 bit.
   1503 func Bitno(b uint64) int {
   1504 	if b == 0 {
   1505 		Fatal("bad in bitno")
   1506 	}
   1507 	n := 0
   1508 	if b&(1<<32-1) == 0 {
   1509 		n += 32
   1510 		b >>= 32
   1511 	}
   1512 	if b&(1<<16-1) == 0 {
   1513 		n += 16
   1514 		b >>= 16
   1515 	}
   1516 	if b&(1<<8-1) == 0 {
   1517 		n += 8
   1518 		b >>= 8
   1519 	}
   1520 	if b&(1<<4-1) == 0 {
   1521 		n += 4
   1522 		b >>= 4
   1523 	}
   1524 	if b&(1<<2-1) == 0 {
   1525 		n += 2
   1526 		b >>= 2
   1527 	}
   1528 	if b&1 == 0 {
   1529 		n++
   1530 	}
   1531 	return n
   1532 }
   1533 
   1534 // String returns a space-separated list of the variables represented by bits.
   1535 func (bits Bits) String() string {
   1536 	// Note: This method takes a value receiver, both for convenience
   1537 	// and to make it safe to modify the bits as we process them.
   1538 	// Even so, most prints above use &bits, because then the value
   1539 	// being stored in the interface{} is a pointer and does not require
   1540 	// an allocation and copy to create the interface{}.
   1541 	var buf bytes.Buffer
   1542 	sep := ""
   1543 	for bany(&bits) {
   1544 		i := bnum(bits)
   1545 		buf.WriteString(sep)
   1546 		sep = " "
   1547 		v := &vars[i]
   1548 		if v.node == nil || v.node.Sym == nil {
   1549 			fmt.Fprintf(&buf, "$%d", i)
   1550 		} else {
   1551 			fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
   1552 			if v.offset != 0 {
   1553 				fmt.Fprintf(&buf, "%+d", int64(v.offset))
   1554 			}
   1555 		}
   1556 		biclr(&bits, uint(i))
   1557 	}
   1558 	return buf.String()
   1559 }
   1560