// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Register allocation.
//
// We use a version of a linear scan register allocator. We treat the
// whole function as a single long basic block and run through
// it using a greedy register allocator. Then all merge edges
// (those targeting a block with len(Preds)>1) are processed to
// shuffle data into the place that the target of the edge expects.
//
// The greedy allocator moves values into registers just before they
// are used, spills registers only when necessary, and spills the
// value whose next use is farthest in the future.
//
// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
// such predecessor provides the starting register state for a block.
//
// It also requires that there are no critical edges (critical =
// comes from a block with >1 successor and goes to a block with >1
// predecessor).  This makes it easy to add fixup code on merge edges -
// the source of a merge edge has only one successor, so we can add
// fixup code to the end of that block.
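// For example (illustrative): if b1 ends in a conditional branch to b3
// and b4, and b3 also has another predecessor b2, then the edge b1->b3
// is critical. Splitting it with an empty block b5 (b1->b5->b3) gives
// the merge edge a source block of its own, where fixup code can go
// without disturbing the b1->b4 path.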

// Spilling
//
// In the normal course of allocation, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it back from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of v.
//
// pre-regalloc:
//   (1) v = Op ...
//   (2) x = Op ...
//   (3) ... = Op v ...
//
// post-regalloc:
//   (1) v = Op ...    : AX // computes v, store result in AX
//       s = StoreReg v     // spill v to a stack slot
//   (2) x = Op ...    : AX // some other op uses AX
//       c = LoadReg s : CX // restore v from stack slot
//   (3) ... = Op c ...     // use the restored value
//
// Allocation occurs normally until we reach (3) and we realize we have
// a use of v and it isn't in any register. At that point, we allocate
// a spill (a StoreReg) for v. We can't determine the correct place for
// the spill at this point, so we allocate the spill as blockless initially.
// The restore is then generated to load v back into a register so it can
// be used. Subsequent uses of v will use the restored value c instead.
//
// What remains is the question of where to schedule the spill.
// During allocation, we keep track of the dominator of all restores of v.
// The spill of v must dominate that block. The spill must also be issued at
// a point where v is still in a register.
//
// To find the right place, start at b, the block which dominates all
// restores; a short sketch follows the list.
//  - If b is v.Block, then issue the spill right after v.
//    It is known to be in a register at that point, and dominates any restores.
//  - Otherwise, if v is in a register at the start of b,
//    put the spill of v at the start of b.
//  - Otherwise, set b = immediate dominator of b, and repeat.
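//
// A sketch of that walk (illustrative only; the real logic lives in
// placeSpills below, and inRegisterAtStart is a stand-in name for the
// allocator's actual bookkeeping):
//
//	best := restoreDominator
//	for best != v.Block && !inRegisterAtStart(v, best) {
//		best = idom(best)
//	}
//	// place the spill of v in best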
//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
// same register. Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// input must be a store (using StoreReg) at the end of the corresponding
// predecessor block.
//     b1: y = ... : AX        b2: z = ... : BX
//         y2 = StoreReg y         z2 = StoreReg z
//         goto b3                 goto b3
//     b3: x = phi(y2, z2)
// The stack allocator knows that StoreReg args of stack-allocated phis
// must be allocated to the same stack slot as the phi that uses them.
// x is now a spilled value and a restore must appear before its first use.

// TODO

// Use an affinity graph to mark two values which should use the
// same register. This affinity graph will be used to prefer certain
// registers for allocation. This affinity helps eliminate moves that
// are required for phi implementations and helps generate allocations
// for 2-register architectures.

// Note: regalloc generates a not-quite-SSA output. If we have:
//
//             b1: x = ... : AX
//                 x2 = StoreReg x
//                 ... AX gets reused for something else ...
//                 if ... goto b3 else b4
//
//   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
//       ... use x3 ...                 ... use x4 ...
//
//             b2: ... use x3 ...
//
// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add an x4:CX->BX copy at the end of b4.
// But the definition of x3 doesn't dominate b2.  We should really
// insert a dummy phi at the start of b2 (x5=phi(x3,x4):BX) to keep
// SSA form. For now, we ignore this problem as remaining in strict
// SSA form isn't needed after regalloc. We'll just leave the use
// of x3 not dominated by the definition of x3, and the CX->BX copy
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
	"unsafe"
)

const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)

// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
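
// These weights are applied when propagating next-use distances across
// block edges: a use reached through a likely branch counts as nearby,
// one reached through an unlikely branch as far away (see computeLive).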

// regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
}

type register uint8

const noRegister register = 255

type regMask uint64

func (m regMask) String() string {
	s := ""
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}

// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	n := 0
	for r != 0 {
		n += int(r & 1)
		r >>= 1
	}
	return n
}

// pickReg picks an arbitrary register from the register mask.
func pickReg(r regMask) register {
	// pick the lowest one
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	for i := register(0); ; i++ {
		if r&1 != 0 {
			return i
		}
		r >>= 1
	}
}
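
// exampleRegMaskOps is an illustrative sketch (not used by the allocator)
// showing how the mask helpers above compose: count the set bits, then
// repeatedly pick the lowest register and clear it from the mask.
func exampleRegMaskOps() {
	m := regMask(0xb) // bits 0, 1, 3 set: registers r0, r1, r3
	fmt.Printf("%d regs in {%s}\n", countRegs(m), m)
	for m != 0 {
		r := pickReg(m)       // lowest set register
		m &^= regMask(1) << r // remove it from the mask
	}
}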

type use struct {
	dist int32    // distance from start of the block to a use of a value
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of a value in nondecreasing dist order
}

// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
	regs              regMask // the set of registers holding a Value (usually just one)
	uses              *use    // list of uses in this block
	spill             *Value  // spilled copy of the Value (if any)
	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
	rematerializeable bool    // cached value of v.rematerializeable()
}

type regState struct {
	v *Value // Original (preregalloc) Value stored in this register.
	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
	// If a register is unused, v==c==nil
}

type regAllocState struct {
	f *Func

	sdom        SparseTree
	registers   []Register
	numRegs     register
	SPReg       register
	SBReg       register
	GReg        register
	allocatable regMask

	// for each block, its primary predecessor.
	// A predecessor of b is primary if it is the closest
	// predecessor that appears before b in the layout order.
	// We record the index in the Preds list where the primary predecessor sits.
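	// For example, with layout order b1, b2, b3, if b3's predecessors
	// are b1 and b2 (both earlier in the layout), then b2 is primary,
	// being the closest earlier block.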
	primary []int32

	// live values at the end of each block.  live[b.ID] is a list of value IDs
	// which are live at the end of b, together with a count of how many instructions
	// forward to the next use.
	live [][]liveInfo
	// desired register assignments at the end of each block.
	// Note that this is a static map computed before allocation occurs. Dynamic
	// register desires (from partially completed allocations) will trump
	// this information.
	desired []desiredState

	// current state of each (preregalloc) Value
	values []valState

	// names associated with each Value
	valueNames [][]LocalSlot

	// ID of SP, SB values
	sp, sb ID

	// For each Value, map from its value ID back to the
	// preregalloc Value it was derived from.
	orig []*Value

	// current state of each register
	regs []regState

	// registers that contain values which can't be kicked out
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used in the current instruction
	tmpused regMask

	// current block we're working on
	curBlock *Block

	// cache of use records
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block.
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state at the start of merge blocks.
	// saved state does not include the state of phi ops in the block.
	startRegs [][]startReg

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// a set of copies we generated to move things around, and
	// whether it is used in shuffle. Unused copies will be deleted.
	copies map[*Value]bool

	loopnest *loopnest
}

type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
	c *Value // cached version of the value
}

type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of use of this register
}

// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
	s.freeOrResetReg(r, false)
}

// freeOrResetReg frees up register r. Any current user of r is kicked out.
// resetting indicates that the operation is only for bookkeeping,
// e.g. when clearing out state upon entry to a new block.
func (s *regAllocState) freeOrResetReg(r register, resetting bool) {
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	if !resetting && s.f.Config.ctxt.Flag_locationlists && len(s.valueNames[v.ID]) != 0 {
		kill := s.curBlock.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid)
		for int(kill.ID) >= len(s.orig) {
			s.orig = append(s.orig, nil)
		}
		for _, name := range s.valueNames[v.ID] {
			s.f.NamedValues[name] = append(s.f.NamedValues[name], kill)
		}
		s.f.setHome(kill, &s.registers[r])
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}

// freeRegs frees up all registers listed in m.
func (s *regAllocState) freeRegs(m regMask) {
	for m&s.used != 0 {
		s.freeReg(pickReg(m & s.used))
	}
}

// setOrig records that c's original value is the same as
// v's original value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}

// assignReg assigns register r to hold c, a copy of v.
// r must be unused.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	// Update state.
	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}

// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(mask regMask, v *Value) register {
	mask &= s.allocatable
	mask &^= s.nospill
	if mask == 0 {
		s.f.Fatalf("no register available for %s", v)
	}

	// Pick an unused register if one is available.
	if mask&^s.used != 0 {
		return pickReg(mask &^ s.used)
	}

	// Pick a value to spill. Spill the value with the
	// farthest-in-the-future use.
	// TODO: Prefer registers with already spilled Values?
	// TODO: Modify preference using affinity graph.
	// TODO: if a single value is in multiple registers, spill one of them
	// before spilling a value in just a single register.

	// Find a register to spill. We spill the register containing the value
	// whose next use is as far in the future as possible.
	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
	var r register
	maxuse := int32(-1)
	for t := register(0); t < s.numRegs; t++ {
		if mask>>t&1 == 0 {
			continue
		}
		v := s.regs[t].v
		if n := s.values[v.ID].uses.dist; n > maxuse {
			// v's next use is farther in the future than any value
			// we've seen so far. A new best spill candidate.
			r = t
			maxuse = n
		}
	}
	if maxuse == -1 {
		s.f.Fatalf("couldn't find register to spill")
	}

	// Try to move it around before kicking out, if there is a free register.
	// We generate a Copy and record it. It will be deleted if never used.
	v2 := s.regs[r].v
	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
		r2 := pickReg(m)
		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
		s.copies[c] = false
		if s.f.pass.debug > regDebug {
			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
		}
		s.setOrig(c, v2)
		s.assignReg(r2, v2, c)
	}
	s.freeReg(r)
	return r
}
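
// farthestNextUseSketch is a self-contained, illustrative version of the
// eviction heuristic above (Belady's optimal replacement), detached from
// allocator state: among candidate registers, evict the one whose next
// use is farthest away. nextUse[r] is the distance to register r's next
// use, or -1 if r is not a candidate; the result is -1 if no register
// qualifies. Not used by the allocator itself.
func farthestNextUseSketch(nextUse []int32) int {
	best, bestDist := -1, int32(-1)
	for r, d := range nextUse {
		if d > bestDist {
			best, bestDist = r, d
		}
	}
	return best
}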

// makeSpill returns a Value which represents the spilled value of v.
// b is the block in which the spill is used.
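// The restoreMin/restoreMax fields record, in sparse-tree entry/exit
// numbers, the smallest dominator-tree interval containing every restore
// of v; placeSpills later uses that interval to choose a spill location
// that dominates all restores.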
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Final block not known - keep track of subtree where restores reside.
		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where we want
	// to put it yet, so we leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// We also don't know what the spill's arg will be.
	// Leave it argless for now.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}

// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// allocated register is marked nospill so the assignment cannot be
// undone until the caller allows it by clearing nospill. Returns a
// *Value which is either v or a copy of v allocated to the chosen register.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	vi := &s.values[v.ID]

	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		return s.regs[r].c
	}

	// Allocate a register.
	r := s.allocReg(mask, v)

	// Allocate v to the new register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}
	s.setOrig(c, v)
	s.assignReg(r, v, c)
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}

// isLeaf reports whether f contains no function calls.
func isLeaf(f *Func) bool {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if opcodeTable[v.Op].call {
				return false
			}
		}
	}
	return true
}

func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure we found all required registers.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
		if s.f.Config.arch == "arm" && objabi.GOARM == 5 {
			// On ARMv5 we insert softfloat calls at each FP instruction.
			// This clobbers LR almost everywhere. Disable allocating LR
			// on ARMv5.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		switch s.f.Config.arch {
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "ppc64le": // R2 already reserved.
			// nothing to do
		case "arm64":
			// nothing to do?
		case "386":
			// nothing to do.
			// Note that for Flag_shared (position independent code)
			// we do need to be careful, but that carefulness is hidden
			// in the rewrite rules so we always have a free register
			// available for global load/stores. See gen/386.rules (search for Flag_shared).
		case "s390x":
			// nothing to do, R10 & R11 already reserved
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}
	if s.f.Config.nacl {
		switch s.f.Config.arch {
		case "arm":
			s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm
		case "amd64p32":
			s.allocatable &^= 1 << 5  // BP - reserved for nacl
			s.allocatable &^= 1 << 15 // R15 - reserved for nacl
		}
	}
	if s.f.Config.use387 {
		s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go)
	}

	s.regs = make([]regState, s.numRegs)
	s.values = make([]valState, f.NumValues())
	s.orig = make([]*Value, f.NumValues())
	s.copies = make(map[*Value]bool)
	if s.f.Config.ctxt.Flag_locationlists {
		s.valueNames = make([][]LocalSlot, f.NumValues())
		for slot, values := range f.NamedValues {
			if isSynthetic(&slot) {
				continue
			}
			for _, value := range values {
				s.valueNames[value.ID] = append(s.valueNames[value.ID], slot)
			}
		}
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg is false for values returning Tuple types.
			// Instead, we mark the corresponding Selects as needReg.
		}
	}
	s.computeLive()

	// Compute block order. This array allows us to distinguish forward edges
	// from backward edges and compute how far they go.
	blockOrder := make([]int32, f.NumBlocks())
	for i, b := range f.Blocks {
		blockOrder[b.ID] = int32(i)
	}

	// Compute primary predecessors.
	s.primary = make([]int32, f.NumBlocks())
	for _, b := range f.Blocks {
		best := -1
		for i, e := range b.Preds {
			p := e.b
			if blockOrder[p.ID] >= blockOrder[b.ID] {
				continue // backward edge
			}
			if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] {
				best = i
			}
		}
		s.primary[b.ID] = int32(best)
	}

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.sdom()
}

// addUse adds a use record for id at distance dist from the start of the block.
// All calls to addUse must happen with nonincreasing dist.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}

// advanceUses advances the uses of v's args from the state before v to the state after v.
// Any values which have no more uses are deallocated from registers.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil {
			// Value is dead, free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
}

// liveAfterCurrentInstruction reports whether v is live after
// the current instruction is completed.  v must be used by the
// current instruction.
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	u := s.values[v.ID].uses
	d := u.dist
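	// Skip every use at distance d; those are the uses by the current
	// instruction. Any record remaining after that is a strictly later use.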
	for u != nil && u.dist == d {
		u = u.next
	}
	return u != nil && u.dist > d
}

// setState sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) {
	for s.used != 0 {
		s.freeOrResetReg(pickReg(s.used), true)
	}
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}

// compatRegs returns the set of registers which can store a type t.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		m = s.f.Config.fpRegMask
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}

func (s *regAllocState) regalloc(f *Func) {
	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
	defer f.retSparseSet(regValLiveSet)
	var oldSched []*Value
	var phis []*Value
	var phiRegs []register
	var args []*Value

	// Data structure used for computing desired registers.
	var desired desiredState

	// Desired registers for inputs & outputs for each instruction in the block.
	type dentry struct {
		out [4]register    // desired output registers
		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
	}
	var dinfo []dentry

	if f.Entry != f.Blocks[0] {
		f.Fatalf("entry block must be first")
	}

	for _, b := range f.Blocks {
		if s.f.pass.debug > regDebug {
			fmt.Printf("Begin processing block %v\n", b)
		}
		s.curBlock = b

		// Initialize regValLiveSet and uses fields for this block.
		// Walk backwards through the block doing liveness analysis.
		regValLiveSet.clear()
		for _, e := range s.live[b.ID] {
			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
			regValLiveSet.add(e.ID)
		}
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value
			regValLiveSet.add(v.ID)
		}
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			regValLiveSet.remove(v.ID)
			if v.Op == OpPhi {
				// Remove v from the live set, but don't add
				// any inputs. This is the state the len(b.Preds)>1
				// case below desires; it wants to process phis specially.
				continue
			}
			if opcodeTable[v.Op].call {
				// Function call clobbers all the registers but SP and SB.
				regValLiveSet.clear()
				if s.sp != 0 && s.values[s.sp].uses != nil {
					regValLiveSet.add(s.sp)
				}
				if s.sb != 0 && s.values[s.sb].uses != nil {
					regValLiveSet.add(s.sb)
				}
			}
			for _, a := range v.Args {
				if !s.values[a.ID].needReg {
					continue
				}
				s.addUse(a.ID, int32(i), v.Pos)
				regValLiveSet.add(a.ID)
			}
		}
		if s.f.pass.debug > regDebug {
			fmt.Printf("uses for %s:%s\n", s.f.Name, b)
			for i := range s.values {
				vi := &s.values[i]
				u := vi.uses
				if u == nil {
					continue
				}
				fmt.Printf("  v%d:", i)
				for u != nil {
					fmt.Printf(" %d", u.dist)
					u = u.next
				}
				fmt.Println()
			}
		}

		// Make a copy of the block schedule so we can generate a new one in place.
		// We make a separate copy for phis and regular values.
		nphi := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				break
			}
			nphi++
		}
		phis = append(phis[:0], b.Values[:nphi]...)
		oldSched = append(oldSched[:0], b.Values[nphi:]...)
		b.Values = b.Values[:0]

		// Initialize start state of block.
		if b == f.Entry {
			// Regalloc state is empty to start.
			if nphi > 0 {
				f.Fatalf("phis in entry block")
			}
		} else if len(b.Preds) == 1 {
			// Start regalloc state with the end state of the previous block.
			s.setState(s.endRegs[b.Preds[0].b.ID])
			if nphi > 0 {
				f.Fatalf("phis in single-predecessor block")
			}
			// Drop any values which are no longer live.
			// This may happen because at the end of p, a value may be
			// live but only used by some other successor of p.
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}
		} else {
			// This is the complicated case. We have more than one predecessor,
			// which means we may have Phi ops.

			// Start with the final register state of the primary predecessor
			idx := s.primary[b.ID]
			if idx < 0 {
				f.Fatalf("block with no primary predecessor %s", b)
			}
			p := b.Preds[idx].b
			s.setState(s.endRegs[p.ID])

			if s.f.pass.debug > regDebug {
				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
				for _, x := range s.endRegs[p.ID] {
					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
				}
			}

			// Decide on registers for phi ops. Use the registers determined
			// by the primary predecessor if we can.
			// TODO: pick best of (already processed) predecessors?
			// Majority vote? Deepest nesting level?
			phiRegs = phiRegs[:0]
			var phiUsed regMask
			for _, v := range phis {
				if !s.values[v.ID].needReg {
					phiRegs = append(phiRegs, noRegister)
					continue
				}
				a := v.Args[idx]
				// Some instructions target not-allocatable registers.
				// They're not suitable for further (phi-function) allocation.
				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
				if m != 0 {
					r := pickReg(m)
					phiUsed |= regMask(1) << r
					phiRegs = append(phiRegs, r)
				} else {
					phiRegs = append(phiRegs, noRegister)
				}
			}

			// Second pass - deallocate any phi inputs which are now dead.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				a := v.Args[idx]
				if !regValLiveSet.contains(a.ID) {
					// Input is dead beyond the phi, deallocate
					// anywhere else it might live.
					s.freeRegs(s.values[a.ID].regs)
				} else {
					// Input is still live.
					// Try to move it around before kicking out, if there is a free register.
					// We generate a Copy in the predecessor block and record it. It will be
					// deleted if never used.
					r := phiRegs[i]
					if r == noRegister {
						continue
					}
					// Pick a free register. At this point some registers used in the predecessor
					// block may have been deallocated. Those are the ones used for Phis. Exclude
					// them (and they are not going to be helpful anyway).
					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
						r2 := pickReg(m)
						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
						s.copies[c] = false
						if s.f.pass.debug > regDebug {
							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
						}
						s.setOrig(c, a)
						s.assignReg(r2, a, c)
						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
					}
					s.freeReg(r)
				}
			}

			// Copy phi ops into new schedule.
			b.Values = append(b.Values, phis...)

			// Third pass - pick registers for phis whose inputs
			// were not in a register.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				if phiRegs[i] != noRegister {
					continue
				}
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
				if m != 0 {
					r := pickReg(m)
					phiRegs[i] = r
					phiUsed |= regMask(1) << r
				}
			}

			// Set registers for phis. Add phi spill code.
			for i, v := range phis {
				if !s.values[v.ID].needReg {
					continue
				}
				r := phiRegs[i]
				if r == noRegister {
					// stack-based phi
					// Spills will be inserted in all the predecessors below.
					s.values[v.ID].spill = v // v starts life spilled
					continue
				}
				// register-based phi
				s.assignReg(r, v, v)
			}

			// Deallocate any values which are no longer live. Phis are excluded.
			for r := register(0); r < s.numRegs; r++ {
				if phiUsed>>r&1 != 0 {
					continue
				}
				v := s.regs[r].v
				if v != nil && !regValLiveSet.contains(v.ID) {
					s.freeReg(r)
				}
			}

			// Save the starting state for use by merge edges.
			var regList []startReg
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if phiUsed>>r&1 != 0 {
					// Skip registers that phis used, we'll handle those
					// specially during merge edge processing.
					continue
				}
				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
			}
			s.startRegs[b.ID] = regList

			if s.f.pass.debug > regDebug {
				fmt.Printf("after phis\n")
				for _, x := range s.startRegs[b.ID] {
					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
				}
			}
		}

		// Allocate space to record the desired registers for each value.
		dinfo = dinfo[:0]
		for i := 0; i < len(oldSched); i++ {
			dinfo = append(dinfo, dentry{})
		}

		// Load static desired register info at the end of the block.
		desired.copy(&s.desired[b.ID])

		// Check actual assigned registers at the start of the next block(s).
		// Dynamically assigned registers will trump the static
		// desired registers computed during liveness analysis.
		// Note that we do this phase after startRegs is set above, so that
		// we get the right behavior for a block which branches to itself.
		for _, e := range b.Succs {
			succ := e.b
			// TODO: prioritize likely successor?
			for _, x := range s.startRegs[succ.ID] {
				desired.add(x.v.ID, x.r)
			}
			// Process phi ops in succ.
			pidx := e.i
			for _, v := range succ.Values {
				if v.Op != OpPhi {
					continue
				}
				if !s.values[v.ID].needReg {
					continue
				}
				rp, ok := s.f.getHome(v.ID).(*Register)
				if !ok {
					continue
				}
				desired.add(v.Args[pidx].ID, register(rp.num))
			}
		}
		// Walk values backwards computing desired register info.
		// See computeLive for more comments.
		for i := len(oldSched) - 1; i >= 0; i-- {
			v := oldSched[i]
			prefs := desired.remove(v.ID)
			desired.clobber(opcodeTable[v.Op].reg.clobbers)
			for _, j := range opcodeTable[v.Op].reg.inputs {
				if countRegs(j.regs) != 1 {
					continue
				}
				desired.clobber(j.regs)
				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
			}
			if opcodeTable[v.Op].resultInArg0 {
				if opcodeTable[v.Op].commutative {
					desired.addList(v.Args[1].ID, prefs)
				}
				desired.addList(v.Args[0].ID, prefs)
			}
			// Save desired registers for this value.
			dinfo[i].out = prefs
			for j, a := range v.Args {
				if j >= len(dinfo[i].in) {
					break
				}
				dinfo[i].in[j] = desired.get(a.ID)
			}
		}

		// Process all the non-phi values.
		for idx, v := range oldSched {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing %s\n", v.LongString())
			}
			regspec := opcodeTable[v.Op].reg
			if v.Op == OpPhi {
				f.Fatalf("phi %s not at start of block", v)
			}
			if v.Op == OpSP {
				s.assignReg(s.SPReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sp = v.ID
				continue
			}
			if v.Op == OpSB {
				s.assignReg(s.SBReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				s.sb = v.ID
				continue
			}
			if v.Op == OpSelect0 || v.Op == OpSelect1 {
				if s.values[v.ID].needReg {
					i := 0
					if v.Op == OpSelect1 {
						i = 1
					}
					s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
				}
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpGetG && s.f.Config.hasGReg {
				// use hardware g register
				if s.regs[s.GReg].v != nil {
					s.freeReg(s.GReg) // kick out the old value
				}
				s.assignReg(s.GReg, v, v)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				goto issueSpill
			}
			if v.Op == OpArg {
				// Args are "pre-spilled" values. We don't allocate
				// any register here. We just set up the spill pointer to
				// point at v itself, and any later user will issue a
				// restore to use it.
				s.values[v.ID].spill = v
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}
			if v.Op == OpKeepAlive {
				// Make sure the argument to v is still live here.
				s.advanceUses(v)
				a := v.Args[0]
				vi := &s.values[a.ID]
				if vi.regs == 0 && !vi.rematerializeable {
					// Use the spill location.
					// This forces later liveness analysis to make the
					// value live at this point.
					v.SetArg(0, s.makeSpill(a, b))
				} else {
					// In-register and rematerializeable values are already live.
					// These are typically rematerializeable constants like nil,
					// or values of a variable that were modified since the last call.
					v.Op = OpCopy
					v.SetArgs1(v.Args[1])
				}
				b.Values = append(b.Values, v)
				continue
			}
			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
				// No register allocation required (or none specified yet)
				s.freeRegs(regspec.clobbers)
				b.Values = append(b.Values, v)
				s.advanceUses(v)
				continue
			}

			if s.values[v.ID].rematerializeable {
				// Value is rematerializeable, don't issue it here.
				// It will get issued just before each use (see
				// allocValToReg).
				for _, a := range v.Args {
					a.Uses--
				}
				s.advanceUses(v)
				continue
			}

			if s.f.pass.debug > regDebug {
				fmt.Printf("value %s\n", v.LongString())
				fmt.Printf("  out:")
				for _, r := range dinfo[idx].out {
					if r != noRegister {
						fmt.Printf(" %s", &s.registers[r])
					}
				}
				fmt.Println()
				for i := 0; i < len(v.Args) && i < 3; i++ {
					fmt.Printf("  in%d:", i)
					for _, r := range dinfo[idx].in[i] {
						if r != noRegister {
							fmt.Printf(" %s", &s.registers[r])
						}
					}
					fmt.Println()
				}
			}

			// Move arguments to registers. Process in an ordering defined
			// by the register specification (most constrained first).
			args = append(args[:0], v.Args...)
			for _, i := range regspec.inputs {
				mask := i.regs
				if mask&s.values[args[i.idx].ID].regs == 0 {
					// Need a new register for the input.
					mask &= s.allocatable
					mask &^= s.nospill
					// Use the desired register if available.
					if i.idx < 3 {
						for _, r := range dinfo[idx].in[i.idx] {
							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
								// Desired register is allowed and unused.
								mask = regMask(1) << r
								break
							}
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
				}
				args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos)
			}

			// If the output clobbers the input register, make sure we have
			// at least two copies of the input register so we don't
			// have to reload the value from the spill location.
			if opcodeTable[v.Op].resultInArg0 {
				var m regMask
				if !s.liveAfterCurrentInstruction(v.Args[0]) {
					// arg0 is dead.  We can clobber its register.
					goto ok
				}
				if s.values[v.Args[0].ID].rematerializeable {
					// We can rematerialize the input, don't worry about clobbering it.
					goto ok
				}
				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
					// we have at least 2 copies of arg0.  We can afford to clobber one.
					goto ok
				}
				if opcodeTable[v.Op].commutative {
					if !s.liveAfterCurrentInstruction(v.Args[1]) {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if s.values[v.Args[1].ID].rematerializeable {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
					if countRegs(s.values[v.Args[1].ID].regs) >= 2 {
						args[0], args[1] = args[1], args[0]
						goto ok
					}
				}

				// We can't overwrite arg0 (or arg1, if commutative).  So we
				// need to make a copy of an input so we have a register we can modify.

				// Possible new registers to copy into.
				m = s.compatRegs(v.Args[0].Type) &^ s.used
				if m == 0 {
					// No free registers.  In this case we'll just clobber
					// an input and future uses of that input must use a restore.
					// TODO(khr): We should really do this like allocReg does it,
					// spilling the value with the most distant next use.
					goto ok
				}

				// Try to move an input to the desired output.
				for _, r := range dinfo[idx].out {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
						// Note: we update args[0] so the instruction will
						// use the register copy we just made.
						goto ok
					}
				}
				// Try to copy input to its desired location & use its old
				// location as the result register.
				for _, r := range dinfo[idx].in[0] {
					if r != noRegister && m>>r&1 != 0 {
						m = regMask(1) << r
						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
						s.copies[c] = false
						// Note: no update to args[0] so the instruction will
						// use the original copy.
						goto ok
					}
				}
				if opcodeTable[v.Op].commutative {
					for _, r := range dinfo[idx].in[1] {
						if r != noRegister && m>>r&1 != 0 {
							m = regMask(1) << r
							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
							s.copies[c] = false
							args[0], args[1] = args[1], args[0]
							goto ok
						}
					}
				}
				// Avoid future fixed uses if we can.
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				// Save input 0 to a new register so we can clobber it.
				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
				s.copies[c] = false
			}

		ok:
			// Now that all args are in regs, we're ready to issue the value itself.
			// Before we pick a register for the output value, allow input registers
			// to be deallocated. We do this here so that the output can use the
			// same register as a dying input.
			if !opcodeTable[v.Op].resultNotInArgs {
				s.tmpused = s.nospill
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}

			// Dump any registers which will be clobbered
			s.freeRegs(regspec.clobbers)
			s.tmpused |= regspec.clobbers

			// Pick registers for outputs.
			{
				outRegs := [2]register{noRegister, noRegister}
				var used regMask
				for _, out := range regspec.outputs {
					mask := out.regs & s.allocatable &^ used
					if mask == 0 {
						continue
					}
					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
						if !opcodeTable[v.Op].commutative {
							// Output must use the same register as input 0.
							r := register(s.f.getHome(args[0].ID).(*Register).num)
							mask = regMask(1) << r
						} else {
							// Output must use the same register as input 0 or 1.
							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
							// Check r0 and r1 for desired output register.
							found := false
							for _, r := range dinfo[idx].out {
								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
									mask = regMask(1) << r
									found = true
									if r == r1 {
										args[0], args[1] = args[1], args[0]
									}
									break
								}
							}
							if !found {
								// Neither is desired; pick r0.
								mask = regMask(1) << r0
							}
						}
					}
					for _, r := range dinfo[idx].out {
						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
							// Desired register is allowed and unused.
							mask = regMask(1) << r
							break
						}
					}
					// Avoid registers we're saving for other values.
					if mask&^desired.avoid != 0 {
						mask &^= desired.avoid
					}
					r := s.allocReg(mask, v)
					outRegs[out.idx] = r
					used |= regMask(1) << r
					s.tmpused |= regMask(1) << r
				}
				// Record register choices
				if v.Type.IsTuple() {
					var outLocs LocPair
					if r := outRegs[0]; r != noRegister {
						outLocs[0] = &s.registers[r]
					}
					if r := outRegs[1]; r != noRegister {
						outLocs[1] = &s.registers[r]
					}
					s.f.setHome(v, outLocs)
					// Note that subsequent SelectX instructions will do the assignReg calls.
				} else {
					if r := outRegs[0]; r != noRegister {
						s.assignReg(r, v, v)
					}
				}
			}

			// Deallocate dead args, if we have not done so already.
			if opcodeTable[v.Op].resultNotInArgs {
				s.nospill = 0
				s.advanceUses(v) // frees any registers holding args that are no longer live
			}
			s.tmpused = 0

			// Issue the Value itself.
			for i, a := range args {
				v.SetArg(i, a) // use register version of arguments
			}
			b.Values = append(b.Values, v)

		issueSpill:
		}

		// Load control value into reg.
		if v := b.Control; v != nil && s.values[v.ID].needReg {
			if s.f.pass.debug > regDebug {
				fmt.Printf("  processing control %s\n", v.LongString())
			}
			// We assume that a control input can be passed in any
			// type-compatible register. If this turns out not to be true,
			// we'll need to introduce a regspec for a block's control value.
			b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)
			if b.Control != v {
				v.Uses--
				b.Control.Uses++
			}
			// Remove this use from the uses list.
			vi := &s.values[v.ID]
			u := vi.uses
			vi.uses = u.next
			if u.next == nil {
				s.freeRegs(vi.regs) // value is dead
			}
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}

		// Spill any values that can't live across basic block boundaries.
		if s.f.Config.use387 {
			s.freeRegs(s.f.Config.fpRegMask)
		}

		// If we are approaching a merge point and we are the primary
		// predecessor of it, find live values that we use soon after
		// the merge point and promote them to registers now.
		if len(b.Succs) == 1 {
			// For this to be worthwhile, the loop must have no calls in it.
			top := b.Succs[0].b
			loop := s.loopnest.b2l[top.ID]
			if loop == nil || loop.header != top || loop.containsCall {
				goto badloop
			}

			// TODO: sort by distance, pick the closest ones?
			for _, live := range s.live[b.ID] {
				if live.dist >= unlikelyDistance {
					// Don't preload anything live after the loop.
					continue
				}
				vid := live.ID
				vi := &s.values[vid]
				if vi.regs != 0 {
					continue
				}
				if vi.rematerializeable {
					continue
				}
				v := s.orig[vid]
				if s.f.Config.use387 && v.Type.IsFloat() {
					continue // 387 can't handle floats in registers between blocks
				}
				m := s.compatRegs(v.Type) &^ s.used
				if m&^desired.avoid != 0 {
					m &^= desired.avoid
				}
				if m != 0 {
					s.allocValToReg(v, m, false, b.Pos)
				}
			}
		}
	badloop:
		;

		// Save end-of-block register state.
		// First count how many; this cuts allocations in half.
		k := 0
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			k++
		}
		regList := make([]endReg, 0, k)
		for r := register(0); r < s.numRegs; r++ {
			v := s.regs[r].v
			if v == nil {
				continue
			}
			regList = append(regList, endReg{r, v, s.regs[r].c})
		}
		s.endRegs[b.ID] = regList

		if checkEnabled {
			regValLiveSet.clear()
			for _, x := range s.live[b.ID] {
				regValLiveSet.add(x.ID)
			}
			for r := register(0); r < s.numRegs; r++ {
				v := s.regs[r].v
				if v == nil {
					continue
				}
				if !regValLiveSet.contains(v.ID) {
					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
				}
			}
		}

		// If a value is live at the end of the block and
		// isn't in a register, generate a use for the spill location.
		// We need to remember this information so that
		// the liveness analysis in stackalloc is correct.
		for _, e := range s.live[b.ID] {
			vi := &s.values[e.ID]
			if vi.regs != 0 {
				// in a register, we'll use that source for the merge.
				continue
			}
			if vi.rematerializeable {
				// we'll rematerialize during the merge.
				continue
			}
			//fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
			spill := s.makeSpill(s.orig[e.ID], b)
			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
		}

		// Clear any final uses.
		// All that is left should be the pseudo-uses added for values which
		// are live at the end of b.
		for _, e := range s.live[b.ID] {
			u := s.values[e.ID].uses
			if u == nil {
				f.Fatalf("live at end, no uses v%d", e.ID)
			}
			if u.next != nil {
				f.Fatalf("live at end, too many uses v%d", e.ID)
			}
			s.values[e.ID].uses = nil
			u.next = s.freeUseRecords
			s.freeUseRecords = u
		}
	}

	// Decide where the spills we generated will go.
	s.placeSpills()

	// Anything that didn't get a register gets a stack location here.
	// (StoreReg, stack-based phis, inputs, ...)
	stacklive := stackalloc(s.f, s.spillLive)

	// Fix up all merge edges.
	s.shuffle(stacklive)

	// Erase any copies we never used.
	// Also, an unused copy might be the only use of another copy,
	// so continue erasing until we reach a fixed point.
	for {
		progress := false
		for c, used := range s.copies {
			if !used && c.Uses == 0 {
				if s.f.pass.debug > regDebug {
					fmt.Printf("delete copied value %s\n", c.LongString())
				}
				c.RemoveArg(0)
				f.freeValue(c)
				delete(s.copies, c)
				progress = true
			}
		}
		if !progress {
			break
		}
	}

	for _, b := range f.Blocks {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpInvalid {
				continue
			}
			b.Values[i] = v
			i++
		}
		b.Values = b.Values[:i]
	}
}
   1592 
   1593 func (s *regAllocState) placeSpills() {
   1594 	f := s.f
   1595 
   1596 	// Precompute some useful info.
   1597 	phiRegs := make([]regMask, f.NumBlocks())
   1598 	for _, b := range f.Blocks {
   1599 		var m regMask
   1600 		for _, v := range b.Values {
   1601 			if v.Op == OpRegKill {
   1602 				continue
   1603 			}
   1604 			if v.Op != OpPhi {
   1605 				break
   1606 			}
   1607 			if r, ok := f.getHome(v.ID).(*Register); ok {
   1608 				m |= regMask(1) << uint(r.num)
   1609 			}
   1610 		}
   1611 		phiRegs[b.ID] = m
   1612 	}
   1613 
   1614 	// Start maps block IDs to the list of spills
   1615 	// that go at the start of the block (but after any phis).
   1616 	start := map[ID][]*Value{}
   1617 	// After maps value IDs to the list of spills
   1618 	// that go immediately after that value ID.
   1619 	after := map[ID][]*Value{}
   1620 
   1621 	for i := range s.values {
   1622 		vi := s.values[i]
   1623 		spill := vi.spill
   1624 		if spill == nil {
   1625 			continue
   1626 		}
   1627 		if spill.Block != nil {
   1628 			// Some spills are already fully set up,
   1629 			// like OpArgs and stack-based phis.
   1630 			continue
   1631 		}
   1632 		v := s.orig[i]
   1633 
   1634 		// Walk down the dominator tree looking for a good place to
    1635 		// put the spill of v.  At the start, "best" is the best place
   1636 		// we have found so far.
   1637 		// TODO: find a way to make this O(1) without arbitrary cutoffs.
   1638 		best := v.Block
   1639 		bestArg := v
   1640 		var bestDepth int16
   1641 		if l := s.loopnest.b2l[best.ID]; l != nil {
   1642 			bestDepth = l.depth
   1643 		}
   1644 		b := best
   1645 		const maxSpillSearch = 100
   1646 		for i := 0; i < maxSpillSearch; i++ {
   1647 			// Find the child of b in the dominator tree which
   1648 			// dominates all restores.
   1649 			p := b
   1650 			b = nil
   1651 			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
   1652 				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
   1653 					// c also dominates all restores.  Walk down into c.
   1654 					b = c
   1655 					break
   1656 				}
   1657 			}
   1658 			if b == nil {
   1659 				// Ran out of blocks which dominate all restores.
   1660 				break
   1661 			}
   1662 
   1663 			var depth int16
   1664 			if l := s.loopnest.b2l[b.ID]; l != nil {
   1665 				depth = l.depth
   1666 			}
   1667 			if depth > bestDepth {
   1668 				// Don't push the spill into a deeper loop.
   1669 				continue
   1670 			}
   1671 
   1672 			// If v is in a register at the start of b, we can
   1673 			// place the spill here (after the phis).
   1674 			if len(b.Preds) == 1 {
   1675 				for _, e := range s.endRegs[b.Preds[0].b.ID] {
   1676 					if e.v == v {
   1677 						// Found a better spot for the spill.
   1678 						best = b
   1679 						bestArg = e.c
   1680 						bestDepth = depth
   1681 						break
   1682 					}
   1683 				}
   1684 			} else {
   1685 				for _, e := range s.startRegs[b.ID] {
   1686 					if e.v == v {
   1687 						// Found a better spot for the spill.
   1688 						best = b
   1689 						bestArg = e.c
   1690 						bestDepth = depth
   1691 						break
   1692 					}
   1693 				}
   1694 			}
   1695 		}
   1696 
   1697 		// Put the spill in the best block we found.
   1698 		spill.Block = best
   1699 		spill.AddArg(bestArg)
   1700 		if best == v.Block && v.Op != OpPhi {
   1701 			// Place immediately after v.
   1702 			after[v.ID] = append(after[v.ID], spill)
   1703 		} else {
   1704 			// Place at the start of best block.
   1705 			start[best.ID] = append(start[best.ID], spill)
   1706 		}
   1707 	}
   1708 
   1709 	// Insert spill instructions into the block schedules.
   1710 	var oldSched []*Value
   1711 	for _, b := range f.Blocks {
   1712 		nphi := 0
   1713 		for _, v := range b.Values {
   1714 			if v.Op != OpRegKill && v.Op != OpPhi {
   1715 				break
   1716 			}
   1717 			nphi++
   1718 		}
   1719 		oldSched = append(oldSched[:0], b.Values[nphi:]...)
   1720 		b.Values = b.Values[:nphi]
   1721 		b.Values = append(b.Values, start[b.ID]...)
   1722 		for _, v := range oldSched {
   1723 			b.Values = append(b.Values, v)
   1724 			b.Values = append(b.Values, after[v.ID]...)
   1725 		}
   1726 	}
   1727 }
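
// Placement example (hypothetical CFG, not derived from this function's
// inputs): if v is defined inside a loop but all restores of v are in a
// block after the loop, the walk above can sink the spill from v.Block
// down the dominator tree to the first block of shallower loop depth
// where v is still in a register, so the StoreReg executes once rather
// than once per loop iteration.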
   1728 
   1729 // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
   1730 func (s *regAllocState) shuffle(stacklive [][]ID) {
   1731 	var e edgeState
   1732 	e.s = s
   1733 	e.cache = map[ID][]*Value{}
   1734 	e.contents = map[Location]contentRecord{}
   1735 	if s.f.pass.debug > regDebug {
   1736 		fmt.Printf("shuffle %s\n", s.f.Name)
   1737 		fmt.Println(s.f.String())
   1738 	}
   1739 
   1740 	for _, b := range s.f.Blocks {
   1741 		if len(b.Preds) <= 1 {
   1742 			continue
   1743 		}
   1744 		e.b = b
   1745 		for i, edge := range b.Preds {
   1746 			p := edge.b
   1747 			e.p = p
   1748 			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
   1749 			e.process()
   1750 		}
   1751 	}
   1752 }
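
// Edge-fixup example (hypothetical registers): if predecessor p0 ends
// with v in AX while predecessor p1 ends with v only in its spill slot,
// and b's startRegs expect v in AX, then processing the p1->b edge
// appends an OpLoadReg of the spill to the end of p1, leaving the
// p0->b edge untouched.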
   1753 
   1754 type edgeState struct {
   1755 	s    *regAllocState
   1756 	p, b *Block // edge goes from p->b.
   1757 
   1758 	// for each pre-regalloc value, a list of equivalent cached values
   1759 	cache      map[ID][]*Value
   1760 	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
   1761 
   1762 	// map from location to the value it contains
   1763 	contents map[Location]contentRecord
   1764 
   1765 	// desired destination locations
   1766 	destinations []dstRecord
   1767 	extra        []dstRecord
   1768 
   1769 	usedRegs   regMask // registers currently holding something
   1770 	uniqueRegs regMask // registers holding the only copy of a value
   1771 	finalRegs  regMask // registers holding final target
   1772 }
   1773 
   1774 type contentRecord struct {
   1775 	vid   ID       // pre-regalloc value
   1776 	c     *Value   // cached value
   1777 	final bool     // this is a satisfied destination
   1778 	pos   src.XPos // source position of use of the value
   1779 }
   1780 
   1781 type dstRecord struct {
   1782 	loc    Location // register or stack slot
   1783 	vid    ID       // pre-regalloc value it should contain
   1784 	splice **Value  // place to store reference to the generating instruction
   1785 	pos    src.XPos // source position of use of this location
   1786 }
   1787 
   1788 // setup initializes the edge state for shuffling.
   1789 func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
   1790 	if e.s.f.pass.debug > regDebug {
   1791 		fmt.Printf("edge %s->%s\n", e.p, e.b)
   1792 	}
   1793 
   1794 	// Clear state.
   1795 	for _, vid := range e.cachedVals {
   1796 		delete(e.cache, vid)
   1797 	}
   1798 	e.cachedVals = e.cachedVals[:0]
   1799 	for k := range e.contents {
   1800 		delete(e.contents, k)
   1801 	}
   1802 	e.usedRegs = 0
   1803 	e.uniqueRegs = 0
   1804 	e.finalRegs = 0
   1805 
   1806 	// Live registers can be sources.
   1807 	for _, x := range srcReg {
    1808 		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // the source position doesn't matter here
   1809 	}
   1810 	// So can all of the spill locations.
   1811 	for _, spillID := range stacklive {
   1812 		v := e.s.orig[spillID]
   1813 		spill := e.s.values[v.ID].spill
   1814 		if !e.s.sdom.isAncestorEq(spill.Block, e.p) {
    1815 			// Spills were placed so that they dominate only the uses found
   1816 			// during the first regalloc pass. The edge fixup code
   1817 			// can't use a spill location if the spill doesn't dominate
   1818 			// the edge.
   1819 			// We are guaranteed that if the spill doesn't dominate this edge,
   1820 			// then the value is available in a register (because we called
   1821 			// makeSpill for every value not in a register at the start
   1822 			// of an edge).
   1823 			continue
   1824 		}
    1825 		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // the source position doesn't matter here
   1826 	}
   1827 
   1828 	// Figure out all the destinations we need.
   1829 	dsts := e.destinations[:0]
   1830 	for _, x := range dstReg {
   1831 		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
   1832 	}
   1833 	// Phis need their args to end up in a specific location.
   1834 	for _, v := range e.b.Values {
   1835 		if v.Op == OpRegKill {
   1836 			continue
   1837 		}
   1838 		if v.Op != OpPhi {
   1839 			break
   1840 		}
   1841 		loc := e.s.f.getHome(v.ID)
   1842 		if loc == nil {
   1843 			continue
   1844 		}
   1845 		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
   1846 	}
   1847 	e.destinations = dsts
   1848 
   1849 	if e.s.f.pass.debug > regDebug {
   1850 		for _, vid := range e.cachedVals {
   1851 			a := e.cache[vid]
   1852 			for _, c := range a {
   1853 				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
   1854 			}
   1855 		}
   1856 		for _, d := range e.destinations {
   1857 			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
   1858 		}
   1859 	}
   1860 }
   1861 
   1862 // process generates code to move all the values to the right destination locations.
   1863 func (e *edgeState) process() {
   1864 	dsts := e.destinations
   1865 
   1866 	// Process the destinations until they are all satisfied.
   1867 	for len(dsts) > 0 {
   1868 		i := 0
   1869 		for _, d := range dsts {
   1870 			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
   1871 				// Failed - save for next iteration.
   1872 				dsts[i] = d
   1873 				i++
   1874 			}
   1875 		}
   1876 		if i < len(dsts) {
   1877 			// Made some progress. Go around again.
   1878 			dsts = dsts[:i]
   1879 
    1880 			// Append any extra destinations we generated.
   1881 			dsts = append(dsts, e.extra...)
   1882 			e.extra = e.extra[:0]
   1883 			continue
   1884 		}
   1885 
   1886 		// We made no progress. That means that any
   1887 		// remaining unsatisfied moves are in simple cycles.
   1888 		// For example, A -> B -> C -> D -> A.
   1889 		//   A ----> B
   1890 		//   ^       |
   1891 		//   |       |
   1892 		//   |       v
   1893 		//   D <---- C
   1894 
   1895 		// To break the cycle, we pick an unused register, say R,
   1896 		// and put a copy of B there.
   1897 		//   A ----> B
   1898 		//   ^       |
   1899 		//   |       |
   1900 		//   |       v
   1901 		//   D <---- C <---- R=copyofB
   1902 		// When we resume the outer loop, the A->B move can now proceed,
   1903 		// and eventually the whole cycle completes.
   1904 
   1905 		// Copy any cycle location to a temp register. This duplicates
    1906 		// one of the cycle entries, allowing the just-duplicated value
   1907 		// to be overwritten and the cycle to proceed.
   1908 		d := dsts[0]
   1909 		loc := d.loc
   1910 		vid := e.contents[loc].vid
   1911 		c := e.contents[loc].c
   1912 		r := e.findRegFor(c.Type)
   1913 		if e.s.f.pass.debug > regDebug {
   1914 			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
   1915 		}
   1916 		e.erase(r)
   1917 		if _, isReg := loc.(*Register); isReg {
   1918 			c = e.p.NewValue1(d.pos, OpCopy, c.Type, c)
   1919 		} else {
   1920 			c = e.p.NewValue1(d.pos, OpLoadReg, c.Type, c)
   1921 		}
   1922 		e.set(r, vid, c, false, d.pos)
   1923 	}
   1924 }
   1925 
   1926 // processDest generates code to put value vid into location loc. Returns true
   1927 // if progress was made.
   1928 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
   1929 	occupant := e.contents[loc]
   1930 	if occupant.vid == vid {
   1931 		// Value is already in the correct place.
   1932 		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
   1933 		if splice != nil {
   1934 			(*splice).Uses--
   1935 			*splice = occupant.c
   1936 			occupant.c.Uses++
   1937 		}
    1938 		// Note: if splice==nil then occupant.c will appear dead. This is
    1939 		// non-SSA-form code, so be careful after this pass not to run
   1940 		// deadcode elimination.
   1941 		if _, ok := e.s.copies[occupant.c]; ok {
   1942 			// The copy at occupant.c was used to avoid spill.
   1943 			e.s.copies[occupant.c] = true
   1944 		}
   1945 		return true
   1946 	}
   1947 
   1948 	// Check if we're allowed to clobber the destination location.
   1949 	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
   1950 		// We can't overwrite the last copy
   1951 		// of a value that needs to survive.
   1952 		return false
   1953 	}
   1954 
   1955 	// Copy from a source of v, register preferred.
   1956 	v := e.s.orig[vid]
   1957 	var c *Value
   1958 	var src Location
   1959 	if e.s.f.pass.debug > regDebug {
   1960 		fmt.Printf("moving v%d to %s\n", vid, loc)
   1961 		fmt.Printf("sources of v%d:", vid)
   1962 	}
   1963 	for _, w := range e.cache[vid] {
   1964 		h := e.s.f.getHome(w.ID)
   1965 		if e.s.f.pass.debug > regDebug {
   1966 			fmt.Printf(" %s:%s", h, w)
   1967 		}
   1968 		_, isreg := h.(*Register)
   1969 		if src == nil || isreg {
   1970 			c = w
   1971 			src = h
   1972 		}
   1973 	}
   1974 	if e.s.f.pass.debug > regDebug {
   1975 		if src != nil {
   1976 			fmt.Printf(" [use %s]\n", src)
   1977 		} else {
   1978 			fmt.Printf(" [no source]\n")
   1979 		}
   1980 	}
   1981 	_, dstReg := loc.(*Register)
   1982 
   1983 	// Pre-clobber destination. This avoids the
   1984 	// following situation:
   1985 	//   - v is currently held in R0 and stacktmp0.
   1986 	//   - We want to copy stacktmp1 to stacktmp0.
   1987 	//   - We choose R0 as the temporary register.
   1988 	// During the copy, both R0 and stacktmp0 are
   1989 	// clobbered, losing both copies of v. Oops!
   1990 	// Erasing the destination early means R0 will not
   1991 	// be chosen as the temp register, as it will then
   1992 	// be the last copy of v.
   1993 	e.erase(loc)
   1994 	var x *Value
   1995 	if c == nil {
   1996 		if !e.s.values[vid].rematerializeable {
   1997 			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
   1998 		}
   1999 		if dstReg {
   2000 			x = v.copyIntoNoXPos(e.p)
   2001 		} else {
   2002 			// Rematerialize into stack slot. Need a free
   2003 			// register to accomplish this.
   2004 			r := e.findRegFor(v.Type)
   2005 			e.erase(r)
   2006 			x = v.copyIntoWithXPos(e.p, pos)
   2007 			e.set(r, vid, x, false, pos)
   2008 			// Make sure we spill with the size of the slot, not the
   2009 			// size of x (which might be wider due to our dropping
   2010 			// of narrowing conversions).
   2011 			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
   2012 		}
   2013 	} else {
   2014 		// Emit move from src to dst.
   2015 		_, srcReg := src.(*Register)
   2016 		if srcReg {
   2017 			if dstReg {
   2018 				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
   2019 			} else {
   2020 				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
   2021 			}
   2022 		} else {
   2023 			if dstReg {
   2024 				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
   2025 			} else {
   2026 				// mem->mem. Use temp register.
   2027 				r := e.findRegFor(c.Type)
   2028 				e.erase(r)
   2029 				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
   2030 				e.set(r, vid, t, false, pos)
   2031 				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
   2032 			}
   2033 		}
   2034 	}
   2035 	e.set(loc, vid, x, true, pos)
   2036 	if splice != nil {
   2037 		(*splice).Uses--
   2038 		*splice = x
   2039 		x.Uses++
   2040 	}
   2041 	return true
   2042 }
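
// For reference, the move emitted by processDest takes one of four shapes,
// depending on whether the chosen source and the destination are registers
// or stack slots:
//
//	reg -> reg:  x = OpCopy c
//	reg -> mem:  x = OpStoreReg c
//	mem -> reg:  x = OpLoadReg c
//	mem -> mem:  t = OpLoadReg c into a scratch register, then
//	             x = OpStoreReg t
//
// plus the rematerialization paths above when no cached copy of v exists.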
   2043 
   2044 // set changes the contents of location loc to hold the given value and its cached representative.
   2045 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
   2046 	e.s.f.setHome(c, loc)
   2047 	e.contents[loc] = contentRecord{vid, c, final, pos}
   2048 	a := e.cache[vid]
   2049 	if len(a) == 0 {
   2050 		e.cachedVals = append(e.cachedVals, vid)
   2051 	}
   2052 	a = append(a, c)
   2053 	e.cache[vid] = a
   2054 	if r, ok := loc.(*Register); ok {
   2055 		e.usedRegs |= regMask(1) << uint(r.num)
   2056 		if final {
   2057 			e.finalRegs |= regMask(1) << uint(r.num)
   2058 		}
   2059 		if len(a) == 1 {
   2060 			e.uniqueRegs |= regMask(1) << uint(r.num)
   2061 		}
   2062 		if len(a) == 2 {
   2063 			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
   2064 				e.uniqueRegs &^= regMask(1) << uint(t.num)
   2065 			}
   2066 		}
   2067 	}
   2068 	if e.s.f.pass.debug > regDebug {
   2069 		fmt.Printf("%s\n", c.LongString())
   2070 		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
   2071 	}
   2072 }
   2073 
    2074 // erase removes any value currently cached at loc.
   2075 func (e *edgeState) erase(loc Location) {
   2076 	cr := e.contents[loc]
   2077 	if cr.c == nil {
   2078 		return
   2079 	}
   2080 	vid := cr.vid
   2081 
   2082 	if cr.final {
   2083 		// Add a destination to move this value back into place.
   2084 		// Make sure it gets added to the tail of the destination queue
   2085 		// so we make progress on other moves first.
   2086 		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
   2087 	}
   2088 
   2089 	// Remove c from the list of cached values.
   2090 	a := e.cache[vid]
   2091 	for i, c := range a {
   2092 		if e.s.f.getHome(c.ID) == loc {
   2093 			if e.s.f.pass.debug > regDebug {
   2094 				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
   2095 			}
   2096 			a[i], a = a[len(a)-1], a[:len(a)-1]
   2097 			if e.s.f.Config.ctxt.Flag_locationlists {
   2098 				if _, isReg := loc.(*Register); isReg && int(c.ID) < len(e.s.valueNames) && len(e.s.valueNames[c.ID]) != 0 {
   2099 					kill := e.p.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid)
   2100 					e.s.f.setHome(kill, loc)
   2101 					for _, name := range e.s.valueNames[c.ID] {
   2102 						e.s.f.NamedValues[name] = append(e.s.f.NamedValues[name], kill)
   2103 					}
   2104 				}
   2105 			}
   2106 
   2107 			break
   2108 		}
   2109 	}
   2110 	e.cache[vid] = a
   2111 
   2112 	// Update register masks.
   2113 	if r, ok := loc.(*Register); ok {
   2114 		e.usedRegs &^= regMask(1) << uint(r.num)
   2115 		if cr.final {
   2116 			e.finalRegs &^= regMask(1) << uint(r.num)
   2117 		}
   2118 	}
   2119 	if len(a) == 1 {
   2120 		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
   2121 			e.uniqueRegs |= regMask(1) << uint(r.num)
   2122 		}
   2123 	}
   2124 }
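
// Bookkeeping example (hypothetical registers): if v is cached in both AX
// and CX, neither register is in uniqueRegs. Erasing the CX copy leaves AX
// holding the only copy of v, so the code above adds AX back into
// uniqueRegs; findRegFor will then avoid handing AX out as a scratch
// register unless it first saves v to a stack slot (or v is
// rematerializeable).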
   2125 
   2126 // findRegFor finds a register we can use to make a temp copy of type typ.
   2127 func (e *edgeState) findRegFor(typ *types.Type) Location {
   2128 	// Which registers are possibilities.
   2129 	var m regMask
   2130 	types := &e.s.f.Config.Types
   2131 	if typ.IsFloat() {
   2132 		m = e.s.compatRegs(types.Float64)
   2133 	} else {
   2134 		m = e.s.compatRegs(types.Int64)
   2135 	}
   2136 
   2137 	// Pick a register. In priority order:
   2138 	// 1) an unused register
   2139 	// 2) a non-unique register not holding a final value
   2140 	// 3) a non-unique register
   2141 	// 4) TODO: a register holding a rematerializeable value
   2142 	x := m &^ e.usedRegs
   2143 	if x != 0 {
   2144 		return &e.s.registers[pickReg(x)]
   2145 	}
   2146 	x = m &^ e.uniqueRegs &^ e.finalRegs
   2147 	if x != 0 {
   2148 		return &e.s.registers[pickReg(x)]
   2149 	}
   2150 	x = m &^ e.uniqueRegs
   2151 	if x != 0 {
   2152 		return &e.s.registers[pickReg(x)]
   2153 	}
   2154 
   2155 	// No register is available.
   2156 	// Pick a register to spill.
   2157 	for _, vid := range e.cachedVals {
   2158 		a := e.cache[vid]
   2159 		for _, c := range a {
   2160 			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
   2161 				if !c.rematerializeable() {
   2162 					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
   2163 					// Allocate a temp location to spill a register to.
   2164 					// The type of the slot is immaterial - it will not be live across
   2165 					// any safepoint. Just use a type big enough to hold any register.
   2166 					t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64}
   2167 					// TODO: reuse these slots. They'll need to be erased first.
   2168 					e.set(t, vid, x, false, c.Pos)
   2169 					if e.s.f.pass.debug > regDebug {
   2170 						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
   2171 					}
   2172 				}
   2173 				// r will now be overwritten by the caller. At some point
   2174 				// later, the newly saved value will be moved back to its
   2175 				// final destination in processDest.
   2176 				return r
   2177 			}
   2178 		}
   2179 	}
   2180 
   2181 	fmt.Printf("m:%d unique:%d final:%d\n", m, e.uniqueRegs, e.finalRegs)
   2182 	for _, vid := range e.cachedVals {
   2183 		a := e.cache[vid]
   2184 		for _, c := range a {
   2185 			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
   2186 		}
   2187 	}
   2188 	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
   2189 	return nil
   2190 }
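
// Selection example (assumed masks): with m = {AX,BX,CX},
// usedRegs = {AX,BX}, uniqueRegs = {BX}, and finalRegs = {AX}, the first
// probe m &^ usedRegs yields {CX}, so CX is returned without spilling.
// Only when every compatible register is in use and holds the last copy
// of some value does the loop above save a register's contents to a
// fresh stack slot first.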
   2191 
   2192 // rematerializeable reports whether the register allocator should recompute
   2193 // a value instead of spilling/restoring it.
   2194 func (v *Value) rematerializeable() bool {
   2195 	if !opcodeTable[v.Op].rematerializeable {
   2196 		return false
   2197 	}
   2198 	for _, a := range v.Args {
   2199 		// SP and SB (generated by OpSP and OpSB) are always available.
   2200 		if a.Op != OpSP && a.Op != OpSB {
   2201 			return false
   2202 		}
   2203 	}
   2204 	return true
   2205 }
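
// For example, an op that materializes a constant takes no arguments and
// can simply be recomputed at its use site, and an address computed off SP
// or SB can likewise be recomputed, assuming the opcode table marks those
// ops rematerializeable. An add of two arbitrary values is never
// rematerializeable here, since its inputs may no longer be available.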
   2206 
   2207 type liveInfo struct {
   2208 	ID   ID       // ID of value
   2209 	dist int32    // # of instructions before next use
   2210 	pos  src.XPos // source position of next use
   2211 }
   2212 
   2213 // computeLive computes a map from block ID to a list of value IDs live at the end
    2214 // of that block. Each value ID is paired with a count of how many instructions
    2215 // remain until that value's next use. The resulting map is stored in s.live.
   2216 // computeLive also computes the desired register information at the end of each block.
   2217 // This desired register information is stored in s.desired.
   2218 // TODO: this could be quadratic if lots of variables are live across lots of
   2219 // basic blocks. Figure out a way to make this function (or, more precisely, the user
   2220 // of this function) require only linear size & time.
   2221 func (s *regAllocState) computeLive() {
   2222 	f := s.f
   2223 	s.live = make([][]liveInfo, f.NumBlocks())
   2224 	s.desired = make([]desiredState, f.NumBlocks())
   2225 	var phis []*Value
   2226 
   2227 	live := newSparseMap(f.NumValues())
   2228 	t := newSparseMap(f.NumValues())
   2229 
   2230 	// Keep track of which value we want in each register.
   2231 	var desired desiredState
   2232 
   2233 	// Instead of iterating over f.Blocks, iterate over their postordering.
   2234 	// Liveness information flows backward, so starting at the end
   2235 	// increases the probability that we will stabilize quickly.
   2236 	// TODO: Do a better job yet. Here's one possibility:
   2237 	// Calculate the dominator tree and locate all strongly connected components.
   2238 	// If a value is live in one block of an SCC, it is live in all.
   2239 	// Walk the dominator tree from end to beginning, just once, treating SCC
   2240 	// components as single blocks, duplicated calculated liveness information
    2241 	// components as single blocks, duplicating calculated liveness information
   2242 	po := f.postorder()
   2243 	s.loopnest = f.loopnest()
   2244 	s.loopnest.calculateDepths()
   2245 	for {
   2246 		changed := false
   2247 
   2248 		for _, b := range po {
   2249 			// Start with known live values at the end of the block.
   2250 			// Add len(b.Values) to adjust from end-of-block distance
   2251 			// to beginning-of-block distance.
   2252 			live.clear()
   2253 			for _, e := range s.live[b.ID] {
   2254 				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
   2255 			}
   2256 
   2257 			// Mark control value as live
   2258 			if b.Control != nil && s.values[b.Control.ID].needReg {
   2259 				live.set(b.Control.ID, int32(len(b.Values)), b.Pos)
   2260 			}
   2261 
    2262 			// Propagate backwards to the start of the block.
   2263 			// Assumes Values have been scheduled.
   2264 			phis = phis[:0]
   2265 			for i := len(b.Values) - 1; i >= 0; i-- {
   2266 				v := b.Values[i]
   2267 				live.remove(v.ID)
   2268 				if v.Op == OpPhi {
   2269 					// save phi ops for later
   2270 					phis = append(phis, v)
   2271 					continue
   2272 				}
   2273 				if opcodeTable[v.Op].call {
   2274 					c := live.contents()
   2275 					for i := range c {
   2276 						c[i].val += unlikelyDistance
   2277 					}
   2278 				}
   2279 				for _, a := range v.Args {
   2280 					if s.values[a.ID].needReg {
   2281 						live.set(a.ID, int32(i), v.Pos)
   2282 					}
   2283 				}
   2284 			}
   2285 			// Propagate desired registers backwards.
   2286 			desired.copy(&s.desired[b.ID])
   2287 			for i := len(b.Values) - 1; i >= 0; i-- {
   2288 				v := b.Values[i]
   2289 				prefs := desired.remove(v.ID)
   2290 				if v.Op == OpPhi {
   2291 					// TODO: if v is a phi, save desired register for phi inputs.
   2292 					// For now, we just drop it and don't propagate
    2293 				// desired registers back through phi nodes.
   2294 					continue
   2295 				}
   2296 				// Cancel desired registers if they get clobbered.
   2297 				desired.clobber(opcodeTable[v.Op].reg.clobbers)
   2298 				// Update desired registers if there are any fixed register inputs.
   2299 				for _, j := range opcodeTable[v.Op].reg.inputs {
   2300 					if countRegs(j.regs) != 1 {
   2301 						continue
   2302 					}
   2303 					desired.clobber(j.regs)
   2304 					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
   2305 				}
   2306 				// Set desired register of input 0 if this is a 2-operand instruction.
   2307 				if opcodeTable[v.Op].resultInArg0 {
   2308 					if opcodeTable[v.Op].commutative {
   2309 						desired.addList(v.Args[1].ID, prefs)
   2310 					}
   2311 					desired.addList(v.Args[0].ID, prefs)
   2312 				}
   2313 			}
   2314 
   2315 			// For each predecessor of b, expand its list of live-at-end values.
   2316 			// invariant: live contains the values live at the start of b (excluding phi inputs)
   2317 			for i, e := range b.Preds {
   2318 				p := e.b
   2319 				// Compute additional distance for the edge.
   2320 				// Note: delta must be at least 1 to distinguish the control
   2321 				// value use from the first user in a successor block.
   2322 				delta := int32(normalDistance)
   2323 				if len(p.Succs) == 2 {
   2324 					if p.Succs[0].b == b && p.Likely == BranchLikely ||
   2325 						p.Succs[1].b == b && p.Likely == BranchUnlikely {
   2326 						delta = likelyDistance
   2327 					}
   2328 					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
   2329 						p.Succs[1].b == b && p.Likely == BranchLikely {
   2330 						delta = unlikelyDistance
   2331 					}
   2332 				}
   2333 
   2334 				// Update any desired registers at the end of p.
   2335 				s.desired[p.ID].merge(&desired)
   2336 
   2337 				// Start t off with the previously known live values at the end of p.
   2338 				t.clear()
   2339 				for _, e := range s.live[p.ID] {
   2340 					t.set(e.ID, e.dist, e.pos)
   2341 				}
   2342 				update := false
   2343 
   2344 				// Add new live values from scanning this block.
   2345 				for _, e := range live.contents() {
   2346 					d := e.val + delta
   2347 					if !t.contains(e.key) || d < t.get(e.key) {
   2348 						update = true
   2349 						t.set(e.key, d, e.aux)
   2350 					}
   2351 				}
   2352 				// Also add the correct arg from the saved phi values.
   2353 				// All phis are at distance delta (we consider them
    2354 				// to happen simultaneously at the start of the block).
   2355 				for _, v := range phis {
   2356 					id := v.Args[i].ID
   2357 					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
   2358 						update = true
   2359 						t.set(id, delta, v.Pos)
   2360 					}
   2361 				}
   2362 
   2363 				if !update {
   2364 					continue
   2365 				}
   2366 				// The live set has changed, update it.
   2367 				l := s.live[p.ID][:0]
   2368 				if cap(l) < t.size() {
   2369 					l = make([]liveInfo, 0, t.size())
   2370 				}
   2371 				for _, e := range t.contents() {
   2372 					l = append(l, liveInfo{e.key, e.val, e.aux})
   2373 				}
   2374 				s.live[p.ID] = l
   2375 				changed = true
   2376 			}
   2377 		}
   2378 
   2379 		if !changed {
   2380 			break
   2381 		}
   2382 	}
   2383 	if f.pass.debug > regDebug {
   2384 		fmt.Println("live values at end of each block")
   2385 		for _, b := range f.Blocks {
   2386 			fmt.Printf("  %s:", b)
   2387 			for _, x := range s.live[b.ID] {
   2388 				fmt.Printf(" v%d", x.ID)
   2389 				for _, e := range s.desired[b.ID].entries {
   2390 					if e.ID != x.ID {
   2391 						continue
   2392 					}
   2393 					fmt.Printf("[")
   2394 					first := true
   2395 					for _, r := range e.regs {
   2396 						if r == noRegister {
   2397 							continue
   2398 						}
   2399 						if !first {
   2400 							fmt.Printf(",")
   2401 						}
   2402 						fmt.Print(&s.registers[r])
   2403 						first = false
   2404 					}
   2405 					fmt.Printf("]")
   2406 				}
   2407 			}
   2408 			fmt.Printf(" avoid=%x", int64(s.desired[b.ID].avoid))
   2409 			fmt.Println()
   2410 		}
   2411 	}
   2412 }
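
// Worked example (schematic block, not taken from real input):
//
//	0: v1 = Op ...
//	1: v2 = Op ...
//	2: v3 = Op v1 ...
//
// Scanning backwards records v1 as live at the start of the block with
// distance 2, the index of its use. A predecessor p then sees v1 live at
// the end of p at distance 2+delta, where delta is likelyDistance,
// normalDistance, or unlikelyDistance depending on the branch prediction
// for the p->b edge. Values live across a call have their distances
// pushed out by unlikelyDistance, which discourages keeping them in
// registers across the call.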
   2413 
   2414 // A desiredState represents desired register assignments.
   2415 type desiredState struct {
   2416 	// Desired assignments will be small, so we just use a list
   2417 	// of valueID+registers entries.
   2418 	entries []desiredStateEntry
   2419 	// Registers that other values want to be in.  This value will
   2420 	// contain at least the union of the regs fields of entries, but
    2421 	// may also contain registers desired by values that were once in
   2422 	// this data structure but are no longer.
   2423 	avoid regMask
   2424 }
   2425 type desiredStateEntry struct {
   2426 	// (pre-regalloc) value
   2427 	ID ID
   2428 	// Registers it would like to be in, in priority order.
   2429 	// Unused slots are filled with noRegister.
   2430 	regs [4]register
   2431 }
   2432 
   2433 func (d *desiredState) clear() {
   2434 	d.entries = d.entries[:0]
   2435 	d.avoid = 0
   2436 }
   2437 
   2438 // get returns a list of desired registers for value vid.
   2439 func (d *desiredState) get(vid ID) [4]register {
   2440 	for _, e := range d.entries {
   2441 		if e.ID == vid {
   2442 			return e.regs
   2443 		}
   2444 	}
   2445 	return [4]register{noRegister, noRegister, noRegister, noRegister}
   2446 }
   2447 
   2448 // add records that we'd like value vid to be in register r.
   2449 func (d *desiredState) add(vid ID, r register) {
   2450 	d.avoid |= regMask(1) << r
   2451 	for i := range d.entries {
   2452 		e := &d.entries[i]
   2453 		if e.ID != vid {
   2454 			continue
   2455 		}
   2456 		if e.regs[0] == r {
   2457 			// Already known and highest priority
   2458 			return
   2459 		}
   2460 		for j := 1; j < len(e.regs); j++ {
   2461 			if e.regs[j] == r {
   2462 				// Move from lower priority to top priority
   2463 				copy(e.regs[1:], e.regs[:j])
   2464 				e.regs[0] = r
   2465 				return
   2466 			}
   2467 		}
   2468 		copy(e.regs[1:], e.regs[:])
   2469 		e.regs[0] = r
   2470 		return
   2471 	}
   2472 	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
   2473 }
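
// Priority example: starting with no entry for v, add(v, AX) yields
// regs = [AX, -, -, -]; a later add(v, CX) shifts it to [CX, AX, -, -];
// add(v, AX) again promotes AX back to the front, giving [AX, CX, -, -]
// (where - denotes noRegister). The most recent add always wins the top
// slot, and anything beyond four registers falls off the end.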
   2474 
   2475 func (d *desiredState) addList(vid ID, regs [4]register) {
   2476 	// regs is in priority order, so iterate in reverse order.
   2477 	for i := len(regs) - 1; i >= 0; i-- {
   2478 		r := regs[i]
   2479 		if r != noRegister {
   2480 			d.add(vid, r)
   2481 		}
   2482 	}
   2483 }
   2484 
   2485 // clobber erases any desired registers in the set m.
   2486 func (d *desiredState) clobber(m regMask) {
   2487 	for i := 0; i < len(d.entries); {
   2488 		e := &d.entries[i]
   2489 		j := 0
   2490 		for _, r := range e.regs {
   2491 			if r != noRegister && m>>r&1 == 0 {
   2492 				e.regs[j] = r
   2493 				j++
   2494 			}
   2495 		}
   2496 		if j == 0 {
   2497 			// No more desired registers for this value.
   2498 			d.entries[i] = d.entries[len(d.entries)-1]
   2499 			d.entries = d.entries[:len(d.entries)-1]
   2500 			continue
   2501 		}
   2502 		for ; j < len(e.regs); j++ {
   2503 			e.regs[j] = noRegister
   2504 		}
   2505 		i++
   2506 	}
   2507 	d.avoid &^= m
   2508 }
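
// Clobber example: an entry with regs = [AX, CX, -, -] compacts to
// [CX, -, -, -] when the mask includes AX; if the mask includes CX as
// well, the entry is deleted outright. The clobbered registers are also
// cleared from avoid.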
   2509 
   2510 // copy copies a desired state from another desiredState x.
   2511 func (d *desiredState) copy(x *desiredState) {
   2512 	d.entries = append(d.entries[:0], x.entries...)
   2513 	d.avoid = x.avoid
   2514 }
   2515 
   2516 // remove removes the desired registers for vid and returns them.
   2517 func (d *desiredState) remove(vid ID) [4]register {
   2518 	for i := range d.entries {
   2519 		if d.entries[i].ID == vid {
   2520 			regs := d.entries[i].regs
   2521 			d.entries[i] = d.entries[len(d.entries)-1]
   2522 			d.entries = d.entries[:len(d.entries)-1]
   2523 			return regs
   2524 		}
   2525 	}
   2526 	return [4]register{noRegister, noRegister, noRegister, noRegister}
   2527 }
   2528 
   2529 // merge merges another desired state x into d.
   2530 func (d *desiredState) merge(x *desiredState) {
   2531 	d.avoid |= x.avoid
   2532 	// There should only be a few desired registers, so
   2533 	// linear insert is ok.
   2534 	for _, e := range x.entries {
   2535 		d.addList(e.ID, e.regs)
   2536 	}
   2537 }
   2538 
   2539 func min32(x, y int32) int32 {
   2540 	if x < y {
   2541 		return x
   2542 	}
   2543 	return y
   2544 }
   2545 func max32(x, y int32) int32 {
   2546 	if x > y {
   2547 		return x
   2548 	}
   2549 	return y
   2550 }
   2551