// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "fmt"

// An edgeMemCtr records a backedge, together with the memory and
// counter phi functions at the target of the backedge that must
// be updated when a rescheduling check replaces the backedge.
type edgeMemCtr struct {
	e Edge
	m *Value // phi for memory at dest of e
	c *Value // phi for counter at dest of e
}

// A rewriteTarget is a value-argindex pair indicating
// where a rewrite is applied.  Note that this is for values,
// not for block controls, because block controls are not targets
// for the rewrites performed in inserting rescheduling checks.
type rewriteTarget struct {
	v *Value
	i int
}

type rewrite struct {
	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
	rewrites      []rewriteTarget // all the targets for this rewrite.
}

func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
}

const initialRescheduleCounterValue = 1021 // Largest 10-bit prime. 97 nSec loop bodies will check every 100 uSec.
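// (Spelled out: 1021 iterations at roughly 97 ns each is about 99 µs, i.e. on the
// order of 100 µs between rescheduling checks.)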

// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks decrement a per-function counter
	// shared by all loops, and when the counter becomes non-positive
	// a call is made to a rescheduling check in the runtime.
	//
	// Steps:
	// 1. Locate backedges.
	// 2. Record memory definitions at block end so that
	//    the SSA graph for mem can be properly modified.
	// 3. Define a counter and record its future uses (at backedges).
	//    (Same process as 2, applied to a single definition of the counter.
	//     The difference from mem is that there are zero-to-many existing mem
	//     definitions, versus exactly one for the new counter.)
	// 4. Ensure that phi functions that will be needed for mem and counter
	//    are present in the graph, initially with trivial inputs.
	// 5. Record all to-be-modified uses of mem and counter;
	//    apply modifications (split into two steps to simplify and
	//    avoid nagging order dependences).
	// 6. Rewrite backedges to include the counter check and reschedule check,
	//    and modify destination phi functions appropriately with new
	//    definitions for mem and counter.
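	//
	// As an illustrative sketch only (source-level pseudocode, not the exact SSA
	// this pass builds; "ctr" is a made-up name), a loop
	//
	//	for { body; goto top }
	//
	// becomes, with ctr initialized to initialRescheduleCounterValue on entry:
	//
	//	for {
	//		body
	//		ctr -= 1
	//		if ctr <= 0 {
	//			goschedguarded()
	//			ctr = initialRescheduleCounterValue
	//		}
	//		goto top
	//	}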

	if f.NoSplit { // nosplit functions don't reschedule.
		return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
		return
	}

	lastMems := findLastMems(f)

	idom := f.Idom()
	sdom := f.sdom()

	if f.pass.debug > 2 {
		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMemCtr{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
		tofixBackedges = append(tofixBackedges, edgeMemCtr{e, nil, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls).
	if lastMems[f.Entry.ID] == nil {
		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Line, OpInitMem, TypeMem)
	}

	memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.

	// Propagate last mem definitions forward through successor blocks.
	po := f.postorder()
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]
		mem := lastMems[b.ID]
		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
			// loop because there might be backedges that haven't been visited yet.
			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
		}
		memDefsAtBlockEnds[b.ID] = mem
	}

	// Set up the counter.  There are no pre-existing phis etc. for it.
	counter0 := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), initialRescheduleCounterValue)
	ctrDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, def visible at its end, if that def will be used.

	// There's a minor difference between memDefsAtBlockEnds and ctrDefsAtBlockEnds:
	// because the counter only matters for loops and the code that reaches them, it is nil for blocks where the ctr is no
	// longer live.  This avoids creation of dead phi functions.  This optimization is not applied to the mem variable
	// because it is harder and also less likely to be helpful, though dead code elimination ought to clean this out anyhow.

	for _, emc := range tofixBackedges {
		e := emc.e
		// Record the initial uses of counter0 (note that available-at-bottom and use are the same thing initially).
		// Each backedge will be rewritten to include a reschedule check, and that will use the counter.
		src := e.b.Preds[e.i].b
		ctrDefsAtBlockEnds[src.ID] = counter0
	}

	// Push uses towards the root.
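	// (A single postorder pass is enough for this approximation: for non-backedge
	// edges the successor appears before its predecessor in postorder, so the
	// successor's def is already recorded when the predecessor is visited, and
	// backedge sources were seeded directly in the loop above.)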
	for _, b := range f.postorder() {
		bd := ctrDefsAtBlockEnds[b.ID]
		if bd == nil {
			continue
		}
		for _, e := range b.Preds {
			p := e.b
			if ctrDefsAtBlockEnds[p.ID] == nil {
				ctrDefsAtBlockEnds[p.ID] = bd
			}
		}
	}

	// Maps from block to newly-inserted phi function in block.
	newmemphis := make(map[*Block]rewrite)
	newctrphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to flow graph.
	for i, emc := range tofixBackedges {
		e := emc.e
		h := e.b

		// Find the phi function for the memory input at "h", if there is one.
		var headerMemPhi *Value // look for header mem phi

		for _, v := range h.Values {
			if v.Op == OpPhi && v.Type.IsMemory() {
				headerMemPhi = v
			}
		}

		if headerMemPhi == nil {
			// If there is no header mem phi, make a trivial one using the mem def
			// visible at the end of the dominator.
			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
			headerMemPhi = newPhiFor(h, mem0)
			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis)

		}
		tofixBackedges[i].m = headerMemPhi

		var headerCtrPhi *Value
		rw, ok := newctrphis[h]
		if !ok {
			headerCtrPhi = newPhiFor(h, counter0)
			newctrphis[h] = rewrite{before: counter0, after: headerCtrPhi}
			addDFphis(counter0, h, h, f, ctrDefsAtBlockEnds, newctrphis)
		} else {
			headerCtrPhi = rw.after
		}
		tofixBackedges[i].c = headerCtrPhi
	}

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis)
	rewriteNewPhis(f.Entry, f.Entry, f, ctrDefsAtBlockEnds, newctrphis)

	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
		}

		for b, r := range newctrphis {
			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	for _, r := range newctrphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	zero := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 0)
	one := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 1)

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
		e := emc.e
		headerMemPhi := emc.m
		headerCtrPhi := emc.c
		h := e.b
		i := e.i
		p := h.Preds[i]
		bb := p.b
		mem0 := headerMemPhi.Args[i]
		ctr0 := headerCtrPhi.Args[i]
		// The backedge being rewritten runs from bb to the header h; e and p
		// describe it from the two ends.
		// Because we're going to insert a rare call, make sure the
		// looping edge still looks likely.
		likely := BranchLikely
		if p.i != 0 {
			likely = BranchUnlikely
		}
		bb.Likely = likely

		// Rewrite the edge to include a reschedule check.
		// Existing edges:
		//
		// bb.Succs[p.i] == Edge{h, i}
		// h.Preds[i] == p == Edge{bb, p.i}
		//
		// new block(s):
		// test:
		//    ctr1 := ctr0 - 1
		//    if ctr1 <= 0 { goto sched }
		//    goto join
		// sched:
		//    mem1 := call resched (mem0)
		//    goto join
		// join:
		//    ctr2 := phi(ctr1, counter0) // counter0 is the constant
		//    mem2 := phi(mem0, mem1)
		//    goto h
		//
		// and correct arg i of headerMemPhi and headerCtrPhi
		//
		// EXCEPT: a block containing only phi functions is bad
		// for the register allocator.  Therefore, there is no
		// join; branches that would target join target the header
		// instead, and the other phi functions within the header are
		// adjusted for the additional input.

		test := f.NewBlock(BlockIf)
		sched := f.NewBlock(BlockPlain)

		test.Line = bb.Line
		sched.Line = bb.Line

		//    ctr1 := ctr0 - 1
		//    if ctr1 <= 0 { goto sched }
		//    goto header
		ctr1 := test.NewValue2(bb.Line, OpSub32, f.Config.fe.TypeInt32(), ctr0, one)
		cmp := test.NewValue2(bb.Line, OpLeq32, f.Config.fe.TypeBool(), ctr1, zero)
		test.SetControl(cmp)
		test.AddEdgeTo(sched) // if true
		// if false -- rewrite edge to header.
		// do NOT remove+add, because that will perturb all the other phi functions
		// as well as mess up other edges to the header.
		test.Succs = append(test.Succs, Edge{h, i})
		h.Preds[i] = Edge{test, 1}
		headerMemPhi.SetArg(i, mem0)
		headerCtrPhi.SetArg(i, ctr1)

		test.Likely = BranchUnlikely

		// sched:
		//    mem1 := call resched (mem0)
		//    goto header
		resched := f.Config.fe.Syslook("goschedguarded")
		mem1 := sched.NewValue1A(bb.Line, OpStaticCall, TypeMem, resched, mem0)
		sched.AddEdgeTo(h)
		headerMemPhi.AddArg(mem1)
		headerCtrPhi.AddArg(counter0)

		bb.Succs[p.i] = Edge{test, 0}
		test.Preds = append(test.Preds, Edge{bb, p.i})

		// Must correct all the other phi functions in the header for the new incoming edge.
		// Except for the mem and counter phis, the new argument is the same value seen
		// on the original backedge at index i.
		for _, v := range h.Values {
			if v.Op == OpPhi && v != headerMemPhi && v != headerCtrPhi {
				v.AddArg(v.Args[i])
			}
		}
	}

	f.invalidateCFG()

	if f.pass.debug > 2 {
		sdom = newSparseTree(f, f.Idom())
		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	return
}

// newPhiFor inserts a new Phi function into b,
// with all inputs set to v.
func newPhiFor(b *Block, v *Value) *Value {
	phiV := b.NewValue0(b.Line, OpPhi, v.Type)

	for range b.Preds {
		phiV.AddArg(v)
	}
	return phiV
}

// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
// in block h will replace a previous definition.  Block b is the block currently being processed;
// if b has its own phi definition then it takes the place of h.
// defsForUses provides information about other definitions of the variable that are present
// (and if nil, indicates that the variable is no longer live).
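// Note that rewrites are only recorded here; they are applied later by
// insertLoopReschedChecks (step 5 of the plan above), after all new phis exist,
// which avoids order dependences between finding uses and modifying them.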
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
		h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block
	if x != nil { // don't waste time on the common case of no definition.
		p := &change.rewrites
		for _, v := range b.Values {
			if v == y { // don't rewrite self -- phi inputs are handled below.
				continue
			}
			for i, w := range v.Args {
				if w != x {
					continue
				}
				*p = append(*p, rewriteTarget{v, i})
			}
		}

		// Rewrite appropriate inputs of phis reached in successors
		// in dominance frontier, self, and dominated.
		// If the variable def reaching uses in b is itself defined in b, then the new phi function
		// does not reach the successors of b.  (This assumes a bit about the structure of the
		// phi use-def graph, but it's true for memory and the inserted counter.)
		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
			for _, e := range b.Succs {
				s := e.b
				if sphi, ok := newphis[s]; ok { // saves time to find the phi this way.
					*p = append(*p, rewriteTarget{sphi.after, e.i})
					continue
				}
				for _, v := range s.Values {
					if v.Op == OpPhi && v.Args[e.i] == x {
						*p = append(*p, rewriteTarget{v, e.i})
						break
					}
				}
			}
		}
		newphis[h] = change
	}

	sdom := f.sdom()

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		rewriteNewPhis(h, c, f, defsForUses, newphis) // TODO: convert to explicit stack from recursion.
	}
}

// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
// either b = h or h strictly dominates b.
// These newly created phis are themselves new definitions that may require addition of their
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
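// As a small illustration (block names are made up for this comment): in a CFG
//
//	entry -> h -> s
//	entry ------> s
//
// s is a successor of h but is not strictly dominated by h (it is reachable from
// entry without passing through h), so s is in h's dominance frontier and a new
// definition placed in h requires a trivial phi in s.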
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
		return
	}
	sdom := f.sdom()
	idom := f.Idom()
outer:
	for _, e := range b.Succs {
		s := e.b
		// check phi functions in the dominance frontier
		if sdom.isAncestor(h, s) {
			continue // h dominates s, successor of b, therefore s is not in the frontier.
		}
		if _, ok := newphis[s]; ok {
			continue // successor s of b already has a new phi function, so there is no need to add another.
		}
		if x != nil {
			for _, v := range s.Values {
				if v.Op == OpPhi && v.Args[e.i] == x {
					continue outer // successor s of b has an old phi function, so there is no need to add another.
				}
			}
		}

		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
		headerPhi := newPhiFor(s, old)
		// the new phi will replace "old" in block s and all blocks dominated by s.
		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
		addDFphis(old, s, s, f, defForUses, newphis)        // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		addDFphis(x, h, c, f, defForUses, newphis) // TODO: convert to explicit stack from recursion.
	}
}

// findLastMems maps block ids to the last memory-output op in each block, if any.
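// (Here "last memory-output op" means the memory-producing value in the block that
// no later store in the same block consumes; a block whose only memory value is a
// phi reports that phi, and a block with no memory operations reports nil.)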
func findLastMems(f *Func) []*Value {

	var stores []*Value
	lastMems := make([]*Value, f.NumBlocks())
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  storeUse contains stores which are used by a subsequent store.
		storeUse.clear()
		stores = stores[:0]
		var memPhi *Value
		for _, v := range b.Values {
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					memPhi = v
				}
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				if v.Op == OpSelect1 {
					// Use the arg of the tuple-generating op.
					v = v.Args[0]
				}
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			lastMems[b.ID] = memPhi
			continue
		}

		// find last store in the block
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}
		lastMems[b.ID] = last
	}
	return lastMems
}

type backedgesState struct {
	b *Block
	i int
}

// backedges returns a slice of successor edges that are back
// edges.  For reducible loops, edge.b is the header.
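// The walk is an iterative depth-first search over the CFG using the package's
// mark states: notFound (not yet visited), notExplored (visited and still on the
// DFS stack), and done (fully explored). A successor edge whose target is still
// notExplored points back into the current DFS path, so it is recorded as a backedge.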
func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

	for len(stack) > 0 {
		l := len(stack)
		x := stack[l-1]
		if x.i < len(x.b.Succs) {
			e := x.b.Succs[x.i]
			stack[l-1].i++
			s := e.b
			if mark[s.ID] == notFound {
				mark[s.ID] = notExplored
				stack = append(stack, backedgesState{s, 0})
			} else if mark[s.ID] == notExplored {
				edges = append(edges, e)
			}
		} else {
			mark[x.b.ID] = done
			stack = stack[0 : l-1]
		}
	}
	return edges
}
    518