      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Garbage collector: marking and scanning
      6 
      7 package runtime
      8 
      9 import "unsafe"
     10 
     11 // Scan all of the stacks, greying (or graying if in America) the referents
     12 // but not blackening them since the mark write barrier isn't installed.
     13 //go:nowritebarrier
     14 func gcscan_m() {
     15 	_g_ := getg()
     16 
     17 	// Grab the g that called us and potentially allow rescheduling.
     18 	// This allows it to be scanned like other goroutines.
     19 	mastergp := _g_.m.curg
     20 	casgstatus(mastergp, _Grunning, _Gwaiting)
     21 	mastergp.waitreason = "garbage collection scan"
     22 
     23 	// Span sweeping has been done by finishsweep_m.
     24 	// Long term we will want to make this goroutine runnable
     25 	// by placing it onto a scanenqueue state and then calling
     26 	// runtimerestartg(mastergp) to make it Grunnable.
     27 	// At the bottom we will want to return this p back to the scheduler.
     28 
     29 	// Reset every G's scan state to indicate that its stack has not been scanned yet.
     30 	local_allglen := gcResetGState()
     31 
     32 	work.ndone = 0
     33 	useOneP := uint32(1) // For now do not do this in parallel.
     34 	//	ackgcphase is not needed since we are not scanning running goroutines.
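        	// The parfor work count is the _RootCount fixed roots (data, bss,
        	// finalizers, span specials, and mcache flushes) handled in markroot,
        	// plus one item per goroutine stack.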
     35 	parforsetup(work.markfor, useOneP, uint32(_RootCount+local_allglen), false, markroot)
     36 	parfordo(work.markfor)
     37 
     38 	lock(&allglock)
     39 	// Check that gc work is done.
     40 	for i := 0; i < local_allglen; i++ {
     41 		gp := allgs[i]
     42 		if !gp.gcscandone {
     43 			throw("scan missed a g")
     44 		}
     45 	}
     46 	unlock(&allglock)
     47 
     48 	casgstatus(mastergp, _Gwaiting, _Grunning)
     49 	// Let the g that called us continue to run.
     50 }
     51 
     52 // ptrmask for an allocation containing a single pointer.
     53 var oneptrmask = [...]uint8{1}
     54 
     55 //go:nowritebarrier
     56 func markroot(desc *parfor, i uint32) {
     57 	// TODO: Consider using getg().m.p.ptr().gcw.
     58 	var gcw gcWork
     59 
     60 	// Note: if you add a case here, please also update heapdump.go:dumproots.
     61 	switch i {
     62 	case _RootData:
     63 		for datap := &firstmoduledata; datap != nil; datap = datap.next {
     64 			scanblock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, &gcw)
     65 		}
     66 
     67 	case _RootBss:
     68 		for datap := &firstmoduledata; datap != nil; datap = datap.next {
     69 			scanblock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, &gcw)
     70 		}
     71 
     72 	case _RootFinalizers:
     73 		for fb := allfin; fb != nil; fb = fb.alllink {
     74 			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
     75 		}
     76 
     77 	case _RootSpans:
     78 		// mark MSpan.specials
     79 		sg := mheap_.sweepgen
     80 		for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
     81 			s := work.spans[spanidx]
     82 			if s.state != mSpanInUse {
     83 				continue
     84 			}
     85 			if !useCheckmark && s.sweepgen != sg {
     86 				// sweepgen was updated (+2) during non-checkmark GC pass
     87 				print("sweep ", s.sweepgen, " ", sg, "\n")
     88 				throw("gc: unswept span")
     89 			}
     90 			for sp := s.specials; sp != nil; sp = sp.next {
     91 				if sp.kind != _KindSpecialFinalizer {
     92 					continue
     93 				}
     94 				// don't mark finalized object, but scan it so we
     95 				// retain everything it points to.
     96 				spf := (*specialfinalizer)(unsafe.Pointer(sp))
     97 				// A finalizer can be set for an inner byte of an object; find the object's beginning.
     98 				p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
     99 				if gcphase != _GCscan {
    100 					scanobject(p, &gcw) // scanned during mark termination
    101 				}
    102 				scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptrmask[0], &gcw)
    103 			}
    104 		}
    105 
    106 	case _RootFlushCaches:
    107 		if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
    108 			flushallmcaches()
    109 		}
    110 
    111 	default:
    112 		// the rest is scanning goroutine stacks
    113 		if uintptr(i-_RootCount) >= allglen {
    114 			throw("markroot: bad index")
    115 		}
    116 		gp := allgs[i-_RootCount]
    117 
    118 		// Remember when we first observed the G blocked;
    119 		// this is needed only for traceback output.
    120 		status := readgstatus(gp) // We are not in a scan state
    121 		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
    122 			gp.waitsince = work.tstart
    123 		}
    124 
    125 		// Shrink a stack if not much of it is being used, but not during the scan phase.
    126 		if gcphase == _GCmarktermination {
    127 			// Shrink during STW GCmarktermination phase thus avoiding
    128 			// complications introduced by shrinking during
    129 			// non-STW phases.
    130 			shrinkstack(gp)
    131 		}
    132 
    133 		scang(gp)
    134 	}
    135 
    136 	gcw.dispose()
    137 }
    138 
    139 // gcAssistAlloc records an allocation of size bytes and, if
    140 // allowAssist is true, may assist GC scanning in proportion to the
    141 // allocations performed by this mutator since the last assist.
    142 //
    143 // It should only be called while GC is in the mark phase (gcBlackenEnabled != 0).
    144 //
    145 // This must be called with preemption disabled.
    146 //go:nowritebarrier
    147 func gcAssistAlloc(size uintptr, allowAssist bool) {
    148 	// Find the G responsible for this assist.
    149 	gp := getg()
    150 	if gp.m.curg != nil {
    151 		gp = gp.m.curg
    152 	}
    153 
    154 	// Record allocation.
    155 	gp.gcalloc += size
    156 
    157 	if !allowAssist {
    158 		return
    159 	}
    160 
    161 	// Don't assist in non-preemptible contexts. These are
    162 	// generally fragile and won't allow the assist to block.
    163 	if getg() == gp.m.g0 {
    164 		return
    165 	}
    166 	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
    167 		return
    168 	}
    169 
    170 	// Compute the amount of assist scan work we need to do.
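        	// gcController.assistRatio converts allocated bytes into units of scan
        	// work owed; scan work this G has already done (gp.gcscanwork) is
        	// credited against that debt.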
    171 	scanWork := int64(gcController.assistRatio*float64(gp.gcalloc)) - gp.gcscanwork
    172 	// scanWork can be negative if the last assist scanned a large
    173 	// object and we're still ahead of our assist goal.
    174 	if scanWork <= 0 {
    175 		return
    176 	}
    177 
    178 retry:
    179 	// Steal as much credit as we can from the background GC's
    180 	// scan credit. This is racy and may drop the background
    181 	// credit below 0 if two mutators steal at the same time. This
    182 	// will just cause steals to fail until credit is accumulated
    183 	// again, so in the long run it doesn't really matter, but we
    184 	// do have to handle the negative credit case.
    185 	bgScanCredit := atomicloadint64(&gcController.bgScanCredit)
    186 	stolen := int64(0)
    187 	if bgScanCredit > 0 {
    188 		if bgScanCredit < scanWork {
    189 			stolen = bgScanCredit
    190 		} else {
    191 			stolen = scanWork
    192 		}
    193 		xaddint64(&gcController.bgScanCredit, -stolen)
    194 
    195 		scanWork -= stolen
    196 		gp.gcscanwork += stolen
    197 
    198 		if scanWork == 0 {
    199 			return
    200 		}
    201 	}
    202 
    203 	// Perform assist work
    204 	completed := false
    205 	systemstack(func() {
    206 		if atomicload(&gcBlackenEnabled) == 0 {
    207 			// The gcBlackenEnabled check in malloc races with the
    208 			// store that clears it, but an atomic check in every malloc
    209 			// would be a performance hit.
    210 			// Instead we recheck it here on the non-preemptible system
    211 			// stack to determine if we should perform an assist.
    212 
    213 			// GC is done, so ignore any remaining debt.
    214 			scanWork = 0
    215 			return
    216 		}
    217 		// Track time spent in this assist. Since we're on the
    218 		// system stack, this is non-preemptible, so we can
    219 		// just measure start and end time.
    220 		startTime := nanotime()
    221 
    222 		decnwait := xadd(&work.nwait, -1)
    223 		if decnwait == work.nproc {
    224 			println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
    225 			throw("work.nwait > work.nproc")
    226 		}
    227 
    228 		// Drain our own cached work first in the hope that it
    229 		// will be more cache friendly.
    230 		gcw := &getg().m.p.ptr().gcw
    231 		startScanWork := gcw.scanWork
    232 		gcDrainN(gcw, scanWork)
    233 		// Record that we did this much scan work.
    234 		workDone := gcw.scanWork - startScanWork
    235 		gp.gcscanwork += workDone
    236 		scanWork -= workDone
    237 		// If we are near the end of the mark phase
    238 		// dispose of the gcw.
    239 		if gcBlackenPromptly {
    240 			gcw.dispose()
    241 		}
    242 		// If this is the last worker and we ran out of work,
    243 		// signal a completion point.
    244 		incnwait := xadd(&work.nwait, +1)
    245 		if incnwait > work.nproc {
    246 			println("runtime: work.nwait=", incnwait,
    247 				"work.nproc=", work.nproc,
    248 				"gcBlackenPromptly=", gcBlackenPromptly)
    249 			throw("work.nwait > work.nproc")
    250 		}
    251 
    252 		if incnwait == work.nproc && work.full == 0 && work.partial == 0 {
    253 			// This has reached a background completion
    254 			// point.
    255 			if gcBlackenPromptly {
    256 				if work.bgMark1.done == 0 {
    257 					throw("completing mark 2, but bgMark1.done == 0")
    258 				}
    259 				work.bgMark2.complete()
    260 			} else {
    261 				work.bgMark1.complete()
    262 			}
    263 			completed = true
    264 		}
    265 		duration := nanotime() - startTime
    266 		_p_ := gp.m.p.ptr()
    267 		_p_.gcAssistTime += duration
    268 		if _p_.gcAssistTime > gcAssistTimeSlack {
    269 			xaddint64(&gcController.assistTime, _p_.gcAssistTime)
    270 			_p_.gcAssistTime = 0
    271 		}
    272 	})
    273 
    274 	if completed {
    275 		// We called complete() above, so we should yield to
    276 		// the now-runnable GC coordinator.
    277 		Gosched()
    278 
    279 		// It's likely that this assist wasn't able to pay off
    280 		// its debt, but it's also likely that the Gosched let
    281 		// the GC finish this cycle and there's no point in
    282 		// waiting. If the GC finished, skip the delay below.
    283 		if atomicload(&gcBlackenEnabled) == 0 {
    284 			scanWork = 0
    285 		}
    286 	}
    287 
    288 	if scanWork > 0 {
    289 		// We were unable to steal enough credit or perform
    290 		// enough work to pay off the assist debt. We need to
    291 		// do one of these before letting the mutator allocate
    292 		// more, so go around again after performing an
    293 		// interruptible sleep for 100 us (the same as the
    294 		// getfull barrier) to let other mutators run.
    295 		timeSleep(100 * 1000)
    296 		goto retry
    297 	}
    298 }
    299 
    300 //go:nowritebarrier
    301 func scanstack(gp *g) {
    302 	if gp.gcscanvalid {
    303 		if gcphase == _GCmarktermination {
    304 			gcRemoveStackBarriers(gp)
    305 		}
    306 		return
    307 	}
    308 
    309 	if readgstatus(gp)&_Gscan == 0 {
    310 		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
    311 		throw("scanstack - bad status")
    312 	}
    313 
    314 	switch readgstatus(gp) &^ _Gscan {
    315 	default:
    316 		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
    317 		throw("mark - bad status")
    318 	case _Gdead:
    319 		return
    320 	case _Grunning:
    321 		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
    322 		throw("scanstack: goroutine not stopped")
    323 	case _Grunnable, _Gsyscall, _Gwaiting:
    324 		// ok
    325 	}
    326 
    327 	if gp == getg() {
    328 		throw("can't scan our own stack")
    329 	}
    330 	mp := gp.m
    331 	if mp != nil && mp.helpgc != 0 {
    332 		throw("can't scan gchelper stack")
    333 	}
    334 
    335 	var sp, barrierOffset, nextBarrier uintptr
    336 	if gp.syscallsp != 0 {
    337 		sp = gp.syscallsp
    338 	} else {
    339 		sp = gp.sched.sp
    340 	}
    341 	switch gcphase {
    342 	case _GCscan:
    343 		// Install stack barriers during stack scan.
    344 		barrierOffset = uintptr(firstStackBarrierOffset)
    345 		nextBarrier = sp + barrierOffset
    346 
    347 		if debug.gcstackbarrieroff > 0 {
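        			// A maximal nextBarrier can never be exceeded by a frame
        			// pointer, so no stack barriers will be installed.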
    348 			nextBarrier = ^uintptr(0)
    349 		}
    350 
    351 		if gp.stkbarPos != 0 || len(gp.stkbar) != 0 {
    352 			// If this happens, it's probably because we
    353 			// scanned a stack twice in the same phase.
    354 			print("stkbarPos=", gp.stkbarPos, " len(stkbar)=", len(gp.stkbar), " goid=", gp.goid, " gcphase=", gcphase, "\n")
    355 			throw("g already has stack barriers")
    356 		}
    357 
    358 	case _GCmarktermination:
    359 		if int(gp.stkbarPos) == len(gp.stkbar) {
    360 			// gp hit all of the stack barriers (or there
    361 			// were none). Re-scan the whole stack.
    362 			nextBarrier = ^uintptr(0)
    363 		} else {
    364 			// Only re-scan up to the lowest un-hit
    365 			// barrier. Any frames above this have not
    366 			// executed since the _GCscan scan of gp and
    367 			// any writes through up-pointers to above
    368 			// this barrier had write barriers.
    369 			nextBarrier = gp.stkbar[gp.stkbarPos].savedLRPtr
    370 			if debugStackBarrier {
    371 				print("rescan below ", hex(nextBarrier), " in [", hex(sp), ",", hex(gp.stack.hi), ") goid=", gp.goid, "\n")
    372 			}
    373 		}
    374 
    375 		gcRemoveStackBarriers(gp)
    376 
    377 	default:
    378 		throw("scanstack in wrong phase")
    379 	}
    380 
    381 	gcw := &getg().m.p.ptr().gcw
    382 	n := 0
    383 	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
    384 		scanframeworker(frame, unused, gcw)
    385 
    386 		if frame.fp > nextBarrier {
    387 			// We skip installing a barrier on the bottom-most
    388 			// frame because on LR machines its LR is not
    389 			// on the stack.
    390 			if gcphase == _GCscan && n != 0 {
    391 				if gcInstallStackBarrier(gp, frame) {
    392 					barrierOffset *= 2
    393 					nextBarrier = sp + barrierOffset
    394 				}
    395 			} else if gcphase == _GCmarktermination {
    396 				// We just scanned a frame containing
    397 				// a return to a stack barrier. Since
    398 				// this frame never returned, we can
    399 				// stop scanning.
    400 				return false
    401 			}
    402 		}
    403 		n++
    404 
    405 		return true
    406 	}
    407 	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
    408 	tracebackdefers(gp, scanframe, nil)
    409 	if gcphase == _GCmarktermination {
    410 		gcw.dispose()
    411 	}
    412 	gp.gcscanvalid = true
    413 }
    414 
    415 // Scan a stack frame: local variables and function arguments/results.
    416 //go:nowritebarrier
    417 func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWork) {
    418 
    419 	f := frame.fn
    420 	targetpc := frame.continpc
    421 	if targetpc == 0 {
    422 		// Frame is dead.
    423 		return
    424 	}
    425 	if _DebugGC > 1 {
    426 		print("scanframe ", funcname(f), "\n")
    427 	}
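        	// continpc points just past the call; back up into the call
        	// instruction so the pcdata lookup reflects the state at the call
        	// site, unless we are at the function entry.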
    428 	if targetpc != f.entry {
    429 		targetpc--
    430 	}
    431 	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
    432 	if pcdata == -1 {
    433 		// We do not have a valid pcdata value, but there might be a
    434 		// stackmap for this function. It is likely that we are looking
    435 		// at the function prologue; assume so and hope for the best.
    436 		pcdata = 0
    437 	}
    438 
    439 	// Scan local variables if stack frame has been allocated.
    440 	size := frame.varp - frame.sp
    441 	var minsize uintptr
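        	// thechar is the toolchain's architecture letter: '6' is amd64,
        	// '8' is 386, '7' is arm64; other (LR-based) architectures take the
        	// default case. Frames no larger than minsize hold no locals to scan.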
    442 	switch thechar {
    443 	case '6', '8':
    444 		minsize = 0
    445 	case '7':
    446 		minsize = spAlign
    447 	default:
    448 		minsize = ptrSize
    449 	}
    450 	if size > minsize {
    451 		stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
    452 		if stkmap == nil || stkmap.n <= 0 {
    453 			print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
    454 			throw("missing stackmap")
    455 		}
    456 
    457 		// Locals bitmap information, scan just the pointers in locals.
    458 		if pcdata < 0 || pcdata >= stkmap.n {
    459 			// don't know where we are
    460 			print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
    461 			throw("scanframe: bad symbol table")
    462 		}
    463 		bv := stackmapdata(stkmap, pcdata)
    464 		size = uintptr(bv.n) * ptrSize
    465 		scanblock(frame.varp-size, size, bv.bytedata, gcw)
    466 	}
    467 
    468 	// Scan arguments.
    469 	if frame.arglen > 0 {
    470 		var bv bitvector
    471 		if frame.argmap != nil {
    472 			bv = *frame.argmap
    473 		} else {
    474 			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
    475 			if stkmap == nil || stkmap.n <= 0 {
    476 				print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
    477 				throw("missing stackmap")
    478 			}
    479 			if pcdata < 0 || pcdata >= stkmap.n {
    480 				// don't know where we are
    481 				print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
    482 				throw("scanframe: bad symbol table")
    483 			}
    484 			bv = stackmapdata(stkmap, pcdata)
    485 		}
    486 		scanblock(frame.argp, uintptr(bv.n)*ptrSize, bv.bytedata, gcw)
    487 	}
    488 }
    489 
    490 // gcMaxStackBarriers returns the maximum number of stack barriers
    491 // that can be installed in a stack of stackSize bytes.
    492 func gcMaxStackBarriers(stackSize int) (n int) {
    493 	if firstStackBarrierOffset == 0 {
    494 		// Special debugging case for inserting stack barriers
    495 		// at every frame. Steal half of the stack for the
    496 		// []stkbar. Technically, if the stack were to consist
    497 		// solely of return PCs we would need two thirds of
    498 		// the stack, but stealing that much breaks things and
    499 		// this doesn't happen in practice.
    500 		return stackSize / 2 / int(unsafe.Sizeof(stkbar{}))
    501 	}
    502 
    503 	offset := firstStackBarrierOffset
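        	// Barrier offsets double with each barrier installed (see scanstack),
        	// so the number of barriers needed is logarithmic in the stack size.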
    504 	for offset < stackSize {
    505 		n++
    506 		offset *= 2
    507 	}
    508 	return n + 1
    509 }
    510 
    511 // gcInstallStackBarrier installs a stack barrier over the return PC of frame.
    512 //go:nowritebarrier
    513 func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
    514 	if frame.lr == 0 {
    515 		if debugStackBarrier {
    516 			print("not installing stack barrier with no LR, goid=", gp.goid, "\n")
    517 		}
    518 		return false
    519 	}
    520 
    521 	if frame.fn.entry == cgocallback_gofuncPC {
    522 		// cgocallback_gofunc doesn't return to its LR;
    523 		// instead, its return path puts LR in g.sched.pc and
    524 		// switches back to the system stack on which
    525 		// cgocallback_gofunc was originally called. We can't
    526 		// have a stack barrier in g.sched.pc, so don't
    527 		// install one in this frame.
    528 		if debugStackBarrier {
    529 			print("not installing stack barrier over LR of cgocallback_gofunc, goid=", gp.goid, "\n")
    530 		}
    531 		return false
    532 	}
    533 
    534 	// Save the return PC and overwrite it with stackBarrier.
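        	// On LR machines the saved LR sits at frame.sp; on x86 the return PC
        	// is the word just below the frame pointer (frame.fp - regSize).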
    535 	var lrUintptr uintptr
    536 	if usesLR {
    537 		lrUintptr = frame.sp
    538 	} else {
    539 		lrUintptr = frame.fp - regSize
    540 	}
    541 	lrPtr := (*uintreg)(unsafe.Pointer(lrUintptr))
    542 	if debugStackBarrier {
    543 		print("install stack barrier at ", hex(lrUintptr), " over ", hex(*lrPtr), ", goid=", gp.goid, "\n")
    544 		if uintptr(*lrPtr) != frame.lr {
    545 			print("frame.lr=", hex(frame.lr))
    546 			throw("frame.lr differs from stack LR")
    547 		}
    548 	}
    549 
    550 	gp.stkbar = gp.stkbar[:len(gp.stkbar)+1]
    551 	stkbar := &gp.stkbar[len(gp.stkbar)-1]
    552 	stkbar.savedLRPtr = lrUintptr
    553 	stkbar.savedLRVal = uintptr(*lrPtr)
    554 	*lrPtr = uintreg(stackBarrierPC)
    555 	return true
    556 }
    557 
    558 // gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
    559 //go:nowritebarrier
    560 func gcRemoveStackBarriers(gp *g) {
    561 	if debugStackBarrier && gp.stkbarPos != 0 {
    562 		print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
    563 	}
    564 
    565 	// Remove stack barriers that we didn't hit.
    566 	for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
    567 		gcRemoveStackBarrier(gp, stkbar)
    568 	}
    569 
    570 	// Clear recorded stack barriers so copystack doesn't try to
    571 	// adjust them.
    572 	gp.stkbarPos = 0
    573 	gp.stkbar = gp.stkbar[:0]
    574 }
    575 
    576 // gcRemoveStackBarrier removes a single stack barrier. It is the
    577 // inverse operation of gcInstallStackBarrier.
    578 //
    579 // This is nosplit to ensure gp's stack does not move.
    580 //
    581 //go:nowritebarrier
    582 //go:nosplit
    583 func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
    584 	if debugStackBarrier {
    585 		print("remove stack barrier at ", hex(stkbar.savedLRPtr), " with ", hex(stkbar.savedLRVal), ", goid=", gp.goid, "\n")
    586 	}
    587 	lrPtr := (*uintreg)(unsafe.Pointer(stkbar.savedLRPtr))
    588 	if val := *lrPtr; val != uintreg(stackBarrierPC) {
    589 		printlock()
    590 		print("at *", hex(stkbar.savedLRPtr), " expected stack barrier PC ", hex(stackBarrierPC), ", found ", hex(val), ", goid=", gp.goid, "\n")
    591 		print("gp.stkbar=")
    592 		gcPrintStkbars(gp.stkbar)
    593 		print(", gp.stkbarPos=", gp.stkbarPos, ", gp.stack=[", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
    594 		throw("stack barrier lost")
    595 	}
    596 	*lrPtr = uintreg(stkbar.savedLRVal)
    597 }
    598 
    599 // gcPrintStkbars prints a []stkbar for debugging.
    600 func gcPrintStkbars(stkbar []stkbar) {
    601 	print("[")
    602 	for i, s := range stkbar {
    603 		if i > 0 {
    604 			print(" ")
    605 		}
    606 		print("*", hex(s.savedLRPtr), "=", hex(s.savedLRVal))
    607 	}
    608 	print("]")
    609 }
    610 
    611 // gcUnwindBarriers marks all stack barriers up to the frame containing
    612 // sp as hit and removes them. This is used during stack unwinding for
    613 // panic/recover and by heapBitsBulkBarrier to force stack re-scanning
    614 // when its destination is on the stack.
    615 //
    616 // This is nosplit to ensure gp's stack does not move.
    617 //
    618 //go:nosplit
    619 func gcUnwindBarriers(gp *g, sp uintptr) {
    620 	// On LR machines, if there is a stack barrier on the return
    621 	// from the frame containing sp, this will mark it as hit even
    622 	// though it isn't, but it's okay to be conservative.
    623 	before := gp.stkbarPos
    624 	for int(gp.stkbarPos) < len(gp.stkbar) && gp.stkbar[gp.stkbarPos].savedLRPtr < sp {
    625 		gcRemoveStackBarrier(gp, gp.stkbar[gp.stkbarPos])
    626 		gp.stkbarPos++
    627 	}
    628 	if debugStackBarrier && gp.stkbarPos != before {
    629 		print("skip barriers below ", hex(sp), " in goid=", gp.goid, ": ")
    630 		gcPrintStkbars(gp.stkbar[before:gp.stkbarPos])
    631 		print("\n")
    632 	}
    633 }
    634 
    635 // nextBarrierPC returns the original return PC of the next stack barrier.
    636 // Used by getcallerpc, so it must be nosplit.
    637 //go:nosplit
    638 func nextBarrierPC() uintptr {
    639 	gp := getg()
    640 	return gp.stkbar[gp.stkbarPos].savedLRVal
    641 }
    642 
    643 // setNextBarrierPC sets the return PC of the next stack barrier.
    644 // Used by setcallerpc, so it must be nosplit.
    645 //go:nosplit
    646 func setNextBarrierPC(pc uintptr) {
    647 	gp := getg()
    648 	gp.stkbar[gp.stkbarPos].savedLRVal = pc
    649 }
    650 
    651 // TODO(austin): Can we consolidate the gcDrain* functions?
    652 
    653 // gcDrain scans objects in work buffers, blackening grey
    654 // objects until all work buffers have been drained.
    655 // If flushScanCredit != -1, gcDrain flushes accumulated scan work
    656 // credit to gcController.bgScanCredit whenever gcw's local scan work
    657 // credit exceeds flushScanCredit.
    658 //go:nowritebarrier
    659 func gcDrain(gcw *gcWork, flushScanCredit int64) {
    660 	if !writeBarrierEnabled {
    661 		throw("gcDrain phase incorrect")
    662 	}
    663 
    664 	var lastScanFlush, nextScanFlush int64
    665 	if flushScanCredit != -1 {
    666 		lastScanFlush = gcw.scanWork
    667 		nextScanFlush = lastScanFlush + flushScanCredit
    668 	} else {
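        		// No flush threshold was requested; set it to MaxInt64 so it is
        		// never reached.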
    669 		nextScanFlush = int64(^uint64(0) >> 1)
    670 	}
    671 
    672 	for {
    673 		// If another proc wants a pointer, give it some.
    674 		if work.nwait > 0 && work.full == 0 {
    675 			gcw.balance()
    676 		}
    677 
    678 		b := gcw.get()
    679 		if b == 0 {
    680 			// work barrier reached
    681 			break
    682 		}
    683 		// If the current wbuf is filled by the scan, a new wbuf might be
    684 		// returned that holds only a single object. This
    685 		// could result in each iteration draining only a single object
    686 		// out of the wbuf passed in plus a single object placed
    687 		// into an empty wbuf in scanobject, so there could be
    688 		// a performance hit as we keep fetching fresh wbufs.
    689 		scanobject(b, gcw)
    690 
    691 		// Flush background scan work credit to the global
    692 		// account if we've accumulated enough locally so
    693 		// mutator assists can draw on it.
    694 		if gcw.scanWork >= nextScanFlush {
    695 			credit := gcw.scanWork - lastScanFlush
    696 			xaddint64(&gcController.bgScanCredit, credit)
    697 			lastScanFlush = gcw.scanWork
    698 			nextScanFlush = lastScanFlush + flushScanCredit
    699 		}
    700 	}
    701 	if flushScanCredit != -1 {
    702 		credit := gcw.scanWork - lastScanFlush
    703 		xaddint64(&gcController.bgScanCredit, credit)
    704 	}
    705 }
    706 
    707 // gcDrainUntilPreempt blackens grey objects until g.preempt is set.
    708 // This is best-effort, so it will return as soon as it is unable to
    709 // get work, even though there may be more work in the system.
    710 //go:nowritebarrier
    711 func gcDrainUntilPreempt(gcw *gcWork, flushScanCredit int64) {
    712 	if !writeBarrierEnabled {
    713 		println("gcphase =", gcphase)
    714 		throw("gcDrainUntilPreempt phase incorrect")
    715 	}
    716 
    717 	var lastScanFlush, nextScanFlush int64
    718 	if flushScanCredit != -1 {
    719 		lastScanFlush = gcw.scanWork
    720 		nextScanFlush = lastScanFlush + flushScanCredit
    721 	} else {
    722 		nextScanFlush = int64(^uint64(0) >> 1)
    723 	}
    724 
    725 	gp := getg()
    726 	for !gp.preempt {
    727 		// If the work queue is empty, balance. During
    728 		// concurrent mark we don't really know if anyone else
    729 		// can make use of this work, but even if we're the
    730 		// only worker, the total cost of this per cycle is
    731 		// only O(_WorkbufSize) pointer copies.
    732 		if work.full == 0 && work.partial == 0 {
    733 			gcw.balance()
    734 		}
    735 
    736 		b := gcw.tryGet()
    737 		if b == 0 {
    738 			// No more work
    739 			break
    740 		}
    741 		scanobject(b, gcw)
    742 
    743 		// Flush background scan work credit to the global
    744 		// account if we've accumulated enough locally so
    745 		// mutator assists can draw on it.
    746 		if gcw.scanWork >= nextScanFlush {
    747 			credit := gcw.scanWork - lastScanFlush
    748 			xaddint64(&gcController.bgScanCredit, credit)
    749 			lastScanFlush = gcw.scanWork
    750 			nextScanFlush = lastScanFlush + flushScanCredit
    751 		}
    752 	}
    753 	if flushScanCredit != -1 {
    754 		credit := gcw.scanWork - lastScanFlush
    755 		xaddint64(&gcController.bgScanCredit, credit)
    756 	}
    757 }
    758 
    759 // gcDrainN blackens grey objects until it has performed roughly
    760 // scanWork units of scan work. This is best-effort, so it may perform
    761 // less work if it fails to get a work buffer. Otherwise, it will
    762 // perform at least scanWork units of work, but may perform more because
    763 // scanning is always done in whole object increments.
    764 //go:nowritebarrier
    765 func gcDrainN(gcw *gcWork, scanWork int64) {
    766 	if !writeBarrierEnabled {
    767 		throw("gcDrainN phase incorrect")
    768 	}
    769 	targetScanWork := gcw.scanWork + scanWork
    770 	for gcw.scanWork < targetScanWork {
    771 		// This might be a good place to add prefetch code...
    772 		// if(wbuf.nobj > 4) {
    773 		//         PREFETCH(wbuf->obj[wbuf.nobj - 3]);
    774 		//  }
    775 		b := gcw.tryGet()
    776 		if b == 0 {
    777 			return
    778 		}
    779 		scanobject(b, gcw)
    780 	}
    781 }
    782 
    783 // scanblock scans b as scanobject would, but using an explicit
    784 // pointer bitmap instead of the heap bitmap.
    785 //
    786 // This is used to scan non-heap roots, so it does not update
    787 // gcw.bytesMarked or gcw.scanWork.
    788 //
    789 //go:nowritebarrier
    790 func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
    791 	// Use local copies of original parameters, so that a stack trace
    792 	// due to one of the throws below shows the original block
    793 	// base and extent.
    794 	b := b0
    795 	n := n0
    796 
    797 	arena_start := mheap_.arena_start
    798 	arena_used := mheap_.arena_used
    799 
    800 	for i := uintptr(0); i < n; {
    801 		// Find bits for the next word.
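        		// Each ptrmask byte covers 8 pointer-sized words, one bit per
        		// word, least significant bit first.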
    802 		bits := uint32(*addb(ptrmask, i/(ptrSize*8)))
    803 		if bits == 0 {
    804 			i += ptrSize * 8
    805 			continue
    806 		}
    807 		for j := 0; j < 8 && i < n; j++ {
    808 			if bits&1 != 0 {
    809 				// Same work as in scanobject; see comments there.
    810 				obj := *(*uintptr)(unsafe.Pointer(b + i))
    811 				if obj != 0 && arena_start <= obj && obj < arena_used {
    812 					if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
    813 						greyobject(obj, b, i, hbits, span, gcw)
    814 					}
    815 				}
    816 			}
    817 			bits >>= 1
    818 			i += ptrSize
    819 		}
    820 	}
    821 }
    822 
    823 // scanobject scans the object starting at b, adding pointers to gcw.
    824 // b must point to the beginning of a heap object; scanobject consults
    825 // the GC bitmap for the pointer mask and the spans for the size of the
    826 // object.
    827 //go:nowritebarrier
    828 func scanobject(b uintptr, gcw *gcWork) {
    829 	// Note that arena_used may change concurrently during
    830 	// scanobject and hence scanobject may encounter a pointer to
    831 	// a newly allocated heap object that is *not* in
    832 	// [start,used). It will not mark this object; however, we
    833 	// know that it was just installed by a mutator, which means
    834 	// that mutator will execute a write barrier and take care of
    835 	// marking it. This is even more pronounced on relaxed memory
    836 	// architectures since we access arena_used without barriers
    837 	// or synchronization, but the same logic applies.
    838 	arena_start := mheap_.arena_start
    839 	arena_used := mheap_.arena_used
    840 
    841 	// Find bits of the beginning of the object.
    842 	// b must point to the beginning of a heap object, so
    843 	// we can get its bits and span directly.
    844 	hbits := heapBitsForAddr(b)
    845 	s := spanOfUnchecked(b)
    846 	n := s.elemsize
    847 	if n == 0 {
    848 		throw("scanobject n == 0")
    849 	}
    850 
    851 	var i uintptr
    852 	for i = 0; i < n; i += ptrSize {
    853 		// Find bits for this word.
    854 		if i != 0 {
    855 			// Avoid needless hbits.next() on last iteration.
    856 			hbits = hbits.next()
    857 		}
    858 		// During checkmarking, 1-word objects store the checkmark
    859 		// in the type bit for the one word. The only one-word objects
    860 		// are pointers, or else they'd be merged with other non-pointer
    861 		// data into larger allocations.
    862 		bits := hbits.bits()
    863 		if i >= 2*ptrSize && bits&bitMarked == 0 {
    864 			break // no more pointers in this object
    865 		}
    866 		if bits&bitPointer == 0 {
    867 			continue // not a pointer
    868 		}
    869 
    870 		// Work here is duplicated in scanblock and above.
    871 		// If you make changes here, make changes there too.
    872 		obj := *(*uintptr)(unsafe.Pointer(b + i))
    873 
    874 		// At this point we have extracted the next potential pointer.
    875 		// Check if it points into heap and not back at the current object.
    876 		if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
    877 			// Mark the object.
    878 			if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
    879 				greyobject(obj, b, i, hbits, span, gcw)
    880 			}
    881 		}
    882 	}
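        	// i counts the bytes actually examined, which may be less than the
        	// object size n if the loop stopped after the last pointer word;
        	// scan work is credited only for what was scanned.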
    883 	gcw.bytesMarked += uint64(n)
    884 	gcw.scanWork += int64(i)
    885 }
    886 
    887 // Shade the object if it isn't already.
    888 // The object is not nil and known to be in the heap.
    889 // Preemption must be disabled.
    890 //go:nowritebarrier
    891 func shade(b uintptr) {
    892 	if obj, hbits, span := heapBitsForObject(b); obj != 0 {
    893 		gcw := &getg().m.p.ptr().gcw
    894 		greyobject(obj, 0, 0, hbits, span, gcw)
    895 		if gcphase == _GCmarktermination || gcBlackenPromptly {
    896 			// Ps aren't allowed to cache work during mark
    897 			// termination.
    898 			gcw.dispose()
    899 		}
    900 	}
    901 }
    902 
    903 // obj is the start of an object with mark bits hbits.
    904 // If it isn't already marked, mark it and enqueue into gcw.
    905 // base and off are for debugging only and could be removed.
    906 //go:nowritebarrier
    907 func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork) {
    908 	// obj should be start of allocation, and so must be at least pointer-aligned.
    909 	if obj&(ptrSize-1) != 0 {
    910 		throw("greyobject: obj not pointer-aligned")
    911 	}
    912 
    913 	if useCheckmark {
    914 		if !hbits.isMarked() {
    915 			printlock()
    916 			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
    917 			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
    918 
    919 			// Dump the source (base) object
    920 			gcDumpObject("base", base, off)
    921 
    922 			// Dump the object
    923 			gcDumpObject("obj", obj, ^uintptr(0))
    924 
    925 			throw("checkmark found unmarked object")
    926 		}
    927 		if hbits.isCheckmarked(span.elemsize) {
    928 			return
    929 		}
    930 		hbits.setCheckmarked(span.elemsize)
    931 		if !hbits.isCheckmarked(span.elemsize) {
    932 			throw("setCheckmarked and isCheckmarked disagree")
    933 		}
    934 	} else {
    935 		// If marked we have nothing to do.
    936 		if hbits.isMarked() {
    937 			return
    938 		}
    939 		hbits.setMarked()
    940 
    941 		// If this is a noscan object, fast-track it to black
    942 		// instead of greying it.
    943 		if !hbits.hasPointers(span.elemsize) {
    944 			gcw.bytesMarked += uint64(span.elemsize)
    945 			return
    946 		}
    947 	}
    948 
    949 	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
    950 	// seems like a nice optimization that can be added back in.
    951 	// There needs to be time between the PREFETCH and the use.
    952 	// Previously we put the obj in an 8 element buffer that is drained at a rate
    953 	// to give the PREFETCH time to do its work.
    954 	// Use of PREFETCHNTA might be more appropriate than PREFETCH
    955 
    956 	gcw.put(obj)
    957 }
    958 
    959 // gcDumpObject dumps the contents of obj for debugging and marks the
    960 // field at byte offset off in obj.
    961 func gcDumpObject(label string, obj, off uintptr) {
    962 	if obj < mheap_.arena_start || obj >= mheap_.arena_used {
    963 		print(label, "=", hex(obj), " is not a heap object\n")
    964 		return
    965 	}
    966 	k := obj >> _PageShift
    967 	x := k
    968 	x -= mheap_.arena_start >> _PageShift
    969 	s := h_spans[x]
    970 	print(label, "=", hex(obj), " k=", hex(k))
    971 	if s == nil {
    972 		print(" s=nil\n")
    973 		return
    974 	}
    975 	print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
    976 	for i := uintptr(0); i < s.elemsize; i += ptrSize {
    977 		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)))))
    978 		if i == off {
    979 			print(" <==")
    980 		}
    981 		print("\n")
    982 	}
    983 }
    984 
    985 // If gcBlackenPromptly is true we are in the second mark phase, so we allocate black.
    986 //go:nowritebarrier
    987 func gcmarknewobject_m(obj, size uintptr) {
    988 	if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
    989 		throw("gcmarknewobject called while doing checkmark")
    990 	}
    991 	heapBitsForAddr(obj).setMarked()
    992 	xadd64(&work.bytesMarked, int64(size))
    993 }
    994 
    995 // Checkmarking
    996 
    997 // To help debug the concurrent GC we remark with the world
    998 // stopped, ensuring that any object encountered has its normal
    999 // mark bit set. To do this we use an orthogonal bit
   1000 // pattern to indicate the object is marked. The following pattern
   1001 // uses the upper two bits in the object's boundary nibble.
   1002 // 01: scalar  not marked
   1003 // 10: pointer not marked
   1004 // 11: pointer     marked
   1005 // 00: scalar      marked
   1006 // XORing with 01 will flip the pattern from marked to unmarked and vice versa.
   1007 // The higher bit is 1 for pointers and 0 for scalars, whether the object
   1008 // is marked or not.
   1009 // The first nibble no longer holds the typeDead pattern indicating that
   1010 // there are no more pointers in the object. This information is held
   1011 // in the second nibble.
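        // For example, an unmarked pointer word has the pattern 10; XORing it
        // with 01 gives 11 (pointer, marked), and XORing 11 with 01 restores 10.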
   1012 
   1013 // If useCheckmark is true, marking of an object uses the
   1014 // checkmark bits (encoding above) instead of the standard
   1015 // mark bits.
   1016 var useCheckmark = false
   1017 
   1018 //go:nowritebarrier
   1019 func initCheckmarks() {
   1020 	useCheckmark = true
   1021 	for _, s := range work.spans {
   1022 		if s.state == _MSpanInUse {
   1023 			heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
   1024 		}
   1025 	}
   1026 }
   1027 
   1028 func clearCheckmarks() {
   1029 	useCheckmark = false
   1030 	for _, s := range work.spans {
   1031 		if s.state == _MSpanInUse {
   1032 			heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
   1033 		}
   1034 	}
   1035 }
   1036