// Copyright 2009 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are described
// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
// to be visited during GC. The bits in each byte are consumed starting with
// the low bit: 1<<0, 1<<1, and so on.
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start.
// That is, the byte at address start-1 holds the 2-bit entries for the four words
// start through start+3*ptrSize, the byte at start-2 holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In the third and later words, the high bit indicates that the object is still
// being described. In these words, if a bit pair with a high bit 0 is encountered,
// the low bit can also be assumed to be 0, and the object description is over.
// This 00 is called the ``dead'' encoding: it signals that the rest of the words
// in the object are uninteresting to the garbage collector.
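//
// For example (an illustrative sketch, not from the original source), an
// allocated three-word object whose first word is a pointer and whose other
// words are scalars could be described by the bit pairs
//
//	word 0: ?1  (high bit: GC mark; low bit 1, a pointer)
//	word 1: ?0  (high bit: checkmark; low bit 0, a scalar)
//	word 2: 00  (``dead'': no more pointers in this object)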
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position) not marked,
// not checkmarked, and the dead encoding.
// These properties must be preserved when modifying the encoding.
//
// Checkmarks
//
// In a concurrent garbage collector, one worries about failing to mark
// a live object due to mutations without write barriers or bugs in the
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
// for the second word of the object holds the checkmark bit.
// When not in checkmark mode, this bit is set to 1.
//
// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
// means every allocated object has two words, so there is room for the
// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
// just one word, so the second bit pair is not available for encoding the
// checkmark. However, because non-pointer allocations are combined
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
// must be a pointer, so the type bit in the first word is not actually needed.
// It is still used in general, except that in checkmark mode the type bit is
// repurposed as the checkmark bit and then reinitialized (to 1) as the
// type bit when checkmark mode finishes.

package runtime

import "unsafe"

const (
	bitPointer = 1 << 0
	bitMarked  = 1 << 4

	heapBitsShift   = 1                 // shift offset between successive bitPointer or bitMarked entries
	heapBitmapScale = ptrSize * (8 / 2) // number of data bytes described by one heap bitmap byte

	// all mark/pointer bits in a byte
	bitMarkedAll  = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)
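
// Written out, bitPointerAll == 0x0f and bitMarkedAll == 0xf0: the four
// pointer bits of a bitmap byte occupy the low nibble and the four mark
// bits the high nibble, matching the split encoding described above.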

// addb returns the byte pointer p+n.
//go:nowritebarrier
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//go:nowritebarrier
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// mHeap_MapBits is called each time arena_used is extended.
// It maps any additional bitmap memory needed for the new arena memory.
// It must be called with the expected new value of arena_used,
// *before* h.arena_used has been updated.
// Waiting to update arena_used until after the memory has been mapped
// avoids faults when other threads try to access the bitmap immediately
// after observing the change to arena_used.
//
//go:nowritebarrier
func mHeap_MapBits(h *mheap, arena_used uintptr) {
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	const bitmapChunk = 8192

	n := (arena_used - mheap_.arena_start) / heapBitmapScale
	n = round(n, bitmapChunk)
	n = round(n, _PhysPageSize)
	if h.bitmap_mapped >= n {
		return
	}

	sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
	h.bitmap_mapped = n
}
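
// As a sketch of the arithmetic above (illustrative, assuming a 64-bit
// system, where heapBitmapScale == 32): growing arena_used by 1MB needs
// 1<<20/32 == 32KB of bitmap, already a multiple of the 8KB bitmapChunk.
// The new bytes are mapped below the previously mapped ones, since the
// bitmap grows downward from arena_start.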

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	bitp  *uint8
	shift uint32
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
	// 2 bits per word, 4 pairs per byte, and the mask is hard-coded.
	off := (addr - mheap_.arena_start) / ptrSize
	return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/4 - 1)), uint32(off & 3)}
}
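
// For example (illustrative arithmetic): for the word at
// addr == mheap_.arena_start + 5*ptrSize, off == 5, so the bit pair lives
// in the byte at arena_start - 5/4 - 1 == arena_start - 2, at shift
// 5&3 == 1.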

// heapBitsForSpan returns the heapBits for the span base address base.
func heapBitsForSpan(base uintptr) (hbits heapBits) {
	if base < mheap_.arena_start || base >= mheap_.arena_used {
		throw("heapBitsForSpan: base out of range")
	}
	hbits = heapBitsForAddr(base)
	if hbits.shift != 0 {
		throw("heapBitsForSpan: unaligned start")
	}
	return hbits
}

// heapBitsForObject returns the base address for the heap object
// containing the address p, along with the heapBits for base.
// If p does not point into a heap object, it returns base == 0;
// otherwise it returns the base of the object.
func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) {
	arenaStart := mheap_.arena_start
	if p < arenaStart || p >= mheap_.arena_used {
		return
	}
	off := p - arenaStart
	idx := off >> _PageShift
	// p points into the heap, but possibly to the middle of an object.
	// Consult the span table to find the block beginning.
	k := p >> _PageShift
	s = h_spans[idx]
	if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse {
		if s == nil || s.state == _MSpanStack {
			// If s is nil, the virtual address has never been part of the heap.
			// This pointer may be to some mmap'd region, so we allow it.
			// Pointers into stacks are also ok, the runtime manages these explicitly.
			return
		}

		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		// TODO(rsc): Check if this still happens.
		if debug.invalidptr != 0 {
			// Still happens sometimes. We don't know why.
			printlock()
			print("runtime: objectstart Span weird: p=", hex(p), " k=", hex(k))
			if s == nil {
				print(" s=nil\n")
			} else {
				print(" s.start=", hex(s.start<<_PageShift), " s.limit=", hex(s.limit), " s.state=", s.state, "\n")
			}
			printunlock()
			throw("objectstart: bad pointer in unexpected span")
		}
		return
	}
	// If this span holds objects of a power-of-2 size, just mask off the bits
	// pointing to the interior of the object. Otherwise use the size to compute the base.
	if s.baseMask != 0 {
		// optimize for power of 2 sized objects.
		base = s.base()
		base = base + (p-base)&s.baseMask
		// base = p & s.baseMask is faster for small spans,
		// but doesn't work for large spans.
		// Overall, it's faster to use the more general computation above.
	} else {
		base = s.base()
		if p-base >= s.elemsize {
			// n := (p - base) / s.elemsize, using division by multiplication
			n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
			base += n * s.elemsize
		}
	}
	// Now that we know the actual base, compute heapBits to return to caller.
	hbits = heapBitsForAddr(base)
	return
}
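
// As a worked example of the non-power-of-2 path above (illustrative):
// in a span of 48-byte objects, a pointer at p == s.base() + 100 lies in
// object n == 100/48 == 2, so base becomes s.base() + 96. The
// divShift/divMul/divShift2 sequence computes that division by
// multiplication, avoiding a hardware divide on this hot path.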

// prefetch the bits.
func (h heapBits) prefetch() {
	prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
}

// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		return heapBits{h.bitp, h.shift + heapBitsShift}
	}
	return heapBits{subtract1(h.bitp), 0}
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
func (h heapBits) forward(n uintptr) heapBits {
	n += uintptr(h.shift) / heapBitsShift
	return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
}
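
// For example (illustrative): if h.shift == 2 (the third bit pair in
// *h.bitp), then h.forward(3) computes n == 3+2 == 5, landing 5/4 == 1
// byte back in memory at bit-pair shift 5%4 == 1.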

// bits returns the heap bits for the current word.
// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
func (h heapBits) bits() uint32 {
	return uint32(*h.bitp) >> h.shift
}

// isMarked reports whether the heap bits have the marked bit set.
// h must describe the initial word of the object.
func (h heapBits) isMarked() bool {
	return *h.bitp&(bitMarked<<h.shift) != 0
}

// setMarked sets the marked bit in the heap bits, atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarked() {
	// Each byte of GC bitmap holds info for four words.
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomicor8(h.bitp, bitMarked<<h.shift)
}

// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarkedNonAtomic() {
	*h.bitp |= bitMarked << h.shift
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return (*h.bitp>>h.shift)&bitPointer != 0
}

// hasPointers reports whether the given object has any pointers.
// It must be told how large the object at h is, so that it does not read too
// far into the bitmap.
// h must describe the initial word of the object.
func (h heapBits) hasPointers(size uintptr) bool {
	if size == ptrSize { // 1-word objects are always pointers
		return true
	}
	// Otherwise, at least a 2-word object, and at least 2-word aligned,
	// so h.shift is either 0 or 2, so we know we can get the bits for the
	// first two words out of *h.bitp.
	// If either of the first two words is a pointer, the object is not pointer-free.
	b := uint32(*h.bitp >> h.shift)
	if b&(bitPointer|bitPointer<<heapBitsShift) != 0 {
		return true
	}
	if size == 2*ptrSize {
		return false
	}
	// At least a 4-word object. Check scan bit (aka marked bit) in third word.
	if h.shift == 0 {
		return b&(bitMarked<<(2*heapBitsShift)) != 0
	}
	return uint32(*subtract1(h.bitp))&bitMarked != 0
}

// isCheckmarked reports whether the heap bits have the checkmarked bit set.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) isCheckmarked(size uintptr) bool {
	if size == ptrSize {
		return (*h.bitp>>h.shift)&bitPointer != 0
	}
	// All multiword objects are 2-word aligned,
	// so we know that the initial word's 2-bit pair
	// and the second word's 2-bit pair are in the
	// same heap bitmap byte, *h.bitp.
	return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0
}

// setCheckmarked sets the checkmarked bit.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) setCheckmarked(size uintptr) {
	if size == ptrSize {
		atomicor8(h.bitp, bitPointer<<h.shift)
		return
	}
	atomicor8(h.bitp, bitMarked<<(heapBitsShift+h.shift))
}

// heapBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the heap bitmap to locate those pointer slots.
// This executes the write barriers necessary after a memmove.
// Both p and size must be pointer-aligned.
// The range [p, p+size) must lie within a single allocation.
//
// Callers should call heapBitsBulkBarrier immediately after
// calling memmove(p, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
//
// The heap bitmap is not maintained for allocations containing
// no pointers at all; any caller of heapBitsBulkBarrier must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.kind&kindNoPointers.
//
//go:nosplit
func heapBitsBulkBarrier(p, size uintptr) {
	if (p|size)&(ptrSize-1) != 0 {
		throw("heapBitsBulkBarrier: unaligned arguments")
	}
	if !writeBarrierEnabled {
		return
	}
	if !inheap(p) {
		// If p is on the stack and in a higher frame than the
		// caller, we either need to execute write barriers on
		// it (which is what happens for normal stack writes
		// through pointers to higher frames), or we need to
		// force the mark termination stack scan to scan the
		// frame containing p.
		//
		// Executing write barriers on p is complicated in the
		// general case because we either need to unwind the
		// stack to get the stack map, or we need the type's
		// bitmap, which may be a GC program.
		//
		// Hence, we opt for forcing the re-scan to scan the
		// frame containing p, which we can do by simply
		// unwinding the stack barriers between the current SP
		// and p's frame.
		gp := getg().m.curg
		if gp != nil && gp.stack.lo <= p && p < gp.stack.hi {
			// Run on the system stack to give it more
			// stack space.
			systemstack(func() {
				gcUnwindBarriers(gp, p)
			})
		}
		return
	}

	h := heapBitsForAddr(p)
	for i := uintptr(0); i < size; i += ptrSize {
		if h.isPointer() {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		h = h.next()
	}
}
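
// A typical call site looks like this (a minimal sketch; dst, src, and
// typ are hypothetical locals, not code from this file):
//
//	memmove(dst, src, typ.size)
//	if typ.kind&kindNoPointers == 0 {
//		heapBitsBulkBarrier(uintptr(dst), typ.size)
//	}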

// typeBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the type bitmap to locate those pointer slots.
// The type typ must correspond exactly to [p, p+size).
// This executes the write barriers necessary after a copy.
// Both p and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right after memmove,
// and the GC must not complete between those two.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", *typ._string, " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", *typ._string, " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrierEnabled {
		return
	}
	ptrmask := typ.gcdata
	var bits uint32
	for i := uintptr(0); i < typ.ptrdata; i += ptrSize {
		if i&(ptrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
	}
}
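
// For example (illustrative): sending a struct { p *int; n int } on a
// channel copies two words; typ.ptrdata == ptrSize, so the loop above
// reads one ptrmask byte, finds bit 0 set, and executes a write barrier
// for the first word only.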

// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
func (h heapBits) initSpan(size, n, total uintptr) {
	if total%heapBitmapScale != 0 {
		throw("initSpan: unaligned length")
	}
	nbyte := total / heapBitmapScale
	if ptrSize == 8 && size == ptrSize {
		end := h.bitp
		bitp := subtractb(end, nbyte-1)
		for {
			*bitp = bitPointerAll
			if bitp == end {
				break
			}
			bitp = add1(bitp)
		}
		return
	}
	memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}
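
// For instance (illustrative, on a 64-bit system): an 8KB span of
// pointer-sized objects has total == 8192 and nbyte == 8192/32 == 256,
// and each of those 256 bitmap bytes is set to bitPointerAll (0x0f),
// marking every word as a pointer with no mark bits set.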

// initCheckmarkSpan initializes a span for being checkmarked.
// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
	// On 32-bit systems, ptrSize == 8 is a compile-time constant false,
	// so the compiler eliminates this code entirely.
	if ptrSize == 8 && size == ptrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must clear type bit (checkmark bit) of every word.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp &^= bitPointerAll
			bitp = subtract1(bitp)
		}
		return
	}
	for i := uintptr(0); i < n; i++ {
		*h.bitp &^= bitMarked << (heapBitsShift + h.shift)
		h = h.forward(size / ptrSize)
	}
}

// clearCheckmarkSpan undoes all the checkmarking in a span.
// The actual checkmark bits are ignored, so the only work to do
// is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
	// On 32-bit systems, ptrSize == 8 is a compile-time constant false,
	// so the compiler eliminates this code entirely.
	if ptrSize == 8 && size == ptrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must clear type bit (checkmark bit) of every word.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp |= bitPointerAll
			bitp = subtract1(bitp)
		}
	}
}

// heapBitsSweepSpan coordinates the sweeping of a span by reading
// and updating the corresponding heap bitmap entries.
// For each free object in the span, heapBitsSweepSpan sets the type
// bits for the first two words (or one for single-word objects) to typeDead
// and then calls f(p), where p is the object's base address.
// f is expected to add the object to a free list.
// For non-free objects, heapBitsSweepSpan turns off the marked bit.
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
	h := heapBitsForSpan(base)
	switch {
	default:
		throw("heapBitsSweepSpan")
	case ptrSize == 8 && size == ptrSize:
		// Consider mark bits in all four 2-bit entries of each bitmap byte.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			x := uint32(*bitp)
			// Note that unlike the other size cases, we leave the pointer bits set here.
			// These are initialized during initSpan when the span is created and left
			// in place the whole time the span is used for pointer-sized objects.
			// That lets heapBitsSetType avoid an atomic update to set the pointer bit
			// during allocation.
			if x&bitMarked != 0 {
				x &^= bitMarked
			} else {
				f(base + i*ptrSize)
			}
			if x&(bitMarked<<heapBitsShift) != 0 {
				x &^= bitMarked << heapBitsShift
			} else {
				f(base + (i+1)*ptrSize)
			}
			if x&(bitMarked<<(2*heapBitsShift)) != 0 {
				x &^= bitMarked << (2 * heapBitsShift)
			} else {
				f(base + (i+2)*ptrSize)
			}
			if x&(bitMarked<<(3*heapBitsShift)) != 0 {
				x &^= bitMarked << (3 * heapBitsShift)
			} else {
				f(base + (i+3)*ptrSize)
			}
			*bitp = uint8(x)
			bitp = subtract1(bitp)
		}

	case size%(4*ptrSize) == 0:
		// Mark bit is in first word of each object.
		// Each object starts at bit 0 of a heap bitmap byte.
		bitp := h.bitp
		step := size / heapBitmapScale
		for i := uintptr(0); i < n; i++ {
			x := uint32(*bitp)
			if x&bitMarked != 0 {
				x &^= bitMarked
			} else {
				x = 0
				f(base + i*size)
			}
			*bitp = uint8(x)
			bitp = subtractb(bitp, step)
		}

	case size%(4*ptrSize) == 2*ptrSize:
		// Mark bit is in first word of each object,
		// but every other object starts halfway through a heap bitmap byte.
		// Unroll loop 2x to handle alternating shift count and step size.
		bitp := h.bitp
		step := size / heapBitmapScale
		var i uintptr
		for i = uintptr(0); i < n; i += 2 {
			x := uint32(*bitp)
			if x&bitMarked != 0 {
				x &^= bitMarked
			} else {
				x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
				f(base + i*size)
				if size > 2*ptrSize {
					x = 0
				}
			}
			*bitp = uint8(x)
			if i+1 >= n {
				break
			}
			bitp = subtractb(bitp, step)
			x = uint32(*bitp)
			if x&(bitMarked<<(2*heapBitsShift)) != 0 {
				x &^= bitMarked << (2 * heapBitsShift)
			} else {
				x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
				f(base + (i+1)*size)
				if size > 2*ptrSize {
					*subtract1(bitp) = 0
				}
			}
			*bitp = uint8(x)
			bitp = subtractb(bitp, step+1)
		}
	}
}
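
// To see which case above applies (illustrative, on a 64-bit system where
// ptrSize == 8): size 8 hits the first case; sizes 32, 64, 96, ... satisfy
// size%32 == 0, so every object starts at bit 0 of a bitmap byte; and
// sizes 16, 48, 80, ... satisfy size%32 == 16 == 2*ptrSize, so objects
// alternate between starting in the low and high halves of a byte.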

// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
// There can only be one allocation from a given span active at a time,
// so this code is not racing with other instances of itself,
// and we don't allocate from a span until it has been swept,
// so this code is not racing with heapBitsSweepSpan.
// It is, however, racing with the concurrent GC mark phase,
// which can be setting the mark bit in the leading 2-bit entry
// of an allocated block. The block we are modifying is not quite
// allocated yet, so the GC marker is not racing with updates to x's bits,
// but if the start or end of x shares a bitmap byte with an adjacent
// object, the GC marker is racing with updates to those objects' mark bits.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
	const doubleCheck = false // slow but helpful; enable to test modifications to this code

	// size is always dataSize rounded up to the next malloc size class,
	// except in the case of allocating a defer block, in which case
	// typ.size is sizeof(_defer{}) (at least 6 words) and dataSize may be
	// arbitrarily larger.
	//
	// The checks for size == ptrSize and size == 2*ptrSize can therefore
	// assume that dataSize == size without checking it explicitly.

	if ptrSize == 8 && size == ptrSize {
		// It's one word and it has pointers; it must be a pointer.
		// In general we'd need an atomic update here if the
		// concurrent GC were marking objects in this span,
		// because each bitmap byte describes 3 other objects
		// in addition to the one being allocated.
		// However, since all allocated one-word objects are pointers
		// (non-pointers are aggregated into tinySize allocations),
		// initSpan sets the pointer bits for us. Nothing to do here.
		if doubleCheck {
			h := heapBitsForAddr(x)
			if !h.isPointer() {
				throw("heapBitsSetType: pointer bit missing")
			}
		}
		return
	}

	h := heapBitsForAddr(x)
	ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)

	// Heap bitmap bits for 2-word object are only 4 bits,
	// so also shared with objects next to it; use atomic updates.
	// This is called out as a special case primarily for 32-bit systems,
	// so that on 32-bit systems the code below can assume all objects
	// are 4-word aligned (because they're all 16-byte aligned).
	if size == 2*ptrSize {
		if typ.size == ptrSize {
			// We're allocating a block big enough to hold two pointers.
			// On 64-bit, that means the actual object must be two pointers,
			// or else we'd have used the one-pointer-sized block.
			// On 32-bit, however, this is the 8-byte block, the smallest one.
			// So it could be that we're allocating one pointer and this was
			// just the smallest block available. Distinguish by checking dataSize.
			// (In general the number of instances of typ being allocated is
			// dataSize/typ.size.)
			if ptrSize == 4 && dataSize == ptrSize {
				// 1 pointer.
				if gcphase == _GCoff {
					*h.bitp |= bitPointer << h.shift
				} else {
					atomicor8(h.bitp, bitPointer<<h.shift)
				}
			} else {
				// 2-element slice of pointer.
				if gcphase == _GCoff {
					*h.bitp |= (bitPointer | bitPointer<<heapBitsShift) << h.shift
				} else {
					atomicor8(h.bitp, (bitPointer|bitPointer<<heapBitsShift)<<h.shift)
				}
			}
			return
		}
		// Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
		if doubleCheck {
			if typ.size != 2*ptrSize || typ.kind&kindGCProg != 0 {
				print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
				throw("heapBitsSetType")
			}
		}
		b := uint32(*ptrmask)
		hb := b & 3
		if gcphase == _GCoff {
			*h.bitp |= uint8(hb << h.shift)
		} else {
			atomicor8(h.bitp, uint8(hb<<h.shift))
		}
		return
	}

	// Copy from 1-bit ptrmask into 2-bit bitmap.
	// The basic approach is to use a single uintptr as a bit buffer,
	// alternating between reloading the buffer and writing bitmap bytes.
	// In general, one load can supply two bitmap byte writes.
	// This is a lot of lines of code, but it compiles into relatively few
	// machine instructions.

	var (
		// Ptrmask input.
		p     *byte   // last ptrmask byte read
		b     uintptr // ptrmask bits already loaded
		nb    uintptr // number of bits in b at next read
		endp  *byte   // final ptrmask byte to read (then repeat)
		endnb uintptr // number of valid bits in *endp
		pbits uintptr // alternate source of bits

		// Heap bitmap output.
		w     uintptr // words processed
		nw    uintptr // number of words to process
		hbitp *byte   // next heap bitmap byte to write
		hb    uintptr // bits being prepared for *hbitp
	)

	hbitp = h.bitp

	// Handle GC program. Delayed until this part of the code
	// so that we can use the same double-checking mechanism
	// as the 1-bit case. Nothing above could have encountered
	// GC programs: the cases were all too small.
	if typ.kind&kindGCProg != 0 {
		heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
		if doubleCheck {
			// Double-check the heap bits written by GC program
			// by running the GC program to create a 1-bit pointer mask
			// and then jumping to the double-check code below.
			// This doesn't catch bugs shared between the 1-bit and 4-bit
			// GC program execution, but it does catch mistakes specific
			// to just one of those and bugs in heapBitsSetTypeGCProg's
			// implementation of arrays.
			lock(&debugPtrmask.lock)
			if debugPtrmask.data == nil {
				debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
			}
			ptrmask = debugPtrmask.data
			runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
			goto Phase4
		}
		return
	}

	// Note about sizes:
	//
	// typ.size is the number of bytes in the object,
	// and typ.ptrdata is the number of bytes in the prefix
	// of the object that contains pointers. That is, the final
	// typ.size - typ.ptrdata bytes contain no pointers.
	// This allows optimization of a common pattern where
	// an object has a small header followed by a large scalar
	// buffer. If we know the pointers are over, we don't have
	// to scan the buffer's heap bitmap at all.
	// The 1-bit ptrmasks are sized to contain only bits for
	// the typ.ptrdata prefix, zero padded out to a full byte
	// of bitmap. This code sets nw (below) so that heap bitmap
	// bits are only written for the typ.ptrdata prefix; if there is
	// more room in the allocated object, the next heap bitmap
	// entry is a 00, indicating that there are no more pointers
	// to scan. So only the ptrmask for the ptrdata bytes is needed.
	//
	// Replicated copies are not as nice: if there is an array of
	// objects with scalar tails, all but the last tail does have to
	// be initialized, because there is no way to say "skip forward".
	// However, because of the possibility of a repeated type with
	// size not a multiple of 4 pointers (one heap bitmap byte),
	// the code already must handle the last ptrmask byte specially
	// by treating it as containing only the bits for endnb pointers,
	// where endnb <= 4. We represent large scalar tails that must
	// be expanded in the replication by setting endnb larger than 4.
	// This will have the effect of reading many bits out of b,
	// but once the real bits are shifted out, b will supply as many
	// zero bits as we try to read, which is exactly what we need.

	p = ptrmask
	if typ.size < dataSize {
		// Filling in bits for an array of typ.
		// Set up for repetition of ptrmask during main loop.
		// Note that ptrmask describes only a prefix of the type:
		// the typ.ptrdata bytes that can contain pointers.
		const maxBits = ptrSize*8 - 7
		if typ.ptrdata/ptrSize <= maxBits {
			// Entire ptrmask fits in uintptr with room for a byte fragment.
			// Load into pbits and never read from ptrmask again.
			// This is especially important when the ptrmask has
			// fewer than 8 bits in it; otherwise the reload in the middle
			// of the Phase 2 loop would itself need to loop to gather
			// at least 8 bits.

			// Accumulate ptrmask into b.
			// ptrmask is sized to describe only typ.ptrdata, but we record
			// it as describing typ.size bytes, since all the high bits are zero.
			nb = typ.ptrdata / ptrSize
			for i := uintptr(0); i < nb; i += 8 {
				b |= uintptr(*p) << i
				p = add1(p)
			}
			nb = typ.size / ptrSize

			// Replicate ptrmask to fill entire pbits uintptr.
			// Doubling and truncating is fewer steps than
			// iterating by nb each time. (nb could be 1.)
			// Since we loaded typ.ptrdata/ptrSize bits
			// but are pretending to have typ.size/ptrSize,
			// there might be no replication necessary/possible.
			pbits = b
			endnb = nb
			if nb+nb <= maxBits {
				for endnb <= ptrSize*8 {
					pbits |= pbits << endnb
					endnb += endnb
				}
				// Truncate to a multiple of original ptrmask.
				endnb = maxBits / nb * nb
				pbits &= 1<<endnb - 1
				b = pbits
				nb = endnb
			}

			// Clear p and endp as sentinel for using pbits.
			// Checked during Phase 2 loop.
			p = nil
			endp = nil
		} else {
			// Ptrmask is larger. Read it multiple times.
			n := (typ.ptrdata/ptrSize+7)/8 - 1
			endp = addb(ptrmask, n)
			endnb = typ.size/ptrSize - n*8
		}
	}
	if p != nil {
		b = uintptr(*p)
		p = add1(p)
		nb = 8
	}

	if typ.size == dataSize {
		// Single entry: can stop once we reach the non-pointer data.
		nw = typ.ptrdata / ptrSize
	} else {
		// Repeated instances of typ in an array.
		// Have to process first N-1 entries in full, but can stop
		// once we reach the non-pointer data in the final entry.
		nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / ptrSize
	}
	if nw == 0 {
		// No pointers! Caller was supposed to check.
		println("runtime: invalid type ", *typ._string)
		throw("heapBitsSetType: called with non-pointer type")
		return
	}
	if nw < 2 {
		// Must write at least 2 words, because the "no scan"
		// encoding doesn't take effect until the third word.
		nw = 2
	}
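
	// For example (an illustrative computation): allocating a [3]T where
	// T has typ.size == 4*ptrSize and typ.ptrdata == ptrSize gives
	// nw == (2*4*ptrSize + ptrSize)/ptrSize == 9: the first two elements
	// are processed in full, plus the single pointer word of the last.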

	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
	// The leading byte is special because it contains the bits for words 0 and 1,
	// which do not have the marked bits set.
	// The leading half-byte is special because it's half a byte and must be
	// manipulated atomically.
	switch {
	default:
		throw("heapBitsSetType: unexpected shift")

	case h.shift == 0:
		// Ptrmask and heap bitmap are aligned.
		// Handle first byte of bitmap specially.
		// The first byte we write out contains the first two words of the object.
		// In those words, the mark bits are mark and checkmark, respectively,
		// and must not be set. In all following words, we want to set the mark bit
		// as a signal that the object continues to the next 2-bit entry in the bitmap.
		hb = b & bitPointerAll
		hb |= bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
		if w += 4; w >= nw {
			goto Phase3
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
		nb -= 4

	case ptrSize == 8 && h.shift == 2:
		// Ptrmask and heap bitmap are misaligned.
		// The bits for the first two words are in a byte shared with another object
		// and must be updated atomically.
		// NOTE(rsc): The atomic here may not be necessary.
		// We took care of 1-word and 2-word objects above,
		// so this is at least a 6-word object, so our start bits
		// are shared only with the type bits of another object,
		// not with its mark bit. Since there is only one allocation
		// from a given span at a time, we should be able to set
		// these bits non-atomically. Not worth the risk right now.
		hb = (b & 3) << (2 * heapBitsShift)
		b >>= 2
		nb -= 2
		// Note: no bitMarked in hb because the first two words don't get markers from us.
		if gcphase == _GCoff {
			*hbitp |= uint8(hb)
		} else {
			atomicor8(hbitp, uint8(hb))
		}
		hbitp = subtract1(hbitp)
		if w += 2; w >= nw {
			// We know that there is more data, because we handled 2-word objects above.
			// This must be at least a 6-word object. If we're out of pointer words,
			// mark no scan in next bitmap byte and finish.
			hb = 0
			w += 4
			goto Phase3
		}
	}

	// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
	// The loop computes the bits for that last write but does not execute the write;
	// it leaves the bits in hb for processing by phase 3.
	// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
	// use in the first half of the loop right now, and then we only adjust nb explicitly
	// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
	nb -= 4
	for {
		// Emit bitmap byte.
		// b has at least nb+4 bits, with one exception:
		// if w+4 >= nw, then b has only nw-w bits,
		// but we'll stop at the break and then truncate
		// appropriately in Phase 3.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4

		// Load more bits. b has nb right now.
		if p != endp {
			// Fast path: keep reading from ptrmask.
			// nb unmodified: we just loaded 8 bits,
			// and the next iteration will consume 8 bits,
			// leaving us with the same nb the next time we're here.
			if nb < 8 {
				b |= uintptr(*p) << nb
				p = add1(p)
			} else {
				// Reduce the number of bits in b.
				// This is important if we skipped
				// over a scalar tail, since nb could
				// be larger than the bit width of b.
				nb -= 8
			}
		} else if p == nil {
			// Almost as fast path: track bit count and refill from pbits.
			// For short repetitions.
			if nb < 8 {
				b |= pbits << nb
				nb += endnb
			}
			nb -= 8 // for next iteration
		} else {
			// Slow path: reached end of ptrmask.
			// Process final partial byte and rewind to start.
			b |= uintptr(*p) << nb
			nb += endnb
			if nb < 8 {
				b |= uintptr(*ptrmask) << nb
				p = add1(ptrmask)
			} else {
				nb -= 8
				p = ptrmask
			}
		}

		// Emit bitmap byte.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
	}

Phase3:
	// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
	if w > nw {
		// Counting the 4 entries in hb not yet written to memory,
		// there are more entries than possible pointer slots.
		// Discard the excess entries (can't be more than 3).
		mask := uintptr(1)<<(4-(w-nw)) - 1
		hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
	}

	// Change nw from counting possibly-pointer words to total words in allocation.
	nw = size / ptrSize

	// Write whole bitmap bytes.
	// The first is hb, the rest are zero.
	if w <= nw {
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		hb = 0 // for possible final half-byte below
		for w += 4; w <= nw; w += 4 {
			*hbitp = 0
			hbitp = subtract1(hbitp)
		}
	}

	// Write final partial bitmap byte if any.
	// We know w > nw, or else we'd still be in the loop above.
	// It can be bigger only due to the 4 entries in hb that it counts.
	// If w == nw+4 then there's nothing left to do: we wrote all nw entries
	// and can discard the 4 sitting in hb.
	// But if w == nw+2, we need to write the first two entries in hb.
	// The byte is shared with the next object, so we may need an atomic.
	if w == nw+2 {
		if gcphase == _GCoff {
			*hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
		} else {
			atomicand8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
			atomicor8(hbitp, uint8(hb))
		}
	}

Phase4:
	// Phase 4: all done, but perhaps double check.
	if doubleCheck {
		end := heapBitsForAddr(x + size)
		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
			println("ended at wrong bitmap byte for", *typ._string, "x", dataSize/typ.size)
			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
			h0 := heapBitsForAddr(x)
			print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
			print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
			throw("bad heapBitsSetType")
		}

		// Double-check that bits to be written were written correctly.
		// Does not check that other bits were not written, unfortunately.
		h := heapBitsForAddr(x)
		nptr := typ.ptrdata / ptrSize
		ndata := typ.size / ptrSize
		count := dataSize / typ.size
		totalptr := ((count-1)*typ.size + typ.ptrdata) / ptrSize
		for i := uintptr(0); i < size/ptrSize; i++ {
			j := i % ndata
			var have, want uint8
			have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
			if i >= totalptr {
				want = 0 // deadmarker
				if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
					want = bitMarked
				}
			} else {
				if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
					want |= bitPointer
				}
				if i >= 2 {
					want |= bitMarked
				} else {
					have &^= bitMarked
				}
			}
			if have != want {
				println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size)
				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				h0 := heapBitsForAddr(x)
				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
				print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
				println("at word", i, "offset", i*ptrSize, "have", have, "want", want)
				if typ.kind&kindGCProg != 0 {
					println("GC program:")
					dumpGCProg(addb(typ.gcdata, 4))
				}
				throw("bad heapBitsSetType")
			}
			h = h.next()
		}
		if ptrmask == debugPtrmask.data {
			unlock(&debugPtrmask.lock)
		}
	}
}

var debugPtrmask struct {
	lock mutex
	data *byte
}

// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
// progSize is the size of the memory described by the program.
// elemSize is the size of the element that the GC program describes a prefix of.
// dataSize is the total size of the intended data, a multiple of elemSize.
// allocSize is the total size of the allocated memory.
//
// GC programs are only used for large allocations.
// heapBitsSetType requires that allocSize is a multiple of 4 words,
// so that the relevant bitmap bytes are not shared with surrounding
// objects and need not be accessed with atomic instructions.
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
	if ptrSize == 8 && allocSize%(4*ptrSize) != 0 {
		// Alignment will be wrong.
		throw("heapBitsSetTypeGCProg: small allocation")
	}
	var totalBits uintptr
	if elemSize == dataSize {
		totalBits = runGCProg(prog, nil, h.bitp, 2)
		if totalBits*ptrSize != progSize {
			println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
			throw("heapBitsSetTypeGCProg: unexpected bit count")
		}
	} else {
		count := dataSize / elemSize

		// Piece together program trailer to run after prog that does:
		//	literal(0)
		//	repeat(1, elemSize-progSize-1) // zeros to fill element size
		//	repeat(elemSize, count-1) // repeat that element for count
		// This zero-pads the data remaining in the first element and then
		// repeats that first element to fill the array.
		var trailer [40]byte // 3 varints (max 10 each) + some bytes
		i := 0
		if n := elemSize/ptrSize - progSize/ptrSize; n > 0 {
			// literal(0)
			trailer[i] = 0x01
			i++
			trailer[i] = 0
			i++
			if n > 1 {
				// repeat(1, n-1)
				trailer[i] = 0x81
				i++
				n--
				for ; n >= 0x80; n >>= 7 {
					trailer[i] = byte(n | 0x80)
					i++
				}
				trailer[i] = byte(n)
				i++
			}
		}
		// repeat(elemSize/ptrSize, count-1)
		trailer[i] = 0x80
		i++
		n := elemSize / ptrSize
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		n = count - 1
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		trailer[i] = 0
		i++

		runGCProg(prog, &trailer[0], h.bitp, 2)

		// Even though we filled in the full array just now,
		// record that we only filled in up to the ptrdata of the
		// last element. This will cause the code below to
		// memclr the dead section of the final array element,
		// so that scanobject can stop early in the final element.
		totalBits = (elemSize*(count-1) + progSize) / ptrSize
	}
	endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
	endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
	memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
}
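
// As a concrete illustration of the trailer construction above (not from
// the original source): for elemSize == 3*ptrSize, progSize == ptrSize,
// and count == 4, the trailer bytes are
//
//	01 00     literal(0): one zero bit
//	81 01     repeat(1, 1): pad the element out to 3 words
//	80 03 03  repeat(3, 3): replicate the element 3 more times
//	00        stop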

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/ptrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/ptrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, nil, &x[0], 1)
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
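//
// For example (illustrative): the program
//
//	02 01 00
//
// emits the two literal bits 1, 0 (a pointer word followed by a scalar
// word) and then stops.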
   1256 
   1257 // runGCProg executes the GC program prog, and then trailer if non-nil,
   1258 // writing to dst with entries of the given size.
   1259 // If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
   1260 // If size == 2, dst is the 2-bit heap bitmap, and writes move backward
   1261 // starting at dst (because the heap bitmap does). In this case, the caller guarantees
   1262 // that only whole bytes in dst need to be written.
   1263 //
   1264 // runGCProg returns the number of 1- or 2-bit entries written to memory.
   1265 func runGCProg(prog, trailer, dst *byte, size int) uintptr {
   1266 	dstStart := dst
   1267 
   1268 	// Bits waiting to be written to memory.
   1269 	var bits uintptr
   1270 	var nbits uintptr
   1271 
   1272 	p := prog
   1273 Run:
   1274 	for {
   1275 		// Flush accumulated full bytes.
   1276 		// The rest of the loop assumes that nbits <= 7.
   1277 		for ; nbits >= 8; nbits -= 8 {
   1278 			if size == 1 {
   1279 				*dst = uint8(bits)
   1280 				dst = add1(dst)
   1281 				bits >>= 8
   1282 			} else {
   1283 				v := bits&bitPointerAll | bitMarkedAll
   1284 				*dst = uint8(v)
   1285 				dst = subtract1(dst)
   1286 				bits >>= 4
   1287 				v = bits&bitPointerAll | bitMarkedAll
   1288 				*dst = uint8(v)
   1289 				dst = subtract1(dst)
   1290 				bits >>= 4
   1291 			}
   1292 		}
   1293 
   1294 		// Process one instruction.
   1295 		inst := uintptr(*p)
   1296 		p = add1(p)
   1297 		n := inst & 0x7F
   1298 		if inst&0x80 == 0 {
   1299 			// Literal bits; n == 0 means end of program.
   1300 			if n == 0 {
   1301 				// Program is over; continue in trailer if present.
   1302 				if trailer != nil {
   1303 					//println("trailer")
   1304 					p = trailer
   1305 					trailer = nil
   1306 					continue
   1307 				}
   1308 				//println("done")
   1309 				break Run
   1310 			}
   1311 			//println("lit", n, dst)
   1312 			nbyte := n / 8
   1313 			for i := uintptr(0); i < nbyte; i++ {
   1314 				bits |= uintptr(*p) << nbits
   1315 				p = add1(p)
   1316 				if size == 1 {
   1317 					*dst = uint8(bits)
   1318 					dst = add1(dst)
   1319 					bits >>= 8
   1320 				} else {
   1321 					v := bits&0xf | bitMarkedAll
   1322 					*dst = uint8(v)
   1323 					dst = subtract1(dst)
   1324 					bits >>= 4
   1325 					v = bits&0xf | bitMarkedAll
   1326 					*dst = uint8(v)
   1327 					dst = subtract1(dst)
   1328 					bits >>= 4
   1329 				}
   1330 			}
   1331 			if n %= 8; n > 0 {
   1332 				bits |= uintptr(*p) << nbits
   1333 				p = add1(p)
   1334 				nbits += n
   1335 			}
   1336 			continue Run
   1337 		}
   1338 
   1339 		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
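        		// (Varints are little-endian base-128: seven payload bits
        		// per byte, with the high bit set on every byte except the
        		// last. For example, 300 (binary 100101100) encodes as
        		// 0xAC 0x02.)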
   1340 		if n == 0 {
   1341 			for off := uint(0); ; off += 7 {
   1342 				x := uintptr(*p)
   1343 				p = add1(p)
   1344 				n |= (x & 0x7F) << off
   1345 				if x&0x80 == 0 {
   1346 					break
   1347 				}
   1348 			}
   1349 		}
   1350 
   1351 		// Count is encoded in a varint in the next bytes.
   1352 		c := uintptr(0)
   1353 		for off := uint(0); ; off += 7 {
   1354 			x := uintptr(*p)
   1355 			p = add1(p)
   1356 			c |= (x & 0x7F) << off
   1357 			if x&0x80 == 0 {
   1358 				break
   1359 			}
   1360 		}
   1361 		c *= n // now total number of bits to copy
   1362 
   1363 		// If the number of bits being repeated is small, load them
   1364 		// into a register and use that register for the entire loop
   1365 		// instead of repeatedly reading from memory.
   1366 		// Handling fewer than 8 bits here makes the general loop simpler.
   1367 		// The cutoff is ptrSize*8 - 7 to guarantee that when we add
   1368 		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
   1369 		// it will not overflow.
   1370 		src := dst
   1371 		const maxBits = ptrSize*8 - 7
   1372 		if n <= maxBits {
   1373 			// Start with bits in output buffer.
   1374 			pattern := bits
   1375 			npattern := nbits
   1376 
   1377 			// If we need more bits, fetch them from memory.
   1378 			if size == 1 {
   1379 				src = subtract1(src)
   1380 				for npattern < n {
   1381 					pattern <<= 8
   1382 					pattern |= uintptr(*src)
   1383 					src = subtract1(src)
   1384 					npattern += 8
   1385 				}
   1386 			} else {
   1387 				src = add1(src)
   1388 				for npattern < n {
   1389 					pattern <<= 4
   1390 					pattern |= uintptr(*src) & 0xf
   1391 					src = add1(src)
   1392 					npattern += 4
   1393 				}
   1394 			}
   1395 
   1396 			// We started with the whole bit output buffer,
   1397 			// and then we loaded bits from whole bytes.
   1398 			// Either way, we might now have too many instead of too few.
   1399 			// Discard the extra.
   1400 			if npattern > n {
   1401 				pattern >>= npattern - n
   1402 				npattern = n
   1403 			}
   1404 
   1405 			// Replicate pattern to at most maxBits.
   1406 			if npattern == 1 {
   1407 				// One bit being repeated.
   1408 				// If the bit is 1, make the pattern all 1s.
   1409 				// If the bit is 0, the pattern is already all 0s,
   1410 				// but we can claim that the number of bits
   1411 				// in the word is equal to the number we need (c),
   1412 				// because right shift of bits will zero fill.
   1413 				if pattern == 1 {
   1414 					pattern = 1<<maxBits - 1
   1415 					npattern = maxBits
   1416 				} else {
   1417 					npattern = c
   1418 				}
   1419 			} else {
   1420 				b := pattern
   1421 				nb := npattern
   1422 				if nb+nb <= maxBits {
   1423 					// Double pattern until the whole uintptr is filled.
   1424 					for nb <= ptrSize*8 {
   1425 						b |= b << nb
   1426 						nb += nb
   1427 					}
   1428 					// Trim away incomplete copy of original pattern in high bits.
   1429 					// TODO(rsc): Replace with table lookup or loop on systems without divide?
   1430 					nb = maxBits / npattern * npattern
   1431 					b &= 1<<nb - 1
   1432 					pattern = b
   1433 					npattern = nb
   1434 				}
   1435 			}
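        		// Worked example, assuming a 64-bit system (maxBits = 57):
        		// with npattern = 3 and pattern = binary 101, doubling fills
        		// the word with copies of the pattern, and trimming to
        		// nb = 57/3*3 = 57 bits leaves 19 whole copies.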
   1436 
   1437 			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
   1438 			// Since pattern contains >8 bits, there will be full bytes to flush
   1439 			// on each iteration.
   1440 			for ; c >= npattern; c -= npattern {
   1441 				bits |= pattern << nbits
   1442 				nbits += npattern
   1443 				if size == 1 {
   1444 					for nbits >= 8 {
   1445 						*dst = uint8(bits)
   1446 						dst = add1(dst)
   1447 						bits >>= 8
   1448 						nbits -= 8
   1449 					}
   1450 				} else {
   1451 					for nbits >= 4 {
   1452 						*dst = uint8(bits&0xf | bitMarkedAll)
   1453 						dst = subtract1(dst)
   1454 						bits >>= 4
   1455 						nbits -= 4
   1456 					}
   1457 				}
   1458 			}
   1459 
   1460 			// Add final fragment to bit buffer.
   1461 			if c > 0 {
   1462 				pattern &= 1<<c - 1
   1463 				bits |= pattern << nbits
   1464 				nbits += c
   1465 			}
   1466 			continue Run
   1467 		}
   1468 
   1469 		// Repeat; the pattern of n bits is too large to fit in a register.
   1470 		// Since nbits <= 7, we know the first few bytes of repeated data
   1471 		// are already written to memory.
   1472 		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
   1473 		if size == 1 {
   1474 			// Leading src fragment.
   1475 			src = subtractb(src, (off+7)/8)
   1476 			if frag := off & 7; frag != 0 {
   1477 				bits |= uintptr(*src) >> (8 - frag) << nbits
   1478 				src = add1(src)
   1479 				nbits += frag
   1480 				c -= frag
   1481 			}
   1482 			// Main loop: load one byte, write another.
   1483 			// The bits are rotating through the bit buffer.
   1484 			for i := c / 8; i > 0; i-- {
   1485 				bits |= uintptr(*src) << nbits
   1486 				src = add1(src)
   1487 				*dst = uint8(bits)
   1488 				dst = add1(dst)
   1489 				bits >>= 8
   1490 			}
   1491 			// Final src fragment.
   1492 			if c %= 8; c > 0 {
   1493 				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
   1494 				nbits += c
   1495 			}
   1496 		} else {
   1497 			// Leading src fragment.
   1498 			src = addb(src, (off+3)/4)
   1499 			if frag := off & 3; frag != 0 {
   1500 				bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
   1501 				src = subtract1(src)
   1502 				nbits += frag
   1503 				c -= frag
   1504 			}
   1505 			// Main loop: load one byte, write another.
   1506 			// The bits are rotating through the bit buffer.
   1507 			for i := c / 4; i > 0; i-- {
   1508 				bits |= (uintptr(*src) & 0xf) << nbits
   1509 				src = subtract1(src)
   1510 				*dst = uint8(bits&0xf | bitMarkedAll)
   1511 				dst = subtract1(dst)
   1512 				bits >>= 4
   1513 			}
   1514 			// Final src fragment.
   1515 			if c %= 4; c > 0 {
   1516 				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
   1517 				nbits += c
   1518 			}
   1519 		}
   1520 	}
   1521 
   1522 	// Write any final bits out, using full-byte writes, even for the final byte.
   1523 	var totalBits uintptr
   1524 	if size == 1 {
   1525 		totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
   1526 		nbits += -nbits & 7
   1527 		for ; nbits > 0; nbits -= 8 {
   1528 			*dst = uint8(bits)
   1529 			dst = add1(dst)
   1530 			bits >>= 8
   1531 		}
   1532 	} else {
   1533 		totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
   1534 		nbits += -nbits & 3
   1535 		for ; nbits > 0; nbits -= 4 {
   1536 			v := bits&0xf | bitMarkedAll
   1537 			*dst = uint8(v)
   1538 			dst = subtract1(dst)
   1539 			bits >>= 4
   1540 		}
   1541 		// Clear the mark bits in the first two entries.
   1542 		// They are the actual mark and checkmark bits,
   1543 		// not non-dead markers. Setting the marker on every
   1544 		// entry written simplified the code above; these two
   1545 		// bits are cleared as a special case at the end.
   1546 		*dstStart &^= bitMarked | bitMarked<<heapBitsShift
   1547 	}
   1548 	return totalBits
   1549 }
   1550 
   1551 func dumpGCProg(p *byte) {
   1552 	nptr := 0
   1553 	for {
   1554 		x := *p
   1555 		p = add1(p)
   1556 		if x == 0 {
   1557 			print("\t", nptr, " end\n")
   1558 			break
   1559 		}
   1560 		if x&0x80 == 0 {
   1561 			print("\t", nptr, " lit ", x, ":")
   1562 			n := int(x+7) / 8
   1563 			for i := 0; i < n; i++ {
   1564 				print(" ", hex(*p))
   1565 				p = add1(p)
   1566 			}
   1567 			print("\n")
   1568 			nptr += int(x)
   1569 		} else {
   1570 			nbit := int(x &^ 0x80)
   1571 			if nbit == 0 {
   1572 				for nb := uint(0); ; nb += 7 {
   1573 					x := *p
   1574 					p = add1(p)
   1575 					nbit |= int(x&0x7f) << nb
   1576 					if x&0x80 == 0 {
   1577 						break
   1578 					}
   1579 				}
   1580 			}
   1581 			count := 0
   1582 			for nb := uint(0); ; nb += 7 {
   1583 				x := *p
   1584 				p = add1(p)
   1585 				count |= int(x&0x7f) << nb
   1586 				if x&0x80 == 0 {
   1587 					break
   1588 				}
   1589 			}
   1590 			print("\t", nptr, " repeat ", nbit, "  ", count, "\n")
   1591 			nptr += nbit * count
   1592 		}
   1593 	}
   1594 }
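
        // expandGCProg is an illustrative, stand-alone decoder for the GC
        // program bytecode documented above. It is a sketch intended for
        // tests and debugging, not part of the runtime proper: it assumes
        // (and does not validate) a well-formed program, and it expands
        // the result to one byte per bit for readability.
        func expandGCProg(prog []byte) []byte {
        	var out []byte
        	i := 0
        	readVarint := func() int {
        		v, shift := 0, uint(0)
        		for {
        			x := prog[i]
        			i++
        			v |= int(x&0x7f) << shift
        			if x&0x80 == 0 {
        				return v
        			}
        			shift += 7
        		}
        	}
        	for {
        		inst := prog[i]
        		i++
        		n := int(inst & 0x7f)
        		if inst&0x80 == 0 {
        			if n == 0 {
        				return out // 00000000: stop
        			}
        			// 0nnnnnnn: emit n literal bits from the next
        			// (n+7)/8 bytes, consuming each byte low bit first.
        			for j := 0; j < n; j++ {
        				out = append(out, prog[i+j/8]>>uint(j%8)&1)
        			}
        			i += (n + 7) / 8
        			continue
        		}
        		// 1nnnnnnn (or 10000000 n): repeat the previous n bits
        		// c times, where c is a varint.
        		if n == 0 {
        			n = readVarint()
        		}
        		c := readVarint()
        		// Appending the trailing n bits c times matches the
        		// rotating byte-at-a-time copy in runGCProg, because the
        		// repeated region is periodic in n.
        		tail := out[len(out)-n:]
        		for ; c > 0; c-- {
        			out = append(out, tail...)
        		}
        	}
        }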
   1595 
   1596 // Testing.
   1597 
   1598 func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
   1599 	target := (*stkframe)(ctxt)
   1600 	if frame.sp <= target.sp && target.sp < frame.varp {
   1601 		*target = *frame
   1602 		return false
   1603 	}
   1604 	return true
   1605 }
   1606 
   1607 // gcbits returns the GC type info for x, for testing.
   1608 // The result is the bitmap entries (0 or 1), one byte per entry.
   1609 //go:linkname reflect_gcbits reflect.gcbits
   1610 func reflect_gcbits(x interface{}) []byte {
   1611 	ret := getgcmask(x)
   1612 	typ := (*ptrtype)(unsafe.Pointer((*eface)(unsafe.Pointer(&x))._type)).elem
   1613 	nptr := typ.ptrdata / ptrSize
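        	// Trim trailing zero entries beyond the type's ptrdata; words
        	// past ptrdata cannot contain pointers.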
   1614 	for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
   1615 		ret = ret[:len(ret)-1]
   1616 	}
   1617 	return ret
   1618 }
   1619 
   1620 // getgcmask returns the GC type info for the object stored in ep, for testing.
   1621 func getgcmask(ep interface{}) (mask []byte) {
   1622 	e := *(*eface)(unsafe.Pointer(&ep))
   1623 	p := e.data
   1624 	t := e._type
   1625 	// data or bss
   1626 	for datap := &firstmoduledata; datap != nil; datap = datap.next {
   1627 		// data
   1628 		if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
   1629 			bitmap := datap.gcdatamask.bytedata
   1630 			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
   1631 			mask = make([]byte, n/ptrSize)
   1632 			for i := uintptr(0); i < n; i += ptrSize {
   1633 				off := (uintptr(p) + i - datap.data) / ptrSize
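        				// 1-bit bitmap, consumed low bit first:
        				// entry off is bit off%8 of byte off/8.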
   1634 				mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
   1635 			}
   1636 			return
   1637 		}
   1638 
   1639 		// bss
   1640 		if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
   1641 			bitmap := datap.gcbssmask.bytedata
   1642 			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
   1643 			mask = make([]byte, n/ptrSize)
   1644 			for i := uintptr(0); i < n; i += ptrSize {
   1645 				off := (uintptr(p) + i - datap.bss) / ptrSize
   1646 				mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
   1647 			}
   1648 			return
   1649 		}
   1650 	}
   1651 
   1652 	// heap
   1653 	var n uintptr
   1654 	var base uintptr
   1655 	if mlookup(uintptr(p), &base, &n, nil) != 0 {
   1656 		mask = make([]byte, n/ptrSize)
   1657 		for i := uintptr(0); i < n; i += ptrSize {
   1658 			hbits := heapBitsForAddr(base + i)
   1659 			if hbits.isPointer() {
   1660 				mask[i/ptrSize] = 1
   1661 			}
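        			// Past the first two words (mark and checkmark), a
        			// cleared high bit is the dead encoding: the rest of
        			// the object holds no pointers.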
   1662 			if i >= 2*ptrSize && !hbits.isMarked() {
   1663 				mask = mask[:i/ptrSize]
   1664 				break
   1665 			}
   1666 		}
   1667 		return
   1668 	}
   1669 
   1670 	// stack
   1671 	if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
   1672 		var frame stkframe
   1673 		frame.sp = uintptr(p)
   1674 		_g_ := getg()
   1675 		gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
   1676 		if frame.fn != nil {
   1677 			f := frame.fn
   1678 			targetpc := frame.continpc
   1679 			if targetpc == 0 {
   1680 				return
   1681 			}
   1682 			if targetpc != f.entry {
   1683 				targetpc--
   1684 			}
   1685 			pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
   1686 			if pcdata == -1 {
   1687 				return
   1688 			}
   1689 			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
   1690 			if stkmap == nil || stkmap.n <= 0 {
   1691 				return
   1692 			}
   1693 			bv := stackmapdata(stkmap, pcdata)
   1694 			size := uintptr(bv.n) * ptrSize
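        			// The locals bitmap covers the size bytes ending at
        			// frame.varp, so index p+i relative to varp-size.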
   1695 			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
   1696 			mask = make([]byte, n/ptrSize)
   1697 			for i := uintptr(0); i < n; i += ptrSize {
   1698 				bitmap := bv.bytedata
   1699 				off := (uintptr(p) + i - frame.varp + size) / ptrSize
   1700 				mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
   1701 			}
   1702 		}
   1703 		return
   1704 	}
   1705 
   1706 	// Otherwise p points at memory the GC knows nothing about,
   1707 	// possibly read-only data, like malloc(0).
   1708 	// Such memory must not contain pointers.
   1709 	return
   1710 }
   1711