// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Statistics.
// If you edit this structure, also edit type MemStats below.
// Their layouts must match exactly.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Protected by mheap.lock
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_alloc    uint64 // bytes allocated and not yet freed (same as alloc above)
	heap_sys      uint64 // virtual address space obtained from system for GC'd heap
	heap_idle     uint64 // bytes in idle spans
	heap_inuse    uint64 // bytes in _MSpanInUse spans
	heap_released uint64 // bytes released to the os
	heap_objects  uint64 // total number of allocated objects

	// TODO(austin): heap_released is both useless and inaccurate
	// in its current form. It's useless because, from the user's
	// and OS's perspectives, there's no difference between a page
	// that has not yet been faulted in and a page that has been
	// released back to the OS. We could fix this by considering
	// newly mapped spans to be "released". It's inaccurate
	// because when we split a large span for allocation, we
	// "unrelease" all pages in the large span and not just the
	// ones we split off for use. This is trickier to fix because
	// we currently don't know which pages of a span we've
	// released. We could fix it by separating "free" and
	// "released" spans, but then we have to allocate from runs of
	// free and released spans.

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	stacks_inuse uint64 // bytes in manually-managed stack spans
	stacks_sys   uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys
	mspan_inuse  uint64 // mspan structures
	mspan_sys    uint64
	mcache_inuse uint64 // mcache structures
	mcache_sys   uint64
	buckhash_sys uint64 // profiling bucket hash table
	gc_sys       uint64
	other_sys    uint64

	// Statistics about garbage collector.
	// Protected by mheap or stopping the world during GC.
	next_gc         uint64 // goal heap_live for when next GC ends; ^0 if disabled
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Statistics below here are not exported to MemStats directly.

	last_gc_nanotime uint64 // last gc (monotonic time)
	tinyallocs       uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly

	// triggerRatio is the heap growth ratio that triggers marking.
	//
	// E.g., if this is 0.6, then GC should start when the live
	// heap has reached 1.6 times the heap size marked by the
	// previous cycle. This should be ≤ GOGC/100 so the trigger
	// heap size is less than the goal heap size. This is set
	// during mark termination for the next cycle's trigger.
	triggerRatio float64

	// gc_trigger is the heap size that triggers marking.
	//
	// When heap_live ≥ gc_trigger, the mark phase will start.
	// This is also the heap size by which proportional sweeping
	// must be complete.
	//
	// This is computed from triggerRatio during mark termination
	// for the next cycle's trigger.
	gc_trigger uint64
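
	// As a rough, illustrative sketch (the real computation in the
	// collector also applies bounds), the trigger derived from
	// triggerRatio is approximately
	//
	//	gc_trigger ≈ heap_marked * (1 + triggerRatio)
	//
	// so a triggerRatio of 0.6 starts marking once the live heap has
	// grown to about 1.6 times what the previous cycle marked.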

	// heap_live is the number of bytes considered live by the GC.
	// That is: retained by the most recent GC plus allocated
	// since then. heap_live <= heap_alloc, since heap_alloc
	// includes unmarked objects that have not yet been swept (and
	// hence goes up as we allocate and down as we sweep) while
	// heap_live excludes these objects (and hence only goes up
	// between GCs).
	//
	// This is updated atomically without locking. To reduce
	// contention, this is updated only when obtaining a span from
	// an mcentral and at this point it counts all of the
	// unallocated slots in that span (which will be allocated
	// before that mcache obtains another span from that
	// mcentral). Hence, it slightly overestimates the "true" live
	// heap size. It's better to overestimate than to
	// underestimate because 1) this triggers the GC earlier than
	// necessary rather than potentially too late and 2) this
	// leads to a conservative GC rate rather than a GC rate that
	// is potentially too low.
	//
	// Reads should likewise be atomic (or during STW).
	//
	// Whenever this is updated, call traceHeapAlloc() and
	// gcController.revise().
	heap_live uint64
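
	// A typical update therefore looks roughly like the following
	// sketch (illustrative only; the exact call sites in the
	// allocator differ in detail):
	//
	//	atomic.Xadd64(&memstats.heap_live, int64(delta))
	//	if trace.enabled {
	//		traceHeapAlloc()
	//	}
	//	if gcBlackenEnabled != 0 {
	//		gcController.revise()
	//	}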

	// heap_scan is the number of bytes of "scannable" heap. This
	// is the live heap (as counted by heap_live), but omitting
	// no-scan objects and no-scan tails of objects.
	//
	// Whenever this is updated, call gcController.revise().
	heap_scan uint64

	// heap_marked is the number of bytes marked by the previous
	// GC. After mark termination, heap_live == heap_marked, but
	// unlike heap_live, heap_marked does not change until the
	// next mark termination.
	heap_marked uint64
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64
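
	// For example, given a MemStats value m filled in by
	// ReadMemStats, the most recent pause alone can be read as
	//
	//	last := m.PauseNs[(m.NumGC+255)%256]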

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64
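
	// Put differently (an informal sketch; gcCPUTime and
	// wallClockTime are just illustrative names), with a constant
	// GOMAXPROCS the fraction is roughly
	//
	//	GCCPUFraction ≈ gcCPUTime / (GOMAXPROCS * wallClockTime)
	//
	// so, continuing the example above, 1 second of GC CPU time
	// against that 20-second budget would report 0.05.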

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}
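
// As an illustrative sketch of how the BySize table can be consumed
// (user code outside this file; variable names are examples), the
// per-class live object counts and cumulative allocated bytes follow
// directly from the documented fields:
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m)
//	for _, c := range m.BySize {
//		live := c.Mallocs - c.Frees             // live objects in this size class
//		allocated := uint64(c.Size) * c.Mallocs // cumulative bytes allocated
//		_, _ = live, allocated
//	}
//
// Allocations larger than BySize[60].Size are not included in this table.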

// Size of the trailing by_size array differs between mstats and MemStats,
// and all data after by_size is local to runtime, not exported.
// NumSizeClasses was changed, but we cannot change MemStats because of backward compatibility.
// sizeof_C_MStats is the size of the prefix of mstats that
// corresponds to MemStats. It should match Sizeof(MemStats{}).
var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])

func init() {
	var memStats MemStats
	if sizeof_C_MStats != unsafe.Sizeof(memStats) {
		println(sizeof_C_MStats, unsafe.Sizeof(memStats))
		throw("MStats vs MemStatsType size mismatch")
	}

	if unsafe.Offsetof(memstats.heap_live)%8 != 0 {
		println(unsafe.Offsetof(memstats.heap_live))
		throw("memstats.heap_live not aligned to 8 bytes")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
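
// As a usage sketch (client code outside the runtime; the derived
// quantities follow from the MemStats documentation above):
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m)
//	liveObjects := m.Mallocs - m.Frees
//	retained := m.HeapIdle - m.HeapReleased // memory that could be returned to the OS
//	fmt.Printf("live objects: %d, retained bytes: %d\n", liveObjects, retained)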

func readmemstats_m(stats *MemStats) {
	updatememstats()

	// The size of the trailing by_size array differs between
	// mstats and MemStats. NumSizeClasses was changed, but we
	// cannot change MemStats because of backward compatibility.
	memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)

	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys += stats.StackInuse
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}
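
// The slice handed back through *pauses is therefore laid out as the
// pauses, then the matching pause end times, then three trailing values.
// A consumer (such as runtime/debug) can unpack it roughly like this sketch:
//
//	n := (len(p) - 3) / 2
//	pauseTimes := p[:n]     // most recent first
//	pauseEnds := p[n : 2*n] // end times matching pauseTimes
//	lastGC := p[2*n]        // unix nanoseconds
//	numGC := p[2*n+1]
//	totalPauseNs := p[2*n+2]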

//go:nowritebarrier
func updatememstats() {
	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
		memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys

	// We also count stacks_inuse as sys memory.
	memstats.sys += memstats.stacks_inuse

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
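	// For example, if 60 objects have been freed over the program's
	// lifetime and 40 are currently alive, nmalloc is reported as 100.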
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}

	// Flush MCache's to MCentral.
	systemstack(flushallmcaches)

	// Aggregate local stats.
	cachestats()

	// Collect allocation stats. This is safe and consistent
	// because the world is stopped.
	var smallFree, totalAlloc, totalFree uint64
	// Collect per-spanclass stats.
	for spc := range mheap_.central {
		// The mcaches are now empty, so mcentral stats are
		// up-to-date.
		c := &mheap_.central[spc].mcentral
		memstats.nmalloc += c.nmalloc
		i := spanClass(spc).sizeclass()
		memstats.by_size[i].nmalloc += c.nmalloc
		totalAlloc += c.nmalloc * uint64(class_to_size[i])
	}
	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		if i == 0 {
			memstats.nmalloc += mheap_.nlargealloc
			totalAlloc += mheap_.largealloc
			totalFree += mheap_.largefree
			memstats.nfree += mheap_.nlargefree
			continue
		}

		// The mcache stats have been flushed to mheap_.
		memstats.nfree += mheap_.nsmallfree[i]
		memstats.by_size[i].nfree = mheap_.nsmallfree[i]
		smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i])
	}
	totalFree += smallFree

	memstats.nfree += memstats.tinyallocs
	memstats.nmalloc += memstats.tinyallocs

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_alloc = memstats.alloc
	memstats.heap_objects = memstats.nmalloc - memstats.nfree
}

// cachestats flushes all mcache stats.
//
// The world must be stopped.
//
//go:nowritebarrier
func cachestats() {
	for _, p := range allp {
		c := p.mcache
		if c == nil {
			continue
		}
		purgecachedstats(c)
	}
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

//go:nosplit
func purgecachedstats(c *mcache) {
	// Protected by either heap or GC lock.
	h := &mheap_
	memstats.heap_scan += uint64(c.local_scan)
	c.local_scan = 0
	memstats.tinyallocs += uint64(c.local_tinyallocs)
	c.local_tinyallocs = 0
	memstats.nlookup += uint64(c.local_nlookup)
	c.local_nlookup = 0
	h.largefree += uint64(c.local_largefree)
	c.local_largefree = 0
	h.nlargefree += uint64(c.local_nlargefree)
	c.local_nlargefree = 0
	for i := 0; i < len(c.local_nsmallfree); i++ {
		h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
		c.local_nsmallfree[i] = 0
	}
}

// Atomically increases a given *system* memory stat. We are counting on this
// stat never overflowing a uintptr, so this function must only be used for
// system memory stats.
//
// The current implementation for little endian architectures is based on
// xadduintptr(), which is less than ideal: xadd64() should really be used.
// Using xadduintptr() is a stop-gap solution until arm supports xadd64() that
// doesn't use locks. (Locks are a problem as they require a valid G, which
// restricts their usability.)
//
// A side-effect of using xadduintptr() is that we need to check for
// overflow errors.
//go:nosplit
func mSysStatInc(sysStat *uint64, n uintptr) {
	if sys.BigEndian {
		atomic.Xadd64(sysStat, int64(n))
		return
	}
	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), n); val < n {
		print("runtime: stat overflow: val ", val, ", n ", n, "\n")
		exit(2)
	}
}
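
// For example (an illustrative sketch; the real call sites are spread
// through the allocator and the OS memory layers), obtaining n bytes
// from the OS on behalf of the heap would be recorded as
//
//	mSysStatInc(&memstats.heap_sys, n)
//
// and mSysStatDec below is used analogously when a stat must shrink.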

// Atomically decreases a given *system* memory stat. Same comments as
// mSysStatInc apply.
//go:nosplit
func mSysStatDec(sysStat *uint64, n uintptr) {
	if sys.BigEndian {
		atomic.Xadd64(sysStat, -int64(n))
		return
	}
	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), uintptr(-int64(n))); val+n < n {
		print("runtime: stat underflow: val ", val, ", n ", n, "\n")
		exit(2)
	}
}