// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup.
// It has gotten completely out of control.

// Garbage collector (GC).
//
// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows
// multiple GC threads to run in parallel. It is a concurrent mark and sweep that uses a write
// barrier. It is non-generational and non-compacting. Allocation is done using size segregated
// per P allocation areas to minimize fragmentation while eliminating locks in the common case.
//
// The algorithm decomposes into several steps.
// This is a high level description of the algorithm being used. For an overview of GC a good
// place to start is Richard Jones' gchandbook.org.
//
// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978),
// 966-975.
// For journal quality proofs that these steps are complete, correct, and terminate see
// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
//
//  0. Set phase = GCscan from GCoff.
//  1. Wait for all P's to acknowledge phase change.
//     At this point all goroutines have passed through a GC safepoint and
//     know we are in the GCscan phase.
//  2. GC scans all goroutine stacks, marking and enqueuing all encountered pointers
//     (marking avoids most duplicate enqueuing but races may produce benign duplication).
//     Preempted goroutines are scanned before P schedules next goroutine.
//  3. Set phase = GCmark.
//  4. Wait for all P's to acknowledge phase change.
//  5. Now write barrier marks and enqueues black, grey, or white to white pointers.
//     Malloc still allocates white (non-marked) objects.
//  6. Meanwhile GC transitively walks the heap marking reachable objects.
//  7. When GC finishes marking heap, it preempts P's one-by-one and
//     retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
//     currently scheduled on the P).
//  8. Once the GC has exhausted all available marking work it sets phase = marktermination.
//  9. Wait for all P's to acknowledge phase change.
// 10. Malloc now allocates black objects, so number of unmarked reachable objects
//     monotonically decreases.
// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet
//     reachable objects.
// 12. When GC completes a full cycle over P's and discovers no new grey
//     objects (which means all reachable objects are marked) set phase = GCoff.
// 13. Wait for all P's to acknowledge phase change.
// 14. Now malloc allocates white (but sweeps spans before use).
//     Write barrier becomes nop.
// 15. GC does background sweeping, see description below.
// 16. When sufficient allocation has taken place replay the sequence starting at 0 above,
//     see discussion of GC rate below.

// Changing phases.
// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
// All phase action must be benign in the presence of a change.
// Starting with GCoff
// GCoff to GCscan
//     GCscan scans stacks and globals, greying them, and never marks an object black.
//     Once all the P's are aware of the new phase they will scan gs on preemption.
//     This means that the scanning of preempted gs can't start until all the Ps
//     have acknowledged.
//     When a stack is scanned, this phase also installs stack barriers to
//     track how much of the stack has been active.
//     This transition enables write barriers because stack barriers
//     assume that writes to higher frames will be tracked by write
//     barriers. Technically this only needs write barriers for writes
//     to stack slots, but we enable write barriers in general.
// GCscan to GCmark
//     In GCmark, work buffers are drained until there are no more
//     pointers to scan.
//     No scanning of objects (making them black) can happen until all
//     Ps have enabled the write barrier, but that already happened in
//     the transition to GCscan.
// GCmark to GCmarktermination
//     The only change here is that we start allocating black so the Ps must acknowledge
//     the change before we begin the termination algorithm.
// GCmarktermination to GCsweep
//     Objects currently on the freelist must be marked black for this to work.
//     Are things on the free lists black or white? How does the sweep phase work?

// Concurrent sweep.
//
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
// At the end of STW mark termination all spans are marked as "needs sweeping".
//
// The background sweeper goroutine simply sweeps spans one-by-one.
//
// To avoid requesting more OS memory while there are unswept spans, when a
// goroutine needs another span, it first attempts to reclaim that much memory
// by sweeping. When a goroutine needs to allocate a new small-object span, it
// sweeps small-object spans for the same object size until it frees at least
// one object. When a goroutine needs to allocate a large-object span from the
// heap, it sweeps spans until it frees at least that many pages into the heap.
// There is one case where this may not suffice: if a goroutine sweeps and frees
// two nonadjacent one-page spans to the heap, it will allocate a new two-page
// span, but there can still be other one-page unswept spans which could be
// combined into a two-page span.
//
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).

// GC rate.
// Next GC is after we've allocated an extra amount of memory proportional to
// the amount already in use. The proportion is controlled by the GOGC environment variable
// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
// (this mark is tracked in the next_gc variable).
// This keeps the GC cost in linear
// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
// (and also the amount of extra memory used).

package runtime

import "unsafe"

const (
	_DebugGC         = 0
	_ConcurrentSweep = true
	_FinBlockSize    = 4 * 1024
	_RootData        = 0
	_RootBss         = 1
	_RootFinalizers  = 2
	_RootSpans       = 3
	_RootFlushCaches = 4
	_RootCount       = 5

	debugStackBarrier = false

	// sweepMinHeapDistance is a lower bound on the heap distance
	// (in bytes) reserved for concurrent sweeping between GC
	// cycles. This will be scaled by gcpercent/100.
	sweepMinHeapDistance = 1024 * 1024
)

// firstStackBarrierOffset is the approximate byte offset at
// which to place the first stack barrier from the current SP.
// This is a lower bound on how much stack will have to be
// re-scanned during mark termination. Subsequent barriers are
// placed at firstStackBarrierOffset * 2^n offsets.
//
// For debugging, this can be set to 0, which will install a
// stack barrier at every frame. If you do this, you may also
// have to raise _StackMin, since the stack barrier
// bookkeeping will use a large amount of each stack.
var firstStackBarrierOffset = 1024

// heapminimum is the minimum heap size at which to trigger GC.
// For small heaps, this overrides the usual GOGC*live set rule.
//
// When there is a very small live set but a lot of allocation, simply
// collecting when the heap reaches GOGC*live results in many GC
// cycles and high total per-GC overhead. This minimum amortizes this
// per-GC overhead while keeping the heap reasonably small.
//
// During initialization this is set to 4MB*GOGC/100. In the case of
// GOGC==0, this will set heapminimum to 0, resulting in constant
// collection even when the heap size is small, which is useful for
// debugging.
var heapminimum uint64 = defaultHeapMinimum

// defaultHeapMinimum is the value of heapminimum for GOGC==100.
const defaultHeapMinimum = 4 << 20

// Initialized from $GOGC. GOGC=off means no GC.
var gcpercent int32

func gcinit() {
	if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
		throw("size of Workbuf is suboptimal")
	}

	work.markfor = parforalloc(_MaxGcproc)
	_ = setGCPercent(readgogc())
	for datap := &firstmoduledata; datap != nil; datap = datap.next {
		datap.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data)
		datap.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss)
	}
	memstats.next_gc = heapminimum
}

func readgogc() int32 {
	p := gogetenv("GOGC")
	if p == "" {
		return 100
	}
	if p == "off" {
		return -1
	}
	return int32(atoi(p))
}
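// The pacing arithmetic described in the comments above can be illustrated in
// isolation. gcPacingSketch is a hypothetical helper added here only for
// illustration and is not used by the runtime; it mirrors the heapminimum
// scaling performed by setGCPercent below and the GOGC-proportional heap goal
// computed by the pacer later in this file. gogc must be >= 0 (GOGC=off
// disables collection entirely).
func gcPacingSketch(gogc int32, heapReachable uint64) (heapGoal, minTrigger uint64) {
	// Let the heap grow by GOGC percent over the reachable heap before the
	// next cycle's goal is reached.
	heapGoal = heapReachable + heapReachable*uint64(gogc)/100
	// For small heaps, the trigger is never set below heapminimum, which is
	// defaultHeapMinimum scaled by GOGC/100.
	minTrigger = defaultHeapMinimum * uint64(gogc) / 100
	return
}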
// gcenable is called after the bulk of the runtime initialization,
// just before we're about to start letting user code run.
// It kicks off the background sweeper goroutine and enables GC.
func gcenable() {
	c := make(chan int, 1)
	go bgsweep(c)
	<-c
	memstats.enablegc = true // now that runtime is initialized, GC is okay
}

func setGCPercent(in int32) (out int32) {
	lock(&mheap_.lock)
	out = gcpercent
	if in < 0 {
		in = -1
	}
	gcpercent = in
	heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100
	unlock(&mheap_.lock)
	return out
}

// Garbage collector phase.
// Indicates to the write barrier and synchronization task what to perform.
var gcphase uint32
var writeBarrierEnabled bool // compiler emits references to this in write barriers

// gcBlackenEnabled is 1 if mutator assists and background mark
// workers are allowed to blacken objects. This must only be set when
// gcphase == _GCmark.
var gcBlackenEnabled uint32

// gcBlackenPromptly indicates that optimizations that may
// hide work from the global work queue should be disabled.
//
// If gcBlackenPromptly is true, per-P gcWork caches should
// be flushed immediately and new objects should be allocated black.
//
// There is a tension between allocating objects white and
// allocating them black. If white and the objects die before being
// marked they can be collected during this GC cycle. On the other
// hand allocating them black will reduce _GCmarktermination latency
// since more work is done in the mark phase. This tension is resolved
// by allocating white until the mark phase is approaching its end and
// then allocating black for the remainder of the mark phase.
var gcBlackenPromptly bool

const (
	_GCoff             = iota // GC not running; sweeping in background, write barrier disabled
	_GCstw                    // unused state
	_GCscan                   // GC collecting roots into workbufs, write barrier ENABLED
	_GCmark                   // GC marking from workbufs, write barrier ENABLED
	_GCmarktermination        // GC mark termination: allocate black, P's help GC, write barrier ENABLED
)

//go:nosplit
func setGCPhase(x uint32) {
	atomicstore(&gcphase, x)
	writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination || gcphase == _GCscan
}

// gcMarkWorkerMode represents the mode that a concurrent mark worker
// should operate in.
//
// Concurrent marking happens through four different mechanisms. One
// is mutator assists, which happen in response to allocations and are
// not scheduled. The other three are variations in the per-P mark
// workers and are distinguished by gcMarkWorkerMode.
type gcMarkWorkerMode int

const (
	// gcMarkWorkerDedicatedMode indicates that the P of a mark
	// worker is dedicated to running that mark worker. The mark
	// worker should run without preemption until concurrent mark
	// is done.
	gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota

	// gcMarkWorkerFractionalMode indicates that a P is currently
	// running the "fractional" mark worker. The fractional worker
	// is necessary when GOMAXPROCS*gcGoalUtilization is not an
	// integer. The fractional worker should run until it is
	// preempted and will be scheduled to pick up the fractional
	// part of GOMAXPROCS*gcGoalUtilization.
	gcMarkWorkerFractionalMode

	// gcMarkWorkerIdleMode indicates that a P is running the mark
	// worker because it has nothing else to do. The idle worker
	// should run until it is preempted and account its time
	// against gcController.idleMarkTime.
290 gcMarkWorkerIdleMode 291 ) 292 293 // gcController implements the GC pacing controller that determines 294 // when to trigger concurrent garbage collection and how much marking 295 // work to do in mutator assists and background marking. 296 // 297 // It uses a feedback control algorithm to adjust the memstats.next_gc 298 // trigger based on the heap growth and GC CPU utilization each cycle. 299 // This algorithm optimizes for heap growth to match GOGC and for CPU 300 // utilization between assist and background marking to be 25% of 301 // GOMAXPROCS. The high-level design of this algorithm is documented 302 // at https://golang.org/s/go15gcpacing. 303 var gcController = gcControllerState{ 304 // Initial trigger ratio guess. 305 triggerRatio: 7 / 8.0, 306 } 307 308 type gcControllerState struct { 309 // scanWork is the total scan work performed this cycle. This 310 // is updated atomically during the cycle. Updates may be 311 // batched arbitrarily, since the value is only read at the 312 // end of the cycle. 313 // 314 // Currently this is the bytes of heap scanned. For most uses, 315 // this is an opaque unit of work, but for estimation the 316 // definition is important. 317 scanWork int64 318 319 // bgScanCredit is the scan work credit accumulated by the 320 // concurrent background scan. This credit is accumulated by 321 // the background scan and stolen by mutator assists. This is 322 // updated atomically. Updates occur in bounded batches, since 323 // it is both written and read throughout the cycle. 324 bgScanCredit int64 325 326 // assistTime is the nanoseconds spent in mutator assists 327 // during this cycle. This is updated atomically. Updates 328 // occur in bounded batches, since it is both written and read 329 // throughout the cycle. 330 assistTime int64 331 332 // dedicatedMarkTime is the nanoseconds spent in dedicated 333 // mark workers during this cycle. This is updated atomically 334 // at the end of the concurrent mark phase. 335 dedicatedMarkTime int64 336 337 // fractionalMarkTime is the nanoseconds spent in the 338 // fractional mark worker during this cycle. This is updated 339 // atomically throughout the cycle and will be up-to-date if 340 // the fractional mark worker is not currently running. 341 fractionalMarkTime int64 342 343 // idleMarkTime is the nanoseconds spent in idle marking 344 // during this cycle. This is updated atomically throughout 345 // the cycle. 346 idleMarkTime int64 347 348 // bgMarkStartTime is the absolute start time in nanoseconds 349 // that the background mark phase started. 350 bgMarkStartTime int64 351 352 // assistTime is the absolute start time in nanoseconds that 353 // mutator assists were enabled. 354 assistStartTime int64 355 356 // heapGoal is the goal memstats.heap_live for when this cycle 357 // ends. This is computed at the beginning of each cycle. 358 heapGoal uint64 359 360 // dedicatedMarkWorkersNeeded is the number of dedicated mark 361 // workers that need to be started. This is computed at the 362 // beginning of each cycle and decremented atomically as 363 // dedicated mark workers get started. 364 dedicatedMarkWorkersNeeded int64 365 366 // assistRatio is the ratio of allocated bytes to scan work 367 // that should be performed by mutator assists. This is 368 // computed at the beginning of each cycle and updated every 369 // time heap_scan is updated. 370 assistRatio float64 371 372 // fractionalUtilizationGoal is the fraction of wall clock 373 // time that should be spent in the fractional mark worker. 
374 // For example, if the overall mark utilization goal is 25% 375 // and GOMAXPROCS is 6, one P will be a dedicated mark worker 376 // and this will be set to 0.5 so that 50% of the time some P 377 // is in a fractional mark worker. This is computed at the 378 // beginning of each cycle. 379 fractionalUtilizationGoal float64 380 381 // triggerRatio is the heap growth ratio at which the garbage 382 // collection cycle should start. E.g., if this is 0.6, then 383 // GC should start when the live heap has reached 1.6 times 384 // the heap size marked by the previous cycle. This is updated 385 // at the end of of each cycle. 386 triggerRatio float64 387 388 _ [_CacheLineSize]byte 389 390 // fractionalMarkWorkersNeeded is the number of fractional 391 // mark workers that need to be started. This is either 0 or 392 // 1. This is potentially updated atomically at every 393 // scheduling point (hence it gets its own cache line). 394 fractionalMarkWorkersNeeded int64 395 396 _ [_CacheLineSize]byte 397 } 398 399 // startCycle resets the GC controller's state and computes estimates 400 // for a new GC cycle. The caller must hold worldsema. 401 func (c *gcControllerState) startCycle() { 402 c.scanWork = 0 403 c.bgScanCredit = 0 404 c.assistTime = 0 405 c.dedicatedMarkTime = 0 406 c.fractionalMarkTime = 0 407 c.idleMarkTime = 0 408 409 // If this is the first GC cycle or we're operating on a very 410 // small heap, fake heap_marked so it looks like next_gc is 411 // the appropriate growth from heap_marked, even though the 412 // real heap_marked may not have a meaningful value (on the 413 // first cycle) or may be much smaller (resulting in a large 414 // error response). 415 if memstats.next_gc <= heapminimum { 416 memstats.heap_marked = uint64(float64(memstats.next_gc) / (1 + c.triggerRatio)) 417 memstats.heap_reachable = memstats.heap_marked 418 } 419 420 // Compute the heap goal for this cycle 421 c.heapGoal = memstats.heap_reachable + memstats.heap_reachable*uint64(gcpercent)/100 422 423 // Compute the total mark utilization goal and divide it among 424 // dedicated and fractional workers. 425 totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization 426 c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal) 427 c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded) 428 if c.fractionalUtilizationGoal > 0 { 429 c.fractionalMarkWorkersNeeded = 1 430 } else { 431 c.fractionalMarkWorkersNeeded = 0 432 } 433 434 // Clear per-P state 435 for _, p := range &allp { 436 if p == nil { 437 break 438 } 439 p.gcAssistTime = 0 440 } 441 442 // Compute initial values for controls that are updated 443 // throughout the cycle. 444 c.revise() 445 446 if debug.gcpacertrace > 0 { 447 print("pacer: assist ratio=", c.assistRatio, 448 " (scan ", memstats.heap_scan>>20, " MB in ", 449 work.initialHeapLive>>20, "->", 450 c.heapGoal>>20, " MB)", 451 " workers=", c.dedicatedMarkWorkersNeeded, 452 "+", c.fractionalMarkWorkersNeeded, "\n") 453 } 454 } 455 456 // revise updates the assist ratio during the GC cycle to account for 457 // improved estimates. This should be called either under STW or 458 // whenever memstats.heap_scan is updated (with mheap_.lock held). 459 func (c *gcControllerState) revise() { 460 // Compute the expected scan work. This is a strict upper 461 // bound on the possible scan work in the current heap. 
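	// (heap_scan counts every scannable byte currently allocated, while
	// only the bytes that turn out to be reachable actually get scanned,
	// so the true remaining scan work is usually smaller than this.)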
462 // 463 // You might consider dividing this by 2 (or by 464 // (100+GOGC)/100) to counter this over-estimation, but 465 // benchmarks show that this has almost no effect on mean 466 // mutator utilization, heap size, or assist time and it 467 // introduces the danger of under-estimating and letting the 468 // mutator outpace the garbage collector. 469 scanWorkExpected := memstats.heap_scan 470 471 // Compute the mutator assist ratio so by the time the mutator 472 // allocates the remaining heap bytes up to next_gc, it will 473 // have done (or stolen) the estimated amount of scan work. 474 heapDistance := int64(c.heapGoal) - int64(work.initialHeapLive) 475 if heapDistance <= 1024*1024 { 476 // heapDistance can be negative if GC start is delayed 477 // or if the allocation that pushed heap_live over 478 // next_gc is large or if the trigger is really close 479 // to GOGC. We don't want to set the assist negative 480 // (or divide by zero, or set it really high), so 481 // enforce a minimum on the distance. 482 heapDistance = 1024 * 1024 483 } 484 c.assistRatio = float64(scanWorkExpected) / float64(heapDistance) 485 } 486 487 // endCycle updates the GC controller state at the end of the 488 // concurrent part of the GC cycle. 489 func (c *gcControllerState) endCycle() { 490 h_t := c.triggerRatio // For debugging 491 492 // Proportional response gain for the trigger controller. Must 493 // be in [0, 1]. Lower values smooth out transient effects but 494 // take longer to respond to phase changes. Higher values 495 // react to phase changes quickly, but are more affected by 496 // transient changes. Values near 1 may be unstable. 497 const triggerGain = 0.5 498 499 // Compute next cycle trigger ratio. First, this computes the 500 // "error" for this cycle; that is, how far off the trigger 501 // was from what it should have been, accounting for both heap 502 // growth and GC CPU utilization. We compute the actual heap 503 // growth during this cycle and scale that by how far off from 504 // the goal CPU utilization we were (to estimate the heap 505 // growth if we had the desired CPU utilization). The 506 // difference between this estimate and the GOGC-based goal 507 // heap growth is the error. 508 // 509 // TODO(austin): next_gc is based on heap_reachable, not 510 // heap_marked, which means the actual growth ratio 511 // technically isn't comparable to the trigger ratio. 512 goalGrowthRatio := float64(gcpercent) / 100 513 actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1 514 assistDuration := nanotime() - c.assistStartTime 515 516 // Assume background mark hit its utilization goal. 517 utilization := gcGoalUtilization 518 // Add assist utilization; avoid divide by zero. 519 if assistDuration > 0 { 520 utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) 521 } 522 523 triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio) 524 525 // Finally, we adjust the trigger for next time by this error, 526 // damped by the proportional gain. 527 c.triggerRatio += triggerGain * triggerError 528 if c.triggerRatio < 0 { 529 // This can happen if the mutator is allocating very 530 // quickly or the GC is scanning very slowly. 531 c.triggerRatio = 0 532 } else if c.triggerRatio > goalGrowthRatio*0.95 { 533 // Ensure there's always a little margin so that the 534 // mutator assist ratio isn't infinity. 
535 c.triggerRatio = goalGrowthRatio * 0.95 536 } 537 538 if debug.gcpacertrace > 0 { 539 // Print controller state in terms of the design 540 // document. 541 H_m_prev := memstats.heap_marked 542 H_T := memstats.next_gc 543 h_a := actualGrowthRatio 544 H_a := memstats.heap_live 545 h_g := goalGrowthRatio 546 H_g := int64(float64(H_m_prev) * (1 + h_g)) 547 u_a := utilization 548 u_g := gcGoalUtilization 549 W_a := c.scanWork 550 print("pacer: H_m_prev=", H_m_prev, 551 " h_t=", h_t, " H_T=", H_T, 552 " h_a=", h_a, " H_a=", H_a, 553 " h_g=", h_g, " H_g=", H_g, 554 " u_a=", u_a, " u_g=", u_g, 555 " W_a=", W_a, 556 " goal=", goalGrowthRatio-h_t, 557 " actual=", h_a-h_t, 558 " u_a/u_g=", u_a/u_g, 559 "\n") 560 } 561 } 562 563 // findRunnableGCWorker returns the background mark worker for _p_ if it 564 // should be run. This must only be called when gcBlackenEnabled != 0. 565 func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { 566 if gcBlackenEnabled == 0 { 567 throw("gcControllerState.findRunnable: blackening not enabled") 568 } 569 if _p_.gcBgMarkWorker == nil { 570 throw("gcControllerState.findRunnable: no background mark worker") 571 } 572 if work.bgMark1.done != 0 && work.bgMark2.done != 0 { 573 // Background mark is done. Don't schedule background 574 // mark worker any more. (This is not just an 575 // optimization. Without this we can spin scheduling 576 // the background worker and having it return 577 // immediately with no work to do.) 578 return nil 579 } 580 581 decIfPositive := func(ptr *int64) bool { 582 if *ptr > 0 { 583 if xaddint64(ptr, -1) >= 0 { 584 return true 585 } 586 // We lost a race 587 xaddint64(ptr, +1) 588 } 589 return false 590 } 591 592 if decIfPositive(&c.dedicatedMarkWorkersNeeded) { 593 // This P is now dedicated to marking until the end of 594 // the concurrent mark phase. 595 _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode 596 // TODO(austin): This P isn't going to run anything 597 // else for a while, so kick everything out of its run 598 // queue. 599 } else { 600 if _p_.gcw.wbuf == 0 && work.full == 0 && work.partial == 0 { 601 // No work to be done right now. This can 602 // happen at the end of the mark phase when 603 // there are still assists tapering off. Don't 604 // bother running background mark because 605 // it'll just return immediately. 606 if work.nwait == work.nproc { 607 // There are also no workers, which 608 // means we've reached a completion point. 609 // There may not be any workers to 610 // signal it, so signal it here. 611 readied := false 612 if gcBlackenPromptly { 613 if work.bgMark1.done == 0 { 614 throw("completing mark 2, but bgMark1.done == 0") 615 } 616 readied = work.bgMark2.complete() 617 } else { 618 readied = work.bgMark1.complete() 619 } 620 if readied { 621 // complete just called ready, 622 // but we're inside the 623 // scheduler. Let it know that 624 // that's okay. 625 resetspinning() 626 } 627 } 628 return nil 629 } 630 if !decIfPositive(&c.fractionalMarkWorkersNeeded) { 631 // No more workers are need right now. 632 return nil 633 } 634 635 // This P has picked the token for the fractional worker. 636 // Is the GC currently under or at the utilization goal? 637 // If so, do more work. 638 // 639 // We used to check whether doing one time slice of work 640 // would remain under the utilization goal, but that has the 641 // effect of delaying work until the mutator has run for 642 // enough time slices to pay for the work. 
		// During those time
		// slices, write barriers are enabled, so the mutator is running slower.
		// Now instead we do the work whenever we're under or at the
		// utilization goal and pay for it by letting the mutator run later.
		// This doesn't change the overall utilization averages, but it
		// front loads the GC work so that the GC finishes earlier and
		// write barriers can be turned off sooner, effectively giving
		// the mutator a faster machine.
		//
		// The old, slower behavior can be restored by setting
		//	gcForcePreemptNS = forcePreemptNS.
		const gcForcePreemptNS = 0

		// TODO(austin): We could fast path this and basically
		// eliminate contention on c.fractionalMarkWorkersNeeded by
		// precomputing the minimum time at which it's worth
		// next scheduling the fractional worker. Then Ps
		// don't have to fight in the window where we've
		// passed that deadline and no one has started the
		// worker yet.
		//
		// TODO(austin): Shorter preemption interval for mark
		// worker to improve fairness and give this
		// finer-grained control over schedule?
		now := nanotime() - gcController.bgMarkStartTime
		then := now + gcForcePreemptNS
		timeUsed := c.fractionalMarkTime + gcForcePreemptNS
		if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal {
			// Nope, we'd overshoot the utilization goal
			xaddint64(&c.fractionalMarkWorkersNeeded, +1)
			return nil
		}
		_p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
	}

	// Run the background mark worker
	gp := _p_.gcBgMarkWorker
	casgstatus(gp, _Gwaiting, _Grunnable)
	if trace.enabled {
		traceGoUnpark(gp, 0)
	}
	return gp
}

// gcGoalUtilization is the goal CPU utilization for background
// marking as a fraction of GOMAXPROCS.
const gcGoalUtilization = 0.25

// gcBgCreditSlack is the amount of scan work credit background
// scanning can accumulate locally before updating
// gcController.bgScanCredit. Lower values give mutator assists more
// accurate accounting of background scanning. Higher values reduce
// memory contention.
const gcBgCreditSlack = 2000

// gcAssistTimeSlack is the nanoseconds of mutator assist time that
// can accumulate on a P before updating gcController.assistTime.
const gcAssistTimeSlack = 5000

// Determine whether to initiate a GC.
// If the GC is already working there is no need to trigger another one.
// This should establish a feedback loop where if the GC does not
// have sufficient time to complete then more memory will be
// requested from the OS, increasing heap size and thus allowing future
// GCs more time to complete.
// The memstats.heap_live read has a benign race.
// A false negative simply does not start a GC, a false positive
// will start a GC needlessly. Neither has correctness issues.
func shouldtriggergc() bool {
	return memstats.heap_live >= memstats.next_gc && atomicloaduint(&bggc.working) == 0
}

// bgMarkSignal synchronizes the GC coordinator and background mark workers.
type bgMarkSignal struct {
	// Workers race to cas to 1. Winner signals coordinator.
	done uint32
	// Coordinator to wake up.
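	// If complete is called before the coordinator has parked, it sets
	// wake so that wait returns immediately; otherwise complete readies
	// the g recorded here.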
719 lock mutex 720 g *g 721 wake bool 722 } 723 724 func (s *bgMarkSignal) wait() { 725 lock(&s.lock) 726 if s.wake { 727 // Wakeup already happened 728 unlock(&s.lock) 729 } else { 730 s.g = getg() 731 goparkunlock(&s.lock, "mark wait (idle)", traceEvGoBlock, 1) 732 } 733 s.wake = false 734 s.g = nil 735 } 736 737 // complete signals the completion of this phase of marking. This can 738 // be called multiple times during a cycle; only the first call has 739 // any effect. 740 // 741 // The caller should arrange to deschedule itself as soon as possible 742 // after calling complete in order to let the coordinator goroutine 743 // run. 744 func (s *bgMarkSignal) complete() bool { 745 if cas(&s.done, 0, 1) { 746 // This is the first worker to reach this completion point. 747 // Signal the main GC goroutine. 748 lock(&s.lock) 749 if s.g == nil { 750 // It hasn't parked yet. 751 s.wake = true 752 } else { 753 ready(s.g, 0) 754 } 755 unlock(&s.lock) 756 return true 757 } 758 return false 759 } 760 761 func (s *bgMarkSignal) clear() { 762 s.done = 0 763 } 764 765 var work struct { 766 full uint64 // lock-free list of full blocks workbuf 767 empty uint64 // lock-free list of empty blocks workbuf 768 // TODO(rlh): partial no longer used, remove. (issue #11922) 769 partial uint64 // lock-free list of partially filled blocks workbuf 770 pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait 771 nproc uint32 772 tstart int64 773 nwait uint32 774 ndone uint32 775 alldone note 776 markfor *parfor 777 778 bgMarkReady note // signal background mark worker has started 779 bgMarkDone uint32 // cas to 1 when at a background mark completion point 780 // Background mark completion signaling 781 782 // Coordination for the 2 parts of the mark phase. 783 bgMark1 bgMarkSignal 784 bgMark2 bgMarkSignal 785 786 // Copy of mheap.allspans for marker or sweeper. 787 spans []*mspan 788 789 // totaltime is the CPU nanoseconds spent in GC since the 790 // program started if debug.gctrace > 0. 791 totaltime int64 792 793 // bytesMarked is the number of bytes marked this cycle. This 794 // includes bytes blackened in scanned objects, noscan objects 795 // that go straight to black, and permagrey objects scanned by 796 // markroot during the concurrent scan phase. This is updated 797 // atomically during the cycle. Updates may be batched 798 // arbitrarily, since the value is only read at the end of the 799 // cycle. 800 // 801 // Because of benign races during marking, this number may not 802 // be the exact number of marked bytes, but it should be very 803 // close. 804 bytesMarked uint64 805 806 // initialHeapLive is the value of memstats.heap_live at the 807 // beginning of this GC cycle. 808 initialHeapLive uint64 809 } 810 811 // GC runs a garbage collection and blocks the caller until the 812 // garbage collection is complete. It may also block the entire 813 // program. 814 func GC() { 815 startGC(gcForceBlockMode, false) 816 } 817 818 const ( 819 gcBackgroundMode = iota // concurrent GC 820 gcForceMode // stop-the-world GC now 821 gcForceBlockMode // stop-the-world GC now and wait for sweep 822 ) 823 824 // startGC starts a GC cycle. If mode is gcBackgroundMode, this will 825 // start GC in the background and return. Otherwise, this will block 826 // until the new GC cycle is started and finishes. If forceTrigger is 827 // true, it indicates that GC should be started regardless of the 828 // current heap size. 
829 func startGC(mode int, forceTrigger bool) { 830 // The gc is turned off (via enablegc) until the bootstrap has completed. 831 // Also, malloc gets called in the guts of a number of libraries that might be 832 // holding locks. To avoid deadlocks during stop-the-world, don't bother 833 // trying to run gc while holding a lock. The next mallocgc without a lock 834 // will do the gc instead. 835 mp := acquirem() 836 if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" || !memstats.enablegc || panicking != 0 || gcpercent < 0 { 837 releasem(mp) 838 return 839 } 840 releasem(mp) 841 mp = nil 842 843 if debug.gcstoptheworld == 1 { 844 mode = gcForceMode 845 } else if debug.gcstoptheworld == 2 { 846 mode = gcForceBlockMode 847 } 848 849 if mode != gcBackgroundMode { 850 // special synchronous cases 851 gc(mode) 852 return 853 } 854 855 // trigger concurrent GC 856 readied := false 857 lock(&bggc.lock) 858 // The trigger was originally checked speculatively, so 859 // recheck that this really should trigger GC. (For example, 860 // we may have gone through a whole GC cycle since the 861 // speculative check.) 862 if !(forceTrigger || shouldtriggergc()) { 863 unlock(&bggc.lock) 864 return 865 } 866 if !bggc.started { 867 bggc.working = 1 868 bggc.started = true 869 readied = true 870 go backgroundgc() 871 } else if bggc.working == 0 { 872 bggc.working = 1 873 readied = true 874 ready(bggc.g, 0) 875 } 876 unlock(&bggc.lock) 877 if readied { 878 // This G just started or ready()d the GC goroutine. 879 // Switch directly to it by yielding. 880 Gosched() 881 } 882 } 883 884 // State of the background concurrent GC goroutine. 885 var bggc struct { 886 lock mutex 887 g *g 888 working uint 889 started bool 890 } 891 892 // backgroundgc is running in a goroutine and does the concurrent GC work. 893 // bggc holds the state of the backgroundgc. 894 func backgroundgc() { 895 bggc.g = getg() 896 for { 897 gc(gcBackgroundMode) 898 lock(&bggc.lock) 899 bggc.working = 0 900 goparkunlock(&bggc.lock, "Concurrent GC wait", traceEvGoBlock, 1) 901 } 902 } 903 904 func gc(mode int) { 905 // Timing/utilization tracking 906 var stwprocs, maxprocs int32 907 var tSweepTerm, tScan, tInstallWB, tMark, tMarkTerm int64 908 909 // debug.gctrace variables 910 var heap0, heap1, heap2, heapGoal uint64 911 912 // memstats statistics 913 var now, pauseStart, pauseNS int64 914 915 // Ok, we're doing it! Stop everybody else 916 semacquire(&worldsema, false) 917 918 // Pick up the remaining unswept/not being swept spans concurrently 919 // 920 // This shouldn't happen if we're being invoked in background 921 // mode since proportional sweep should have just finished 922 // sweeping everything, but rounding errors, etc, may leave a 923 // few spans unswept. In forced mode, this is necessary since 924 // GC can be forced at any point in the sweeping cycle. 925 for gosweepone() != ^uintptr(0) { 926 sweep.nbgsweep++ 927 } 928 929 if trace.enabled { 930 traceGCStart() 931 } 932 933 if mode == gcBackgroundMode { 934 gcBgMarkStartWorkers() 935 } 936 now = nanotime() 937 stwprocs, maxprocs = gcprocs(), gomaxprocs 938 tSweepTerm = now 939 heap0 = memstats.heap_live 940 941 pauseStart = now 942 systemstack(stopTheWorldWithSema) 943 systemstack(finishsweep_m) // finish sweep before we start concurrent scan. 944 // clearpools before we start the GC. If we wait they memory will not be 945 // reclaimed until the next GC cycle. 
946 clearpools() 947 948 gcResetMarkState() 949 950 if mode == gcBackgroundMode { // Do as much work concurrently as possible 951 gcController.startCycle() 952 heapGoal = gcController.heapGoal 953 954 systemstack(func() { 955 // Enter scan phase. This enables write 956 // barriers to track changes to stack frames 957 // above the stack barrier. 958 // 959 // TODO: This has evolved to the point where 960 // we carefully ensure invariants we no longer 961 // depend on. Either: 962 // 963 // 1) Enable full write barriers for the scan, 964 // but eliminate the ragged barrier below 965 // (since the start the world ensures all Ps 966 // have observed the write barrier enable) and 967 // consider draining during the scan. 968 // 969 // 2) Only enable write barriers for writes to 970 // the stack at this point, and then enable 971 // write barriers for heap writes when we 972 // enter the mark phase. This means we cannot 973 // drain in the scan phase and must perform a 974 // ragged barrier to ensure all Ps have 975 // enabled heap write barriers before we drain 976 // or enable assists. 977 // 978 // 3) Don't install stack barriers over frame 979 // boundaries where there are up-pointers. 980 setGCPhase(_GCscan) 981 982 gcBgMarkPrepare() // Must happen before assist enable. 983 984 // At this point all Ps have enabled the write 985 // barrier, thus maintaining the no white to 986 // black invariant. Enable mutator assists to 987 // put back-pressure on fast allocating 988 // mutators. 989 atomicstore(&gcBlackenEnabled, 1) 990 991 // Concurrent scan. 992 startTheWorldWithSema() 993 now = nanotime() 994 pauseNS += now - pauseStart 995 tScan = now 996 gcController.assistStartTime = now 997 gcscan_m() 998 999 // Enter mark phase. 1000 tInstallWB = nanotime() 1001 setGCPhase(_GCmark) 1002 // Ensure all Ps have observed the phase 1003 // change and have write barriers enabled 1004 // before any blackening occurs. 1005 forEachP(func(*p) {}) 1006 }) 1007 // Concurrent mark. 1008 tMark = nanotime() 1009 1010 // Enable background mark workers and wait for 1011 // background mark completion. 1012 gcController.bgMarkStartTime = nanotime() 1013 work.bgMark1.clear() 1014 work.bgMark1.wait() 1015 1016 // The global work list is empty, but there can still be work 1017 // sitting in the per-P work caches and there can be more 1018 // objects reachable from global roots since they don't have write 1019 // barriers. Rescan some roots and flush work caches. 1020 systemstack(func() { 1021 // rescan global data and bss. 1022 markroot(nil, _RootData) 1023 markroot(nil, _RootBss) 1024 1025 // Disallow caching workbufs. 1026 gcBlackenPromptly = true 1027 1028 // Flush all currently cached workbufs. This 1029 // also forces any remaining background 1030 // workers out of their loop. 1031 forEachP(func(_p_ *p) { 1032 _p_.gcw.dispose() 1033 }) 1034 }) 1035 1036 // Wait for this more aggressive background mark to complete. 1037 work.bgMark2.clear() 1038 work.bgMark2.wait() 1039 1040 // Begin mark termination. 1041 now = nanotime() 1042 tMarkTerm = now 1043 pauseStart = now 1044 systemstack(stopTheWorldWithSema) 1045 // The gcphase is _GCmark, it will transition to _GCmarktermination 1046 // below. The important thing is that the wb remains active until 1047 // all marking is complete. This includes writes made by the GC. 1048 1049 // Flush the gcWork caches. This must be done before 1050 // endCycle since endCycle depends on statistics kept 1051 // in these caches. 
1052 gcFlushGCWork() 1053 1054 gcController.endCycle() 1055 } else { 1056 // For non-concurrent GC (mode != gcBackgroundMode) 1057 // The g stacks have not been scanned so clear g state 1058 // such that mark termination scans all stacks. 1059 gcResetGState() 1060 1061 t := nanotime() 1062 tScan, tInstallWB, tMark, tMarkTerm = t, t, t, t 1063 heapGoal = heap0 1064 } 1065 1066 // World is stopped. 1067 // Start marktermination which includes enabling the write barrier. 1068 atomicstore(&gcBlackenEnabled, 0) 1069 gcBlackenPromptly = false 1070 setGCPhase(_GCmarktermination) 1071 1072 heap1 = memstats.heap_live 1073 startTime := nanotime() 1074 1075 mp := acquirem() 1076 mp.preemptoff = "gcing" 1077 _g_ := getg() 1078 _g_.m.traceback = 2 1079 gp := _g_.m.curg 1080 casgstatus(gp, _Grunning, _Gwaiting) 1081 gp.waitreason = "garbage collection" 1082 1083 // Run gc on the g0 stack. We do this so that the g stack 1084 // we're currently running on will no longer change. Cuts 1085 // the root set down a bit (g0 stacks are not scanned, and 1086 // we don't need to scan gc's internal state). We also 1087 // need to switch to g0 so we can shrink the stack. 1088 systemstack(func() { 1089 gcMark(startTime) 1090 // Must return immediately. 1091 // The outer function's stack may have moved 1092 // during gcMark (it shrinks stacks, including the 1093 // outer function's stack), so we must not refer 1094 // to any of its variables. Return back to the 1095 // non-system stack to pick up the new addresses 1096 // before continuing. 1097 }) 1098 1099 systemstack(func() { 1100 heap2 = work.bytesMarked 1101 if debug.gccheckmark > 0 { 1102 // Run a full stop-the-world mark using checkmark bits, 1103 // to check that we didn't forget to mark anything during 1104 // the concurrent mark process. 1105 gcResetGState() // Rescan stacks 1106 gcResetMarkState() 1107 initCheckmarks() 1108 gcMark(startTime) 1109 clearCheckmarks() 1110 } 1111 1112 // marking is complete so we can turn the write barrier off 1113 setGCPhase(_GCoff) 1114 gcSweep(mode) 1115 1116 if debug.gctrace > 1 { 1117 startTime = nanotime() 1118 // The g stacks have been scanned so 1119 // they have gcscanvalid==true and gcworkdone==true. 1120 // Reset these so that all stacks will be rescanned. 1121 gcResetGState() 1122 gcResetMarkState() 1123 finishsweep_m() 1124 1125 // Still in STW but gcphase is _GCoff, reset to _GCmarktermination 1126 // At this point all objects will be found during the gcMark which 1127 // does a complete STW mark and object scan. 1128 setGCPhase(_GCmarktermination) 1129 gcMark(startTime) 1130 setGCPhase(_GCoff) // marking is done, turn off wb. 1131 gcSweep(mode) 1132 } 1133 }) 1134 1135 _g_.m.traceback = 0 1136 casgstatus(gp, _Gwaiting, _Grunning) 1137 1138 if trace.enabled { 1139 traceGCDone() 1140 } 1141 1142 // all done 1143 mp.preemptoff = "" 1144 1145 if gcphase != _GCoff { 1146 throw("gc done but gcphase != _GCoff") 1147 } 1148 1149 // Update timing memstats 1150 now, unixNow := nanotime(), unixnanotime() 1151 pauseNS += now - pauseStart 1152 atomicstore64(&memstats.last_gc, uint64(unixNow)) // must be Unix time to make sense to user 1153 memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(pauseNS) 1154 memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) 1155 memstats.pause_total_ns += uint64(pauseNS) 1156 1157 // Update work.totaltime. 
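	// Each STW phase below is charged stwprocs CPUs for its wall-clock
	// duration, the concurrent scan is charged as a single CPU, and
	// concurrent mark is charged the assist, dedicated, and fractional
	// worker times accumulated by gcController.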
1158 sweepTermCpu := int64(stwprocs) * (tScan - tSweepTerm) 1159 scanCpu := tInstallWB - tScan 1160 installWBCpu := int64(0) 1161 // We report idle marking time below, but omit it from the 1162 // overall utilization here since it's "free". 1163 markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime 1164 markTermCpu := int64(stwprocs) * (now - tMarkTerm) 1165 cycleCpu := sweepTermCpu + scanCpu + installWBCpu + markCpu + markTermCpu 1166 work.totaltime += cycleCpu 1167 1168 // Compute overall GC CPU utilization. 1169 totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs) 1170 memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu) 1171 1172 memstats.numgc++ 1173 1174 systemstack(startTheWorldWithSema) 1175 semrelease(&worldsema) 1176 1177 releasem(mp) 1178 mp = nil 1179 1180 if debug.gctrace > 0 { 1181 tEnd := now 1182 util := int(memstats.gc_cpu_fraction * 100) 1183 1184 var sbuf [24]byte 1185 printlock() 1186 print("gc ", memstats.numgc, 1187 " @", string(itoaDiv(sbuf[:], uint64(tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", 1188 util, "%: ") 1189 prev := tSweepTerm 1190 for i, ns := range []int64{tScan, tInstallWB, tMark, tMarkTerm, tEnd} { 1191 if i != 0 { 1192 print("+") 1193 } 1194 print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev)))) 1195 prev = ns 1196 } 1197 print(" ms clock, ") 1198 for i, ns := range []int64{sweepTermCpu, scanCpu, installWBCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} { 1199 if i == 4 || i == 5 { 1200 // Separate mark time components with /. 1201 print("/") 1202 } else if i != 0 { 1203 print("+") 1204 } 1205 print(string(fmtNSAsMS(sbuf[:], uint64(ns)))) 1206 } 1207 print(" ms cpu, ", 1208 heap0>>20, "->", heap1>>20, "->", heap2>>20, " MB, ", 1209 heapGoal>>20, " MB goal, ", 1210 maxprocs, " P") 1211 if mode != gcBackgroundMode { 1212 print(" (forced)") 1213 } 1214 print("\n") 1215 printunlock() 1216 } 1217 sweep.nbgsweep = 0 1218 sweep.npausesweep = 0 1219 1220 // now that gc is done, kick off finalizer thread if needed 1221 if !concurrentSweep { 1222 // give the queued finalizers, if any, a chance to run 1223 Gosched() 1224 } 1225 } 1226 1227 // gcBgMarkStartWorkers prepares background mark worker goroutines. 1228 // These goroutines will not run until the mark phase, but they must 1229 // be started while the work is not stopped and from a regular G 1230 // stack. The caller must hold worldsema. 1231 func gcBgMarkStartWorkers() { 1232 // Background marking is performed by per-P G's. Ensure that 1233 // each P has a background GC G. 1234 for _, p := range &allp { 1235 if p == nil || p.status == _Pdead { 1236 break 1237 } 1238 if p.gcBgMarkWorker == nil { 1239 go gcBgMarkWorker(p) 1240 notetsleepg(&work.bgMarkReady, -1) 1241 noteclear(&work.bgMarkReady) 1242 } 1243 } 1244 } 1245 1246 // gcBgMarkPrepare sets up state for background marking. 1247 // Mutator assists must not yet be enabled. 1248 func gcBgMarkPrepare() { 1249 // Background marking will stop when the work queues are empty 1250 // and there are no more workers (note that, since this is 1251 // concurrent, this may be a transient state, but mark 1252 // termination will clean it up). Between background workers 1253 // and assists, we don't really know how many workers there 1254 // will be, so we pretend to have an arbitrarily large number 1255 // of workers, almost all of which are "waiting". 
While a 1256 // worker is working it decrements nwait. If nproc == nwait, 1257 // there are no workers. 1258 work.nproc = ^uint32(0) 1259 work.nwait = ^uint32(0) 1260 1261 // Reset background mark completion points. 1262 work.bgMark1.done = 1 1263 work.bgMark2.done = 1 1264 } 1265 1266 func gcBgMarkWorker(p *p) { 1267 // Register this G as the background mark worker for p. 1268 if p.gcBgMarkWorker != nil { 1269 throw("P already has a background mark worker") 1270 } 1271 gp := getg() 1272 1273 mp := acquirem() 1274 p.gcBgMarkWorker = gp 1275 // After this point, the background mark worker is scheduled 1276 // cooperatively by gcController.findRunnable. Hence, it must 1277 // never be preempted, as this would put it into _Grunnable 1278 // and put it on a run queue. Instead, when the preempt flag 1279 // is set, this puts itself into _Gwaiting to be woken up by 1280 // gcController.findRunnable at the appropriate time. 1281 notewakeup(&work.bgMarkReady) 1282 for { 1283 // Go to sleep until woken by gcContoller.findRunnable. 1284 // We can't releasem yet since even the call to gopark 1285 // may be preempted. 1286 gopark(func(g *g, mp unsafe.Pointer) bool { 1287 releasem((*m)(mp)) 1288 return true 1289 }, unsafe.Pointer(mp), "mark worker (idle)", traceEvGoBlock, 0) 1290 1291 // Loop until the P dies and disassociates this 1292 // worker. (The P may later be reused, in which case 1293 // it will get a new worker.) 1294 if p.gcBgMarkWorker != gp { 1295 break 1296 } 1297 1298 // Disable preemption so we can use the gcw. If the 1299 // scheduler wants to preempt us, we'll stop draining, 1300 // dispose the gcw, and then preempt. 1301 mp = acquirem() 1302 1303 if gcBlackenEnabled == 0 { 1304 throw("gcBgMarkWorker: blackening not enabled") 1305 } 1306 1307 startTime := nanotime() 1308 1309 decnwait := xadd(&work.nwait, -1) 1310 if decnwait == work.nproc { 1311 println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) 1312 throw("work.nwait was > work.nproc") 1313 } 1314 1315 done := false 1316 switch p.gcMarkWorkerMode { 1317 default: 1318 throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") 1319 case gcMarkWorkerDedicatedMode: 1320 gcDrain(&p.gcw, gcBgCreditSlack) 1321 // gcDrain did the xadd(&work.nwait +1) to 1322 // match the decrement above. It only returns 1323 // at a mark completion point. 1324 done = true 1325 if !p.gcw.empty() { 1326 throw("gcDrain returned with buffer") 1327 } 1328 case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode: 1329 gcDrainUntilPreempt(&p.gcw, gcBgCreditSlack) 1330 1331 // If we are nearing the end of mark, dispose 1332 // of the cache promptly. We must do this 1333 // before signaling that we're no longer 1334 // working so that other workers can't observe 1335 // no workers and no work while we have this 1336 // cached, and before we compute done. 1337 if gcBlackenPromptly { 1338 p.gcw.dispose() 1339 } 1340 1341 // Was this the last worker and did we run out 1342 // of work? 1343 incnwait := xadd(&work.nwait, +1) 1344 if incnwait > work.nproc { 1345 println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode, 1346 "work.nwait=", incnwait, "work.nproc=", work.nproc) 1347 throw("work.nwait > work.nproc") 1348 } 1349 done = incnwait == work.nproc && work.full == 0 && work.partial == 0 1350 } 1351 1352 // If this worker reached a background mark completion 1353 // point, signal the main GC goroutine. 
1354 if done { 1355 if gcBlackenPromptly { 1356 if work.bgMark1.done == 0 { 1357 throw("completing mark 2, but bgMark1.done == 0") 1358 } 1359 work.bgMark2.complete() 1360 } else { 1361 work.bgMark1.complete() 1362 } 1363 } 1364 1365 duration := nanotime() - startTime 1366 switch p.gcMarkWorkerMode { 1367 case gcMarkWorkerDedicatedMode: 1368 xaddint64(&gcController.dedicatedMarkTime, duration) 1369 xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) 1370 case gcMarkWorkerFractionalMode: 1371 xaddint64(&gcController.fractionalMarkTime, duration) 1372 xaddint64(&gcController.fractionalMarkWorkersNeeded, 1) 1373 case gcMarkWorkerIdleMode: 1374 xaddint64(&gcController.idleMarkTime, duration) 1375 } 1376 } 1377 } 1378 1379 // gcMarkWorkAvailable returns true if executing a mark worker 1380 // on p is potentially useful. 1381 func gcMarkWorkAvailable(p *p) bool { 1382 if !p.gcw.empty() { 1383 return true 1384 } 1385 if atomicload64(&work.full) != 0 || atomicload64(&work.partial) != 0 { 1386 return true // global work available 1387 } 1388 return false 1389 } 1390 1391 // gcFlushGCWork disposes the gcWork caches of all Ps. The world must 1392 // be stopped. 1393 //go:nowritebarrier 1394 func gcFlushGCWork() { 1395 // Gather all cached GC work. All other Ps are stopped, so 1396 // it's safe to manipulate their GC work caches. 1397 for i := 0; i < int(gomaxprocs); i++ { 1398 allp[i].gcw.dispose() 1399 } 1400 } 1401 1402 // gcMark runs the mark (or, for concurrent GC, mark termination) 1403 // STW is in effect at this point. 1404 //TODO go:nowritebarrier 1405 func gcMark(start_time int64) { 1406 if debug.allocfreetrace > 0 { 1407 tracegc() 1408 } 1409 1410 if gcphase != _GCmarktermination { 1411 throw("in gcMark expecting to see gcphase as _GCmarktermination") 1412 } 1413 work.tstart = start_time 1414 1415 gcCopySpans() // TODO(rlh): should this be hoisted and done only once? Right now it is done for normal marking and also for checkmarking. 1416 1417 // Make sure the per-P gcWork caches are empty. During mark 1418 // termination, these caches can still be used temporarily, 1419 // but must be disposed to the global lists immediately. 1420 gcFlushGCWork() 1421 1422 work.nwait = 0 1423 work.ndone = 0 1424 work.nproc = uint32(gcprocs()) 1425 1426 if trace.enabled { 1427 traceGCScanStart() 1428 } 1429 1430 parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), false, markroot) 1431 if work.nproc > 1 { 1432 noteclear(&work.alldone) 1433 helpgc(int32(work.nproc)) 1434 } 1435 1436 gchelperstart() 1437 parfordo(work.markfor) 1438 1439 var gcw gcWork 1440 gcDrain(&gcw, -1) 1441 gcw.dispose() 1442 1443 if work.full != 0 { 1444 throw("work.full != 0") 1445 } 1446 if work.partial != 0 { 1447 throw("work.partial != 0") 1448 } 1449 1450 if work.nproc > 1 { 1451 notesleep(&work.alldone) 1452 } 1453 1454 for i := 0; i < int(gomaxprocs); i++ { 1455 if allp[i].gcw.wbuf != 0 { 1456 throw("P has cached GC work at end of mark termination") 1457 } 1458 } 1459 1460 if trace.enabled { 1461 traceGCScanDone() 1462 } 1463 1464 // TODO(austin): This doesn't have to be done during STW, as 1465 // long as we block the next GC cycle until this is done. Move 1466 // it after we start the world, but before dropping worldsema. 1467 // (See issue #11465.) 1468 freeStackSpans() 1469 1470 cachestats() 1471 1472 // Compute the reachable heap size at the beginning of the 1473 // cycle. 
This is approximately the marked heap size at the 1474 // end (which we know) minus the amount of marked heap that 1475 // was allocated after marking began (which we don't know, but 1476 // is approximately the amount of heap that was allocated 1477 // since marking began). 1478 allocatedDuringCycle := memstats.heap_live - work.initialHeapLive 1479 if work.bytesMarked >= allocatedDuringCycle { 1480 memstats.heap_reachable = work.bytesMarked - allocatedDuringCycle 1481 } else { 1482 // This can happen if most of the allocation during 1483 // the cycle never became reachable from the heap. 1484 // Just set the reachable heap approximation to 0 and 1485 // let the heapminimum kick in below. 1486 memstats.heap_reachable = 0 1487 } 1488 1489 // Trigger the next GC cycle when the allocated heap has grown 1490 // by triggerRatio over the reachable heap size. Assume that 1491 // we're in steady state, so the reachable heap size is the 1492 // same now as it was at the beginning of the GC cycle. 1493 memstats.next_gc = uint64(float64(memstats.heap_reachable) * (1 + gcController.triggerRatio)) 1494 if memstats.next_gc < heapminimum { 1495 memstats.next_gc = heapminimum 1496 } 1497 if int64(memstats.next_gc) < 0 { 1498 print("next_gc=", memstats.next_gc, " bytesMarked=", work.bytesMarked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "\n") 1499 throw("next_gc underflow") 1500 } 1501 1502 // Update other GC heap size stats. 1503 memstats.heap_live = work.bytesMarked 1504 memstats.heap_marked = work.bytesMarked 1505 memstats.heap_scan = uint64(gcController.scanWork) 1506 1507 minNextGC := memstats.heap_live + sweepMinHeapDistance*uint64(gcpercent)/100 1508 if memstats.next_gc < minNextGC { 1509 // The allocated heap is already past the trigger. 1510 // This can happen if the triggerRatio is very low and 1511 // the reachable heap estimate is less than the live 1512 // heap size. 1513 // 1514 // Concurrent sweep happens in the heap growth from 1515 // heap_live to next_gc, so bump next_gc up to ensure 1516 // that concurrent sweep has some heap growth in which 1517 // to perform sweeping before we start the next GC 1518 // cycle. 1519 memstats.next_gc = minNextGC 1520 } 1521 1522 if trace.enabled { 1523 traceHeapAlloc() 1524 traceNextGC() 1525 } 1526 } 1527 1528 func gcSweep(mode int) { 1529 if gcphase != _GCoff { 1530 throw("gcSweep being done but phase is not GCoff") 1531 } 1532 gcCopySpans() 1533 1534 lock(&mheap_.lock) 1535 mheap_.sweepgen += 2 1536 mheap_.sweepdone = 0 1537 sweep.spanidx = 0 1538 unlock(&mheap_.lock) 1539 1540 if !_ConcurrentSweep || mode == gcForceBlockMode { 1541 // Special case synchronous sweep. 1542 // Record that no proportional sweeping has to happen. 1543 lock(&mheap_.lock) 1544 mheap_.sweepPagesPerByte = 0 1545 mheap_.pagesSwept = 0 1546 unlock(&mheap_.lock) 1547 // Sweep all spans eagerly. 1548 for sweepone() != ^uintptr(0) { 1549 sweep.npausesweep++ 1550 } 1551 // Do an additional mProf_GC, because all 'free' events are now real as well. 1552 mProf_GC() 1553 mProf_GC() 1554 return 1555 } 1556 1557 // Account how much sweeping needs to be done before the next 1558 // GC cycle and set up proportional sweep statistics. 
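	// The sweepPagesPerByte ratio computed below is meant to ensure that,
	// roughly, for every byte of spans allocated between now and the next
	// trigger, at least that many pages are swept, so all in-use spans are
	// swept (with a small margin) before the heap grows to next_gc.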

	// Background sweep.
	lock(&sweep.lock)
	if sweep.parked {
		sweep.parked = false
		ready(sweep.g, 0)
	}
	unlock(&sweep.lock)
	mProf_GC()
}

func gcCopySpans() {
	// Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
	// resizing/freeing allspans.
	// New spans can be created while GC progresses, but they are not garbage for
	// this round:
	// - new stack spans can be created even while the world is stopped.
	// - new malloc spans can be created during the concurrent sweep
	// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
	lock(&mheap_.lock)
	// Free the old cached mark array if necessary.
	if work.spans != nil && &work.spans[0] != &h_allspans[0] {
		sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
	}
	// Cache the current array for sweeping.
	mheap_.gcspans = mheap_.allspans
	work.spans = h_allspans
	unlock(&mheap_.lock)
}

// gcResetGState resets the GC state of all G's and returns the length
// of allgs.
func gcResetGState() (numgs int) {
	// This may be called during a concurrent phase, so make sure
	// allgs doesn't change.
	lock(&allglock)
	for _, gp := range allgs {
		gp.gcscandone = false  // set to true in gcphasework
		gp.gcscanvalid = false // stack has not been scanned
		gp.gcalloc = 0
		gp.gcscanwork = 0
	}
	numgs = len(allgs)
	unlock(&allglock)
	return
}

// gcResetMarkState resets state prior to marking (concurrent or STW).
//
// TODO(austin): Merge with gcResetGState. See issue #11427.
func gcResetMarkState() {
	work.bytesMarked = 0
	work.initialHeapLive = memstats.heap_live
}

// Hooks for other packages

var poolcleanup func()

//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
func sync_runtime_registerPoolCleanup(f func()) {
	poolcleanup = f
}

func clearpools() {
	// clear sync.Pools
	if poolcleanup != nil {
		poolcleanup()
	}

	// Clear central sudog cache.
	// Leave per-P caches alone, they have strictly bounded size.
	// Disconnect cached list before dropping it on the floor,
	// so that a dangling ref to one entry does not pin all of them.
	lock(&sched.sudoglock)
	var sg, sgnext *sudog
	for sg = sched.sudogcache; sg != nil; sg = sgnext {
		sgnext = sg.next
		sg.next = nil
	}
	sched.sudogcache = nil
	unlock(&sched.sudoglock)
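
	// To see why the links are broken (illustrative picture, not
	// actual runtime state): if the cache were head -> a -> b -> c and
	// we only set sched.sudogcache = nil, a stale reference to a held
	// elsewhere would keep b and c reachable through a.next; with the
	// next fields cleared, each entry can be collected independently.
	// The same reasoning applies to the defer pools below.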

	// Clear central defer pools.
	// Leave per-P pools alone, they have strictly bounded size.
	lock(&sched.deferlock)
	for i := range sched.deferpool {
		// disconnect cached list before dropping it on the floor,
		// so that a dangling ref to one entry does not pin all of them.
		var d, dlink *_defer
		for d = sched.deferpool[i]; d != nil; d = dlink {
			dlink = d.link
			d.link = nil
		}
		sched.deferpool[i] = nil
	}
	unlock(&sched.deferlock)

	for _, p := range &allp {
		if p == nil {
			break
		}
		// clear tinyalloc pool
		if c := p.mcache; c != nil {
			c.tiny = nil
			c.tinyoffset = 0
		}
	}
}

// Timing

//go:nowritebarrier
func gchelper() {
	_g_ := getg()
	_g_.m.traceback = 2
	gchelperstart()

	if trace.enabled {
		traceGCScanStart()
	}

	// parallel mark over GC roots
	parfordo(work.markfor)
	if gcphase != _GCscan {
		var gcw gcWork
		gcDrain(&gcw, -1) // blocks in getfull
		gcw.dispose()
	}

	if trace.enabled {
		traceGCScanDone()
	}

	nproc := work.nproc // work.nproc can change right after we increment work.ndone
	if xadd(&work.ndone, +1) == nproc-1 {
		notewakeup(&work.alldone)
	}
	_g_.m.traceback = 0
}

func gchelperstart() {
	_g_ := getg()

	if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
		throw("gchelperstart: bad m->helpgc")
	}
	if _g_ != _g_.m.g0 {
		throw("gchelper not running on g0 stack")
	}
}

// itoaDiv formats val/(10**dec) into buf.
func itoaDiv(buf []byte, val uint64, dec int) []byte {
	i := len(buf) - 1
	idec := i - dec
	for val >= 10 || i >= idec {
		buf[i] = byte(val%10 + '0')
		i--
		if i == idec {
			buf[i] = '.'
			i--
		}
		val /= 10
	}
	buf[i] = byte(val + '0')
	return buf[i:]
}

// fmtNSAsMS nicely formats ns nanoseconds as milliseconds.
func fmtNSAsMS(buf []byte, ns uint64) []byte {
	if ns >= 10e6 {
		// Format as whole milliseconds.
		return itoaDiv(buf, ns/1e6, 0)
	}
	// Format two digits of precision, with at most three decimal places.
	x := ns / 1e3
	if x == 0 {
		buf[0] = '0'
		return buf[:1]
	}
	dec := 3
	for x >= 100 {
		x /= 10
		dec--
	}
	return itoaDiv(buf, x, dec)
}
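
// Worked examples for the two formatters above (outputs traced by hand
// from the code, not captured from a run; buf is assumed large enough
// to hold the digits):
//
//	fmtNSAsMS(buf, 12345678) -> "12"    // >= 10ms: whole milliseconds
//	fmtNSAsMS(buf, 2345678)  -> "2.3"   // two digits of precision
//	fmtNSAsMS(buf, 12345)    -> "0.012" // at most three decimal places
//	itoaDiv(buf, 1234, 2)    -> "12.34" // 1234/10**2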