/*

  Reference Cycle Garbage Collection
  ==================================

  Neil Schemenauer <nas (at) arctrix.com>

  Based on a post on the python-dev list.  Ideas from Guido van Rossum,
  Eric Tiedemann, and various others.

  http://www.arctrix.com/nas/python/gc/

  The following mailing list threads provide a historical perspective on
  the design of this module.  Note that a fair amount of refinement has
  occurred since those discussions.

  http://mail.python.org/pipermail/python-dev/2000-March/002385.html
  http://mail.python.org/pipermail/python-dev/2000-March/002434.html
  http://mail.python.org/pipermail/python-dev/2000-March/002497.html

  For a high-level view of the collection process, read the collect
  function.

*/

#include "Python.h"
#include "frameobject.h"        /* for PyFrame_ClearFreeList */
#include "pydtrace.h"
#include "pytime.h"             /* for _PyTime_GetMonotonicClock() */

/* Get an object's GC head */
#define AS_GC(o) ((PyGC_Head *)(o)-1)

/* Get the object given the GC head */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))

/*** Global GC state ***/

struct gc_generation {
    PyGC_Head head;
    int threshold; /* collection threshold */
    int count; /* count of allocations or collections of younger
                  generations */
};

#define NUM_GENERATIONS 3
#define GEN_HEAD(n) (&generations[n].head)

/* linked lists of container objects */
static struct gc_generation generations[NUM_GENERATIONS] = {
    /* PyGC_Head,                               threshold,      count */
    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},           700,            0},
    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},           10,             0},
    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},           10,             0},
};

PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);

static int enabled = 1; /* automatic collection enabled? */

/* true if we are currently running the collector */
static int collecting = 0;

/* list of uncollectable objects */
static PyObject *garbage = NULL;

/* Python string to use if unhandled exception occurs */
static PyObject *gc_str = NULL;

/* a list of callbacks to be invoked when collection is performed */
static PyObject *callbacks = NULL;

/* This is the number of objects that survived the last full collection.  It
   approximates the number of long-lived objects tracked by the GC.

   (by "full collection", we mean a collection of the oldest generation).
*/
static Py_ssize_t long_lived_total = 0;

/* This is the number of objects that survived all "non-full" collections,
   and are awaiting their first full collection.
*/
static Py_ssize_t long_lived_pending = 0;

/*
   NOTE: about the counting of long-lived objects.

   To limit the cost of garbage collection, there are two strategies:
     - make each collection faster, e.g. by scanning fewer objects
     - do fewer collections
   This heuristic is about the latter strategy.

   In addition to the various configurable thresholds, we only trigger a
   full collection if the ratio
    long_lived_pending / long_lived_total
   is above a given value (hardwired to 25%).

   The reason is that, while "non-full" collections (i.e., collections of
   the young and middle generations) will always examine roughly the same
   number of objects -- determined by the aforementioned thresholds --,
   the cost of a full collection is proportional to the total number of
   long-lived objects, which is virtually unbounded.

   Indeed, it has been remarked that doing a full collection every
   <constant number> of object creations entails a dramatic performance
   degradation in workloads that consist of creating and storing lots of
   long-lived objects (e.g. building a large list of GC-tracked objects would
   show quadratic performance, instead of linear as expected: see issue #4074).

   Using the above ratio, instead, yields amortized linear performance in
   the total number of objects (the effect of which can be summarized
   thusly: "each full garbage collection is more and more costly as the
   number of objects grows, but we do fewer and fewer of them").

   This heuristic was suggested by Martin von Löwis on python-dev in
   June 2008.  His original analysis and proposal can be found at:
   http://mail.python.org/pipermail/python-dev/2008-June/080579.html
*/

/*
   NOTE: about untracking of mutable objects.

   Certain types of container cannot participate in a reference cycle, and
   so do not need to be tracked by the garbage collector.  Untracking these
   objects reduces the cost of garbage collections.  However, determining
   which objects may be untracked is not free, and the costs must be
   weighed against the benefits for garbage collection.

   There are two possible strategies for when to untrack a container:

   i) When the container is created.
   ii) When the container is examined by the garbage collector.

   Tuples containing only immutable objects (integers, strings, etc.,
   and recursively, tuples of immutable objects) do not need to be tracked.
   The interpreter creates a large number of tuples, many of which will
   not survive until garbage collection.  It is therefore not worthwhile
   to untrack eligible tuples at creation time.

   Instead, all tuples except the empty tuple are tracked when created.
   During garbage collection it is determined whether any surviving tuples
   can be untracked.  A tuple can be untracked if all of its contents are
   already not tracked.  Tuples are examined for untracking in all garbage
   collection cycles.  It may take more than one cycle to untrack a tuple.

   Dictionaries containing only immutable objects also do not need to be
   tracked.  Dictionaries are untracked when created.  If a tracked item is
   inserted into a dictionary (either as a key or value), the dictionary
   becomes tracked.  During a full garbage collection (all generations),
   the collector will untrack any dictionaries whose contents are not
   tracked.

   The module provides the Python function is_tracked(obj), which returns
   the CURRENT tracking status of the object.  Subsequent garbage
   collections may change the tracking status of the object.

   Untracking of certain containers was introduced in issue #4688, and
   the algorithm was refined in response to issue #14775.
*/
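/* A minimal sketch of the tuple rule above, illustrative only: the real
 * logic lives in _PyTuple_MaybeUntrack() in Objects/tupleobject.c, and the
 * helper name here is hypothetical.  A tuple may be untracked once none of
 * its elements is tracked anymore.
 */
#if 0
static int
example_tuple_may_untrack(PyObject *t)
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(t); i++) {
        PyObject *elt = PyTuple_GET_ITEM(t, i);
        /* A still-tracked element (e.g. a list) keeps the tuple tracked. */
        if (PyObject_IS_GC(elt) && _PyObject_GC_IS_TRACKED(elt))
            return 0;
    }
    return 1;
}
#endif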
/* set for debugging information */
#define DEBUG_STATS             (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
#define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
#define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK              (DEBUG_COLLECTABLE | \
                                 DEBUG_UNCOLLECTABLE | \
                                 DEBUG_SAVEALL)
static int debug;

/* Running stats per generation */
struct gc_generation_stats {
    /* total number of collections */
    Py_ssize_t collections;
    /* total number of collected objects */
    Py_ssize_t collected;
    /* total number of uncollectable objects (put into gc.garbage) */
    Py_ssize_t uncollectable;
};

static struct gc_generation_stats generation_stats[NUM_GENERATIONS];

/*--------------------------------------------------------------------------
gc_refs values.

Between collections, every gc'ed object has one of two gc_refs values:

GC_UNTRACKED
    The initial state; objects returned by PyObject_GC_Malloc are in this
    state.  The object doesn't live in any generation list, and its
    tp_traverse slot must not be called.

GC_REACHABLE
    The object lives in some generation list, and its tp_traverse is safe to
    call.  An object transitions to GC_REACHABLE when PyObject_GC_Track
    is called.

During a collection, gc_refs can temporarily take on other states:

>= 0
    At the start of a collection, update_refs() copies the true refcount
    to gc_refs, for each object in the generation being collected.
    subtract_refs() then adjusts gc_refs so that it equals the number of
    times an object is referenced directly from outside the generation
    being collected.
    gc_refs remains >= 0 throughout these steps.

GC_TENTATIVELY_UNREACHABLE
    move_unreachable() then moves objects not reachable (whether directly or
    indirectly) from outside the generation into an "unreachable" set.
    Objects that are found to be reachable have gc_refs set to GC_REACHABLE
    again.  Objects that are found to be unreachable have gc_refs set to
    GC_TENTATIVELY_UNREACHABLE.  It's "tentatively" because the pass doing
    this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
    transition back to GC_REACHABLE.

    Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
    for collection.  If it's decided not to collect such an object (e.g.,
    it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
----------------------------------------------------------------------------
*/
#define GC_UNTRACKED                    _PyGC_REFS_UNTRACKED
#define GC_REACHABLE                    _PyGC_REFS_REACHABLE
#define GC_TENTATIVELY_UNREACHABLE      _PyGC_REFS_TENTATIVELY_UNREACHABLE

#define IS_TRACKED(o) (_PyGC_REFS(o) != GC_UNTRACKED)
#define IS_REACHABLE(o) (_PyGC_REFS(o) == GC_REACHABLE)
#define IS_TENTATIVELY_UNREACHABLE(o) ( \
    _PyGC_REFS(o) == GC_TENTATIVELY_UNREACHABLE)

/*** list functions ***/

static void
gc_list_init(PyGC_Head *list)
{
    list->gc.gc_prev = list;
    list->gc.gc_next = list;
}

static int
gc_list_is_empty(PyGC_Head *list)
{
    return (list->gc.gc_next == list);
}
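/* Illustrative only: the canonical way to walk one of these circular,
 * sentinel-headed lists -- the same pattern gc_list_size() and friends
 * use below.  The `visit` callback is hypothetical, not part of this file.
 */
#if 0
static void
example_gc_list_foreach(PyGC_Head *list, void (*visit)(PyObject *))
{
    PyGC_Head *gc;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next)
        visit(FROM_GC(gc));     /* recover the object from its GC head */
}
#endif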
#if 0
/* This became unused after gc_list_move() was introduced. */
/* Append `node` to `list`. */
static void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
    node->gc.gc_next = list;
    node->gc.gc_prev = list->gc.gc_prev;
    node->gc.gc_prev->gc.gc_next = node;
    list->gc.gc_prev = node;
}
#endif

/* Remove `node` from the gc list it's currently in. */
static void
gc_list_remove(PyGC_Head *node)
{
    node->gc.gc_prev->gc.gc_next = node->gc.gc_next;
    node->gc.gc_next->gc.gc_prev = node->gc.gc_prev;
    node->gc.gc_next = NULL; /* object is not currently tracked */
}

/* Move `node` from the gc list it's currently in (which is not explicitly
 * named here) to the end of `list`.  This is semantically the same as
 * gc_list_remove(node) followed by gc_list_append(node, list).
 */
static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
{
    PyGC_Head *new_prev;
    PyGC_Head *current_prev = node->gc.gc_prev;
    PyGC_Head *current_next = node->gc.gc_next;
    /* Unlink from current list. */
    current_prev->gc.gc_next = current_next;
    current_next->gc.gc_prev = current_prev;
    /* Relink at end of new list. */
    new_prev = node->gc.gc_prev = list->gc.gc_prev;
    new_prev->gc.gc_next = list->gc.gc_prev = node;
    node->gc.gc_next = list;
}

/* append list `from` onto list `to`; `from` becomes an empty list */
static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{
    PyGC_Head *tail;
    assert(from != to);
    if (!gc_list_is_empty(from)) {
        tail = to->gc.gc_prev;
        tail->gc.gc_next = from->gc.gc_next;
        tail->gc.gc_next->gc.gc_prev = tail;
        to->gc.gc_prev = from->gc.gc_prev;
        to->gc.gc_prev->gc.gc_next = to;
    }
    gc_list_init(from);
}

static Py_ssize_t
gc_list_size(PyGC_Head *list)
{
    PyGC_Head *gc;
    Py_ssize_t n = 0;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        n++;
    }
    return n;
}

/* Append objects in a GC list to a Python list.
 * Return 0 if all OK, < 0 if error (out of memory for list).
 */
static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
{
    PyGC_Head *gc;
    for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);
        if (op != py_list) {
            if (PyList_Append(py_list, op)) {
                return -1; /* exception */
            }
        }
    }
    return 0;
}

/*** end of list stuff ***/


/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 for all objects
 * in containers, and is GC_REACHABLE for all tracked gc objects not in
 * containers.
 */
static void
update_refs(PyGC_Head *containers)
{
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        assert(_PyGCHead_REFS(gc) == GC_REACHABLE);
        _PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc)));
        /* Python's cyclic gc should never see an incoming refcount
         * of 0:  if something decref'ed to 0, it should have been
         * deallocated immediately at that time.
         * Possible cause (if the assert triggers):  a tp_dealloc
         * routine left a gc-aware object tracked during its teardown
         * phase, and did something -- or allowed something to happen --
         * that called back into Python.  gc can trigger then, and may
         * see the still-tracked dying object.  Before this assert
         * was added, such mistakes went on to allow gc to try to
         * delete the object again.
         * In a debug build, that caused a mysterious segfault, when
         * _Py_ForgetReference tried to remove the object from the
         * doubly-linked list of all objects a second time.  In a release
         * build, an actual double deallocation occurred, which leads to
         * corruption of the allocator's internal bookkeeping pointers.
         * That's so serious that maybe this should be a release-build
         * check instead of an assert?
         */
        assert(_PyGCHead_REFS(gc) != 0);
    }
}

/* A traversal callback for subtract_refs. */
static int
visit_decref(PyObject *op, void *data)
{
    assert(op != NULL);
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        /* We're only interested in gc_refs for objects in the
         * generation being collected, which can be recognized
         * because only they have positive gc_refs.
         */
        assert(_PyGCHead_REFS(gc) != 0); /* else refcount was too small */
        if (_PyGCHead_REFS(gc) > 0)
            _PyGCHead_DECREF(gc);
    }
    return 0;
}

/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers, and is GC_REACHABLE for all tracked gc
 * objects not in containers.  The ones with gc_refs > 0 are directly
 * reachable from outside containers, and so can't be collected.
 */
static void
subtract_refs(PyGC_Head *containers)
{
    traverseproc traverse;
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_decref,
                        NULL);
    }
}

/* A traversal callback for move_unreachable. */
static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
{
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        const Py_ssize_t gc_refs = _PyGCHead_REFS(gc);

        if (gc_refs == 0) {
            /* This is in move_unreachable's 'young' list, but
             * the traversal hasn't yet gotten to it.  All
             * we need to do is tell move_unreachable that it's
             * reachable.
             */
            _PyGCHead_SET_REFS(gc, 1);
        }
        else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
            /* This had gc_refs = 0 when move_unreachable got
             * to it, but turns out it's reachable after all.
             * Move it back to move_unreachable's 'young' list,
             * and move_unreachable will eventually get to it
             * again.
             */
            gc_list_move(gc, reachable);
            _PyGCHead_SET_REFS(gc, 1);
        }
        /* Else there's nothing to do.
         * If gc_refs > 0, it must be in move_unreachable's 'young'
         * list, and move_unreachable will eventually get to it.
         * If gc_refs == GC_REACHABLE, it's either in some other
         * generation so we don't care about it, or move_unreachable
         * already dealt with it.
         * If gc_refs == GC_UNTRACKED, it must be ignored.
         */
        else {
            assert(gc_refs > 0
                   || gc_refs == GC_REACHABLE
                   || gc_refs == GC_UNTRACKED);
        }
    }
    return 0;
}
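/* Worked example of the update_refs / subtract_refs / move_unreachable
 * sequence (illustrative, with made-up objects A and B): suppose A and B
 * are tracked containers that refer to each other, and a local variable
 * outside the generation also refers to A.
 *
 *      after update_refs:    gc_refs(A) == 2    gc_refs(B) == 1
 *      after subtract_refs:  gc_refs(A) == 1    gc_refs(B) == 0
 *
 * A's surviving count of 1 is exactly the reference from outside the
 * generation, so move_unreachable() keeps A; traversing A then rescues B
 * via visit_reachable() even though B's count dropped to 0.  With the
 * local variable gone, both counts fall to 0 and the pair ends up in the
 * unreachable set.
 */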
/* Move the unreachable objects from young to unreachable.  After this,
 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All tracked
 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
 * All objects in young after this are directly or indirectly reachable
 * from outside the original young; and all objects in unreachable are
 * not.
 */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
    PyGC_Head *gc = young->gc.gc_next;

    /* Invariants:  all objects "to the left" of us in young have gc_refs
     * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
     * from outside the young list as it was at entry.  All other objects
     * from the original young "to the left" of us are in unreachable now,
     * and have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All objects to the
     * left of us in 'young' now have been scanned, and no objects here
     * or to the right have been scanned yet.
     */

    while (gc != young) {
        PyGC_Head *next;

        if (_PyGCHead_REFS(gc)) {
            /* gc is definitely reachable from outside the
             * original 'young'.  Mark it as such, and traverse
             * its pointers to find any other objects that may
             * be directly reachable from it.  Note that the
             * call to tp_traverse may append objects to young,
             * so we have to wait until it returns to determine
             * the next object to visit.
             */
            PyObject *op = FROM_GC(gc);
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
            assert(_PyGCHead_REFS(gc) > 0);
            _PyGCHead_SET_REFS(gc, GC_REACHABLE);
            (void) traverse(op,
                            (visitproc)visit_reachable,
                            (void *)young);
            next = gc->gc.gc_next;
            if (PyTuple_CheckExact(op)) {
                _PyTuple_MaybeUntrack(op);
            }
        }
        else {
            /* This *may* be unreachable.  To make progress,
             * assume it is.  gc isn't directly reachable from
             * any object we've already traversed, but may be
             * reachable from an object we haven't gotten to yet.
             * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
            next = gc->gc.gc_next;
            gc_list_move(gc, unreachable);
            _PyGCHead_SET_REFS(gc, GC_TENTATIVELY_UNREACHABLE);
        }
        gc = next;
    }
}

/* Try to untrack all currently tracked dictionaries */
static void
untrack_dicts(PyGC_Head *head)
{
    PyGC_Head *next, *gc = head->gc.gc_next;
    while (gc != head) {
        PyObject *op = FROM_GC(gc);
        next = gc->gc.gc_next;
        if (PyDict_CheckExact(op))
            _PyDict_MaybeUntrack(op);
        gc = next;
    }
}

/* Return true if object has a pre-PEP 442 finalization method. */
static int
has_legacy_finalizer(PyObject *op)
{
    return op->ob_type->tp_del != NULL;
}

/* Move the objects in unreachable with tp_del slots into `finalizers`.
 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
 */
static void
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
{
    PyGC_Head *gc;
    PyGC_Head *next;

    /* March over unreachable.  Move objects with finalizers into
     * `finalizers`.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyObject *op = FROM_GC(gc);

        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (has_legacy_finalizer(op)) {
            gc_list_move(gc, finalizers);
            _PyGCHead_SET_REFS(gc, GC_REACHABLE);
        }
    }
}
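/* Why such objects are set aside (illustrative): if A and B form a cycle
 * and both define __del__ (tp_del), there is no safe order in which to run
 * the finalizers -- A.__del__ may use B after B has been torn down, and
 * vice versa.  Rather than guess, the collector parks the whole cycle, and
 * everything reachable from it, in gc.garbage for the programmer to deal
 * with.
 */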
/* A traversal callback for move_legacy_finalizer_reachable. */
static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
    if (PyObject_IS_GC(op)) {
        if (IS_TENTATIVELY_UNREACHABLE(op)) {
            PyGC_Head *gc = AS_GC(op);
            gc_list_move(gc, tolist);
            _PyGCHead_SET_REFS(gc, GC_REACHABLE);
        }
    }
    return 0;
}

/* Move objects that are reachable from finalizers, from the unreachable set
 * into the finalizers set.
 */
static void
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
{
    traverseproc traverse;
    PyGC_Head *gc = finalizers->gc.gc_next;
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        /* Note that the finalizers list may grow during this. */
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_move,
                        (void *)finalizers);
    }
}

/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  Objects
 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
 * unreachable are left at GC_TENTATIVELY_UNREACHABLE.  When this returns,
 * no object in `unreachable` is weakly referenced anymore.
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{
    PyGC_Head *gc;
    PyObject *op;               /* generally FROM_GC(gc) */
    PyWeakReference *wr;        /* generally a cast of op */
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
    PyGC_Head *next;
    int num_freed = 0;

    gc_list_init(&wrcb_to_call);

    /* Clear all weakrefs to the objects in unreachable.  If such a weakref
     * also has a callback, move it into `wrcb_to_call` if the callback
     * needs to be invoked.  Note that we cannot invoke any callbacks until
     * all weakrefs to unreachable objects are cleared, lest the callback
     * resurrect an unreachable object via a still-active weakref.  We
     * make another pass over wrcb_to_call, invoking callbacks, after this
     * pass completes.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        op = FROM_GC(gc);
        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
            continue;

        /* It supports weakrefs.  Does it have any? */
        wrlist = (PyWeakReference **)
                     PyObject_GET_WEAKREFS_LISTPTR(op);

        /* `op` may have some weakrefs.  March over the list, clear
         * all the weakrefs, and move the weakrefs with callbacks
         * that must be called into wrcb_to_call.
         */
        for (wr = *wrlist; wr != NULL; wr = *wrlist) {
            PyGC_Head *wrasgc;  /* AS_GC(wr) */

            /* _PyWeakref_ClearRef clears the weakref but leaves
             * the callback pointer intact.  Obscure:  it also
             * changes *wrlist.
             */
            assert(wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            assert(wr->wr_object == Py_None);
            if (wr->wr_callback == NULL)
                continue; /* no callback */

            /* Headache time.  `op` is going away, and is weakly referenced by
             * `wr`, which has a callback.  Should the callback be invoked?
             * If wr is also trash, no:
             *
             * 1. There's no need to call it.  The object and the weakref are
             *    both going away, so it's legitimate to pretend the weakref is
             *    going away first.  The user has to ensure a weakref outlives its
             *    referent if they want a guarantee that the wr callback will get
             *    invoked.
             *
             * 2. It may be catastrophic to call it.  If the callback is also in
             *    cyclic trash (CT), then although the CT is unreachable from
             *    outside the current generation, CT may be reachable from the
             *    callback.  Then the callback could resurrect insane objects.
             *
             * Since the callback is never needed and may be unsafe in this case,
             * wr is simply left in the unreachable set.  Note that because we
             * already called _PyWeakref_ClearRef(wr), its callback will never
             * trigger.
             *
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
             * weakref outlived the trash.  Note that since wr isn't CT in this
             * case, its callback can't be CT either -- wr acted as an external
             * root to this generation, and therefore its callback did too.  So
             * nothing in CT is reachable from the callback either, so it's hard
             * to imagine how calling it later could create a problem for us.  wr
             * is moved to wrcb_to_call in this case.
             */
            if (IS_TENTATIVELY_UNREACHABLE(wr))
                continue;
            assert(IS_REACHABLE(wr));

            /* Create a new reference so that wr can't go away
             * before we can process it again.
             */
            Py_INCREF(wr);

            /* Move wr to wrcb_to_call, for the next pass. */
            wrasgc = AS_GC(wr);
            assert(wrasgc != next); /* wrasgc is reachable, but
                                       next isn't, so they can't
                                       be the same */
            gc_list_move(wrasgc, &wrcb_to_call);
        }
    }

    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
     * because they can't reference unreachable objects.
     */
    while (! gc_list_is_empty(&wrcb_to_call)) {
        PyObject *temp;
        PyObject *callback;

        gc = wrcb_to_call.gc.gc_next;
        op = FROM_GC(gc);
        assert(IS_REACHABLE(op));
        assert(PyWeakref_Check(op));
        wr = (PyWeakReference *)op;
        callback = wr->wr_callback;
        assert(callback != NULL);

        /* copy-paste of weakrefobject.c's handle_callback() */
        temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
        if (temp == NULL)
            PyErr_WriteUnraisable(callback);
        else
            Py_DECREF(temp);

        /* Give up the reference we created in the first pass.  When
         * op's refcount hits 0 (which it may or may not do right now),
         * op's tp_dealloc will decref op->wr_callback too.  Note
         * that the refcount probably will hit 0 now, and because this
         * weakref was reachable to begin with, gc didn't already
         * add it to its count of freed objects.  Example:  a reachable
         * weak value dict maps some key to this reachable weakref.
         * The callback removes this key->weakref mapping from the
         * dict, leaving no other references to the weakref (excepting
         * ours).
         */
        Py_DECREF(op);
        if (wrcb_to_call.gc.gc_next == gc) {
            /* object is still alive -- move it */
            gc_list_move(gc, old);
        }
        else
            ++num_freed;
    }

    return num_freed;
}

static void
debug_cycle(const char *msg, PyObject *op)
{
    PySys_FormatStderr("gc: %s <%s %p>\n",
                       msg, Py_TYPE(op)->tp_name, op);
}
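/* For reference, with DEBUG_COLLECTABLE or DEBUG_UNCOLLECTABLE set, the
 * helper above emits one line per object of the form (pointer elided):
 *
 *     gc: collectable <list 0x...>
 */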
/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
 * only from such cycles).
 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
 * garbage list (a Python list), else only the objects in finalizers with
 * __del__ methods are appended to garbage.  All objects in finalizers are
 * merged into the old list regardless.
 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
 * The finalizers list is made empty on a successful return.
 */
static int
handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
{
    PyGC_Head *gc = finalizers->gc.gc_next;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            Py_FatalError("gc couldn't create gc.garbage list");
    }
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);

        if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
            if (PyList_Append(garbage, op) < 0)
                return -1;
        }
    }

    gc_list_merge(finalizers, old);
    return 0;
}

/* Run first-time finalizers (if any) on all the objects in collectable.
 * Note that this may remove some (or even all) of the objects from the
 * list, due to refcounts falling to 0.
 */
static void
finalize_garbage(PyGC_Head *collectable)
{
    destructor finalize;
    PyGC_Head seen;

    /* While we're going through the loop, `finalize(op)` may cause op, or
     * other objects, to be reclaimed via refcounts falling to zero.  So
     * there's little we can rely on about the structure of the input
     * `collectable` list across iterations.  For safety, we always take the
     * first object in that list and move it to a temporary `seen` list.
     * If objects vanish from the `collectable` and `seen` lists we don't
     * care.
     */
    gc_list_init(&seen);

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = collectable->gc.gc_next;
        PyObject *op = FROM_GC(gc);
        gc_list_move(gc, &seen);
        if (!_PyGCHead_FINALIZED(gc) &&
                PyType_HasFeature(Py_TYPE(op), Py_TPFLAGS_HAVE_FINALIZE) &&
                (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
            _PyGCHead_SET_FINALIZED(gc, 1);
            Py_INCREF(op);
            finalize(op);
            Py_DECREF(op);
        }
    }
    gc_list_merge(&seen, collectable);
}

/* Walk the collectable list and check that they are really unreachable
   from the outside (some objects could have been resurrected by a
   finalizer). */
static int
check_garbage(PyGC_Head *collectable)
{
    PyGC_Head *gc;
    for (gc = collectable->gc.gc_next; gc != collectable;
         gc = gc->gc.gc_next) {
        _PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc)));
        assert(_PyGCHead_REFS(gc) != 0);
    }
    subtract_refs(collectable);
    for (gc = collectable->gc.gc_next; gc != collectable;
         gc = gc->gc.gc_next) {
        assert(_PyGCHead_REFS(gc) >= 0);
        if (_PyGCHead_REFS(gc) != 0)
            return -1;
    }
    return 0;
}

static void
revive_garbage(PyGC_Head *collectable)
{
    PyGC_Head *gc;
    for (gc = collectable->gc.gc_next; gc != collectable;
         gc = gc->gc.gc_next) {
        _PyGCHead_SET_REFS(gc, GC_REACHABLE);
    }
}
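/* A minimal sketch (hypothetical type, illustrative only) of the tp_clear
 * contract that delete_garbage() below relies on: drop the references that
 * can participate in cycles, using Py_CLEAR so the object remains in a
 * safe, usable state if anything visits it again.
 */
#if 0
typedef struct {
    PyObject_HEAD
    PyObject *child;            /* the only reference this type owns */
} exampleobject;

static int
example_clear(exampleobject *self)
{
    Py_CLEAR(self->child);      /* break the potential cycle */
    return 0;
}
#endif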
/* Break reference cycles by clearing the containers involved.  This is
 * tricky business as the lists can be changing and we don't know which
 * objects may be freed.  It is possible I screwed something up here.
 */
static void
delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
{
    inquiry clear;

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = collectable->gc.gc_next;
        PyObject *op = FROM_GC(gc);

        if (debug & DEBUG_SAVEALL) {
            PyList_Append(garbage, op);
        }
        else {
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
                Py_INCREF(op);
                clear(op);
                Py_DECREF(op);
            }
        }
        if (collectable->gc.gc_next == gc) {
            /* object is still alive, move it, it may die later */
            gc_list_move(gc, old);
            _PyGCHead_SET_REFS(gc, GC_REACHABLE);
        }
    }
}

/* Clear all free lists.
 * All free lists are cleared during the collection of the highest generation.
 * Allocated items in the free list may keep a pymalloc arena occupied.
 * Clearing the free lists may give back memory to the OS earlier.
 */
static void
clear_freelists(void)
{
    (void)PyMethod_ClearFreeList();
    (void)PyFrame_ClearFreeList();
    (void)PyCFunction_ClearFreeList();
    (void)PyTuple_ClearFreeList();
    (void)PyUnicode_ClearFreeList();
    (void)PyFloat_ClearFreeList();
    (void)PyList_ClearFreeList();
    (void)PyDict_ClearFreeList();
    (void)PySet_ClearFreeList();
    (void)PyAsyncGen_ClearFreeLists();
}

/* This is the main function.  Read this to understand how the
 * collection process works. */
static Py_ssize_t
collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
        int nofail)
{
    int i;
    Py_ssize_t m = 0; /* # objects collected */
    Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
    PyGC_Head *young; /* the generation we are examining */
    PyGC_Head *old; /* next older generation */
    PyGC_Head unreachable; /* non-problematic unreachable trash */
    PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
    PyGC_Head *gc;
    _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */

    struct gc_generation_stats *stats = &generation_stats[generation];

    if (debug & DEBUG_STATS) {
        PySys_WriteStderr("gc: collecting generation %d...\n",
                          generation);
        PySys_WriteStderr("gc: objects in each generation:");
        for (i = 0; i < NUM_GENERATIONS; i++)
            PySys_FormatStderr(" %zd",
                               gc_list_size(GEN_HEAD(i)));
        t1 = _PyTime_GetMonotonicClock();

        PySys_WriteStderr("\n");
    }

    if (PyDTrace_GC_START_ENABLED())
        PyDTrace_GC_START(generation);

    /* update collection and allocation counters */
    if (generation+1 < NUM_GENERATIONS)
        generations[generation+1].count += 1;
    for (i = 0; i <= generation; i++)
        generations[i].count = 0;

    /* merge younger generations with one we are currently collecting */
    for (i = 0; i < generation; i++) {
        gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
    }

    /* handy references */
    young = GEN_HEAD(generation);
    if (generation < NUM_GENERATIONS-1)
        old = GEN_HEAD(generation+1);
    else
        old = young;

    /* Using ob_refcnt and gc_refs, calculate which objects in the
     * container set are reachable from outside the set (i.e., have a
     * refcount greater than 0 when all the references within the
     * set are taken into account).
     */
    update_refs(young);
    subtract_refs(young);

    /* Leave everything reachable from outside young in young, and move
     * everything else (in young) to unreachable.
     * NOTE:  This used to move the reachable objects into a reachable
     * set instead.  But most things usually turn out to be reachable,
     * so it's more efficient to move the unreachable things.
     */
    gc_list_init(&unreachable);
    move_unreachable(young, &unreachable);

    /* Move reachable objects to next generation. */
    if (young != old) {
        if (generation == NUM_GENERATIONS - 2) {
            long_lived_pending += gc_list_size(young);
        }
        gc_list_merge(young, old);
    }
    else {
        /* We only untrack dicts in full collections, to avoid quadratic
           dict build-up.  See issue #14775. */
        untrack_dicts(young);
        long_lived_pending = 0;
        long_lived_total = gc_list_size(young);
    }

    /* All objects in unreachable are trash, but objects reachable from
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
     */
    gc_list_init(&finalizers);
    move_legacy_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a legacy finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_legacy_finalizer_reachable(&finalizers);

    /* Collect statistics on collectable objects found and print
     * debugging information.
     */
    for (gc = unreachable.gc.gc_next; gc != &unreachable;
         gc = gc->gc.gc_next) {
        m++;
        if (debug & DEBUG_COLLECTABLE) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }

    /* Clear weakrefs and invoke callbacks as necessary. */
    m += handle_weakrefs(&unreachable, old);

    /* Call tp_finalize on objects which have one. */
    finalize_garbage(&unreachable);

    if (check_garbage(&unreachable)) {
        revive_garbage(&unreachable);
        gc_list_merge(&unreachable, old);
    }
    else {
        /* Call tp_clear on objects in the unreachable set.  This will cause
         * the reference cycles to be broken.  It may also cause some objects
         * in finalizers to be freed.
         */
        delete_garbage(&unreachable, old);
    }

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    for (gc = finalizers.gc.gc_next;
         gc != &finalizers;
         gc = gc->gc.gc_next) {
        n++;
        if (debug & DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    if (debug & DEBUG_STATS) {
        _PyTime_t t2 = _PyTime_GetMonotonicClock();

        if (m == 0 && n == 0)
            PySys_WriteStderr("gc: done");
        else
            PySys_FormatStderr(
                "gc: done, %zd unreachable, %zd uncollectable",
                n+m, n);
        PySys_WriteStderr(", %.4fs elapsed\n",
                          _PyTime_AsSecondsDouble(t2 - t1));
    }

    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    (void)handle_legacy_finalizers(&finalizers, old);

    /* Clear free lists only during the collection of the highest
     * generation */
    if (generation == NUM_GENERATIONS-1) {
        clear_freelists();
    }

    if (PyErr_Occurred()) {
        if (nofail) {
            PyErr_Clear();
        }
        else {
            if (gc_str == NULL)
                gc_str = PyUnicode_FromString("garbage collection");
            PyErr_WriteUnraisable(gc_str);
            Py_FatalError("unexpected exception during garbage collection");
        }
    }

    /* Update stats */
    if (n_collected)
        *n_collected = m;
    if (n_uncollectable)
        *n_uncollectable = n;
    stats->collections++;
    stats->collected += m;
    stats->uncollectable += n;

    if (PyDTrace_GC_DONE_ENABLED())
        PyDTrace_GC_DONE(n+m);

    return n+m;
}

/* Invoke progress callbacks to notify clients that garbage collection
 * is starting or stopping
 */
static void
invoke_gc_callback(const char *phase, int generation,
                   Py_ssize_t collected, Py_ssize_t uncollectable)
{
    Py_ssize_t i;
    PyObject *info = NULL;

    /* we may get called very early */
    if (callbacks == NULL)
        return;
    /* The local variable cannot be rebound, check it for sanity */
    assert(callbacks != NULL && PyList_CheckExact(callbacks));
    if (PyList_GET_SIZE(callbacks) != 0) {
        info = Py_BuildValue("{sisnsn}",
                             "generation", generation,
                             "collected", collected,
                             "uncollectable", uncollectable);
        if (info == NULL) {
            PyErr_WriteUnraisable(NULL);
            return;
        }
    }
    for (i = 0; i < PyList_GET_SIZE(callbacks); i++) {
        PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
        Py_INCREF(cb); /* make sure cb doesn't go away */
        r = PyObject_CallFunction(cb, "sO", phase, info);
        Py_XDECREF(r);
        if (r == NULL)
            PyErr_WriteUnraisable(cb);
        Py_DECREF(cb);
    }
    Py_XDECREF(info);
}

/* Perform garbage collection of a generation and invoke
 * progress callbacks.
 */
static Py_ssize_t
collect_with_callback(int generation)
{
    Py_ssize_t result, collected, uncollectable;
    invoke_gc_callback("start", generation, 0, 0);
    result = collect(generation, &collected, &uncollectable, 0);
    invoke_gc_callback("stop", generation, collected, uncollectable);
    return result;
}

static Py_ssize_t
collect_generations(void)
{
    int i;
    Py_ssize_t n = 0;

    /* Find the oldest generation (highest numbered) where the count
     * exceeds the threshold.  Objects in that generation and
     * generations younger than it will be collected. */
    for (i = NUM_GENERATIONS-1; i >= 0; i--) {
        if (generations[i].count > generations[i].threshold) {
            /* Avoid quadratic performance degradation in number
               of tracked objects.  See comments at the beginning
               of this file, and issue #4074.
             */
            if (i == NUM_GENERATIONS - 1
                    && long_lived_pending < long_lived_total / 4)
                continue;
            n = collect_with_callback(i);
            break;
        }
    }
    return n;
}

PyDoc_STRVAR(gc_enable__doc__,
"enable() -> None\n"
"\n"
"Enable automatic garbage collection.\n");

static PyObject *
gc_enable(PyObject *self, PyObject *noargs)
{
    enabled = 1;
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_disable__doc__,
"disable() -> None\n"
"\n"
"Disable automatic garbage collection.\n");

static PyObject *
gc_disable(PyObject *self, PyObject *noargs)
{
    enabled = 0;
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_isenabled__doc__,
"isenabled() -> status\n"
"\n"
"Returns true if automatic garbage collection is enabled.\n");

static PyObject *
gc_isenabled(PyObject *self, PyObject *noargs)
{
    return PyBool_FromLong((long)enabled);
}

PyDoc_STRVAR(gc_collect__doc__,
"collect([generation]) -> n\n"
"\n"
"With no arguments, run a full collection.  The optional argument\n"
"may be an integer specifying which generation to collect.  A ValueError\n"
"is raised if the generation number is invalid.\n\n"
"The number of unreachable objects is returned.\n");

static PyObject *
gc_collect(PyObject *self, PyObject *args, PyObject *kws)
{
    static char *keywords[] = {"generation", NULL};
    int genarg = NUM_GENERATIONS - 1;
    Py_ssize_t n;

    if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg))
        return NULL;

    else if (genarg < 0 || genarg >= NUM_GENERATIONS) {
        PyErr_SetString(PyExc_ValueError, "invalid generation");
        return NULL;
    }

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        collecting = 1;
        n = collect_with_callback(genarg);
        collecting = 0;
    }

    return PyLong_FromSsize_t(n);
}

PyDoc_STRVAR(gc_set_debug__doc__,
"set_debug(flags) -> None\n"
"\n"
"Set the garbage collection debugging flags. Debugging information is\n"
"written to sys.stderr.\n"
"\n"
"flags is an integer and can have the following bits turned on:\n"
"\n"
"  DEBUG_STATS - Print statistics during collection.\n"
"  DEBUG_COLLECTABLE - Print collectable objects found.\n"
"  DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
"  DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
"  DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");

static PyObject *
gc_set_debug(PyObject *self, PyObject *args)
{
    if (!PyArg_ParseTuple(args, "i:set_debug", &debug))
        return NULL;

    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_get_debug__doc__,
"get_debug() -> flags\n"
"\n"
"Get the garbage collection debugging flags.\n");

static PyObject *
gc_get_debug(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("i", debug);
}
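/* How the counters above play out with the default thresholds (700, 10,
 * 10), as a worked example (illustrative): generation 0 is collected once
 * the net number of allocations since the last collection exceeds 700;
 * each such collection bumps generation 1's count, so generation 1 is
 * collected after roughly 10 generation-0 passes, and generation 2 after
 * roughly 10 generation-1 passes (further throttled by the
 * long_lived_pending / long_lived_total ratio in collect_generations()).
 */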
PyDoc_STRVAR(gc_set_thresh__doc__,
"set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
"\n"
"Sets the collection thresholds.  Setting threshold0 to zero disables\n"
"collection.\n");

static PyObject *
gc_set_thresh(PyObject *self, PyObject *args)
{
    int i;
    if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
                          &generations[0].threshold,
                          &generations[1].threshold,
                          &generations[2].threshold))
        return NULL;
    for (i = 2; i < NUM_GENERATIONS; i++) {
        /* generations higher than 2 get the same threshold */
        generations[i].threshold = generations[2].threshold;
    }

    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_get_thresh__doc__,
"get_threshold() -> (threshold0, threshold1, threshold2)\n"
"\n"
"Return the current collection thresholds.\n");

static PyObject *
gc_get_thresh(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].threshold,
                         generations[1].threshold,
                         generations[2].threshold);
}

PyDoc_STRVAR(gc_get_count__doc__,
"get_count() -> (count0, count1, count2)\n"
"\n"
"Return the current collection counts.\n");

static PyObject *
gc_get_count(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].count,
                         generations[1].count,
                         generations[2].count);
}

static int
referrersvisit(PyObject* obj, PyObject *objs)
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
        if (PyTuple_GET_ITEM(objs, i) == obj)
            return 1;
    return 0;
}

static int
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
{
    PyGC_Head *gc;
    PyObject *obj;
    traverseproc traverse;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        obj = FROM_GC(gc);
        traverse = Py_TYPE(obj)->tp_traverse;
        if (obj == objs || obj == resultlist)
            continue;
        if (traverse(obj, (visitproc)referrersvisit, objs)) {
            if (PyList_Append(resultlist, obj) < 0)
                return 0; /* error */
        }
    }
    return 1; /* no error */
}

PyDoc_STRVAR(gc_get_referrers__doc__,
"get_referrers(*objs) -> list\n\
Return the list of objects that directly refer to any of objs.");

static PyObject *
gc_get_referrers(PyObject *self, PyObject *args)
{
    int i;
    PyObject *result = PyList_New(0);
    if (!result) return NULL;

    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (!(gc_referrers_for(args, GEN_HEAD(i), result))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

/* Append obj to list; return true if error (out of memory), false if OK. */
static int
referentsvisit(PyObject *obj, PyObject *list)
{
    return PyList_Append(list, obj) < 0;
}

PyDoc_STRVAR(gc_get_referents__doc__,
"get_referents(*objs) -> list\n\
Return the list of objects that are directly referred to by objs.");

static PyObject *
gc_get_referents(PyObject *self, PyObject *args)
{
    Py_ssize_t i;
    PyObject *result = PyList_New(0);

    if (result == NULL)
        return NULL;

    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        traverseproc traverse;
        PyObject *obj = PyTuple_GET_ITEM(args, i);

        if (! PyObject_IS_GC(obj))
            continue;
        traverse = Py_TYPE(obj)->tp_traverse;
        if (! traverse)
            continue;
        if (traverse(obj, (visitproc)referentsvisit, result)) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

PyDoc_STRVAR(gc_get_objects__doc__,
"get_objects() -> [...]\n"
"\n"
"Return a list of objects tracked by the collector (excluding the list\n"
"returned).\n");

static PyObject *
gc_get_objects(PyObject *self, PyObject *noargs)
{
    int i;
    PyObject* result;

    result = PyList_New(0);
    if (result == NULL)
        return NULL;
    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (append_objects(result, GEN_HEAD(i))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

PyDoc_STRVAR(gc_get_stats__doc__,
"get_stats() -> [...]\n"
"\n"
"Return a list of dictionaries containing per-generation statistics.\n");

static PyObject *
gc_get_stats(PyObject *self, PyObject *noargs)
{
    int i;
    PyObject *result;
    struct gc_generation_stats stats[NUM_GENERATIONS], *st;

    /* To get consistent values despite allocations while constructing
       the result list, we use a snapshot of the running stats. */
    for (i = 0; i < NUM_GENERATIONS; i++) {
        stats[i] = generation_stats[i];
    }

    result = PyList_New(0);
    if (result == NULL)
        return NULL;

    for (i = 0; i < NUM_GENERATIONS; i++) {
        PyObject *dict;
        st = &stats[i];
        dict = Py_BuildValue("{snsnsn}",
                             "collections", st->collections,
                             "collected", st->collected,
                             "uncollectable", st->uncollectable
                            );
        if (dict == NULL)
            goto error;
        if (PyList_Append(result, dict)) {
            Py_DECREF(dict);
            goto error;
        }
        Py_DECREF(dict);
    }
    return result;

error:
    Py_XDECREF(result);
    return NULL;
}


PyDoc_STRVAR(gc_is_tracked__doc__,
"is_tracked(obj) -> bool\n"
"\n"
"Returns true if the object is tracked by the garbage collector.\n"
"Simple atomic objects will return false.\n"
);

static PyObject *
gc_is_tracked(PyObject *self, PyObject *obj)
{
    PyObject *result;

    if (PyObject_IS_GC(obj) && IS_TRACKED(obj))
        result = Py_True;
    else
        result = Py_False;
    Py_INCREF(result);
    return result;
}


PyDoc_STRVAR(gc__doc__,
"This module provides access to the garbage collector for reference cycles.\n"
"\n"
"enable() -- Enable automatic garbage collection.\n"
"disable() -- Disable automatic garbage collection.\n"
"isenabled() -- Returns true if automatic collection is enabled.\n"
"collect() -- Do a full collection right now.\n"
"get_count() -- Return the current collection counts.\n"
"get_stats() -- Return list of dictionaries containing per-generation stats.\n"
"set_debug() -- Set debugging flags.\n"
"get_debug() -- Get debugging flags.\n"
"set_threshold() -- Set the collection thresholds.\n"
"get_threshold() -- Return the current collection thresholds.\n"
"get_objects() -- Return a list of all objects tracked by the collector.\n"
"is_tracked() -- Returns true if a given object is tracked.\n"
"get_referrers() -- Return the list of objects that refer to an object.\n"
"get_referents() -- Return the list of objects that an object refers to.\n");

static PyMethodDef GcMethods[] = {
    {"enable",             gc_enable,        METH_NOARGS,  gc_enable__doc__},
    {"disable",            gc_disable,       METH_NOARGS,  gc_disable__doc__},
    {"isenabled",          gc_isenabled,     METH_NOARGS,  gc_isenabled__doc__},
    {"set_debug",          gc_set_debug,     METH_VARARGS, gc_set_debug__doc__},
    {"get_debug",          gc_get_debug,     METH_NOARGS,  gc_get_debug__doc__},
    {"get_count",          gc_get_count,     METH_NOARGS,  gc_get_count__doc__},
    {"set_threshold",      gc_set_thresh,    METH_VARARGS, gc_set_thresh__doc__},
    {"get_threshold",      gc_get_thresh,    METH_NOARGS,  gc_get_thresh__doc__},
    {"collect",            (PyCFunction)gc_collect,
        METH_VARARGS | METH_KEYWORDS,                      gc_collect__doc__},
    {"get_objects",        gc_get_objects,   METH_NOARGS,  gc_get_objects__doc__},
    {"get_stats",          gc_get_stats,     METH_NOARGS,  gc_get_stats__doc__},
    {"is_tracked",         gc_is_tracked,    METH_O,       gc_is_tracked__doc__},
    {"get_referrers",      gc_get_referrers, METH_VARARGS,
        gc_get_referrers__doc__},
    {"get_referents",      gc_get_referents, METH_VARARGS,
        gc_get_referents__doc__},
    {NULL,      NULL}           /* Sentinel */
};

static struct PyModuleDef gcmodule = {
    PyModuleDef_HEAD_INIT,
    "gc",                       /* m_name */
    gc__doc__,                  /* m_doc */
    -1,                         /* m_size */
    GcMethods,                  /* m_methods */
    NULL,                       /* m_reload */
    NULL,                       /* m_traverse */
    NULL,                       /* m_clear */
    NULL                        /* m_free */
};

PyMODINIT_FUNC
PyInit_gc(void)
{
    PyObject *m;

    m = PyModule_Create(&gcmodule);

    if (m == NULL)
        return NULL;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            return NULL;
    }
    Py_INCREF(garbage);
    if (PyModule_AddObject(m, "garbage", garbage) < 0)
        return NULL;

    if (callbacks == NULL) {
        callbacks = PyList_New(0);
        if (callbacks == NULL)
            return NULL;
    }
    Py_INCREF(callbacks);
    if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
        return NULL;

#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
    ADD_INT(DEBUG_STATS);
    ADD_INT(DEBUG_COLLECTABLE);
    ADD_INT(DEBUG_UNCOLLECTABLE);
    ADD_INT(DEBUG_SAVEALL);
    ADD_INT(DEBUG_LEAK);
#undef ADD_INT
    return m;
}

/* API to invoke gc.collect() from C */
Py_ssize_t
PyGC_Collect(void)
{
    Py_ssize_t n;

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        collecting = 1;
        n = collect_with_callback(NUM_GENERATIONS - 1);
        collecting = 0;
    }

    return n;
}

Py_ssize_t
_PyGC_CollectIfEnabled(void)
{
    if (!enabled)
        return 0;

    return PyGC_Collect();
}
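/* Illustrative embedding sketch (not part of this module): forcing a full
 * collection from C code via the public entry point above, e.g. after
 * tearing down a large object graph.
 */
#if 0
static void
example_force_full_collection(void)
{
    Py_ssize_t unreachable = PyGC_Collect();
    printf("gc found %zd unreachable objects\n", unreachable);
}
#endif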
Py_ssize_t
_PyGC_CollectNoFail(void)
{
    Py_ssize_t n;

    /* Ideally, this function is only called on interpreter shutdown,
       and therefore not recursively.  Unfortunately, when there are daemon
       threads, a daemon thread can start a cyclic garbage collection
       during interpreter shutdown (and then never finish it).
       See http://bugs.python.org/issue8713#msg195178 for an example.
       */
    if (collecting)
        n = 0;
    else {
        collecting = 1;
        n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1);
        collecting = 0;
    }
    return n;
}

void
_PyGC_DumpShutdownStats(void)
{
    if (!(debug & DEBUG_SAVEALL)
        && garbage != NULL && PyList_GET_SIZE(garbage) > 0) {
        char *message;
        if (debug & DEBUG_UNCOLLECTABLE)
            message = "gc: %zd uncollectable objects at " \
                "shutdown";
        else
            message = "gc: %zd uncollectable objects at " \
                "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
        /* PyErr_WarnFormat does too many things and we are at shutdown,
           the warnings module's dependencies (e.g. linecache) may be gone
           already. */
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL, message,
                                     PyList_GET_SIZE(garbage)))
            PyErr_WriteUnraisable(NULL);
        if (debug & DEBUG_UNCOLLECTABLE) {
            PyObject *repr = NULL, *bytes = NULL;
            repr = PyObject_Repr(garbage);
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
                PyErr_WriteUnraisable(garbage);
            else {
                PySys_WriteStderr(
                    "      %s\n",
                    PyBytes_AS_STRING(bytes)
                    );
            }
            Py_XDECREF(repr);
            Py_XDECREF(bytes);
        }
    }
}

void
_PyGC_Fini(void)
{
    Py_CLEAR(callbacks);
}

/* for debugging */
void
_PyGC_Dump(PyGC_Head *g)
{
    _PyObject_Dump(FROM_GC(g));
}

/* extension modules might be compiled with GC support so these
   functions must always be available */

#undef PyObject_GC_Track
#undef PyObject_GC_UnTrack
#undef PyObject_GC_Del
#undef _PyObject_GC_Malloc

void
PyObject_GC_Track(void *op)
{
    _PyObject_GC_TRACK(op);
}

void
PyObject_GC_UnTrack(void *op)
{
    /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
     * call PyObject_GC_UnTrack twice on an object.
     */
    if (IS_TRACKED(op))
        _PyObject_GC_UNTRACK(op);
}

static PyObject *
_PyObject_GC_Alloc(int use_calloc, size_t basicsize)
{
    PyObject *op;
    PyGC_Head *g;
    size_t size;
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
        return PyErr_NoMemory();
    size = sizeof(PyGC_Head) + basicsize;
    if (use_calloc)
        g = (PyGC_Head *)PyObject_Calloc(1, size);
    else
        g = (PyGC_Head *)PyObject_Malloc(size);
    if (g == NULL)
        return PyErr_NoMemory();
    g->gc.gc_refs = 0;
    _PyGCHead_SET_REFS(g, GC_UNTRACKED);
    generations[0].count++; /* number of allocated GC objects */
    if (generations[0].count > generations[0].threshold &&
        enabled &&
        generations[0].threshold &&
        !collecting &&
        !PyErr_Occurred()) {
        collecting = 1;
        collect_generations();
        collecting = 0;
    }
    op = FROM_GC(g);
    return op;
}

PyObject *
_PyObject_GC_Malloc(size_t basicsize)
{
    return _PyObject_GC_Alloc(0, basicsize);
}

PyObject *
_PyObject_GC_Calloc(size_t basicsize)
{
    return _PyObject_GC_Alloc(1, basicsize);
}

PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
    PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
    if (op != NULL)
        op = PyObject_INIT(op, tp);
    return op;
}

PyVarObject *
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
{
    size_t size;
    PyVarObject *op;

    if (nitems < 0) {
        PyErr_BadInternalCall();
        return NULL;
    }
    size = _PyObject_VAR_SIZE(tp, nitems);
    op = (PyVarObject *) _PyObject_GC_Malloc(size);
    if (op != NULL)
        op = PyObject_INIT_VAR(op, tp, nitems);
    return op;
}

PyVarObject *
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
{
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
    PyGC_Head *g = AS_GC(op);
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
        return (PyVarObject *)PyErr_NoMemory();
    g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return (PyVarObject *)PyErr_NoMemory();
    op = (PyVarObject *) FROM_GC(g);
    Py_SIZE(op) = nitems;
    return op;
}

void
PyObject_GC_Del(void *op)
{
    PyGC_Head *g = AS_GC(op);
    if (IS_TRACKED(op))
        gc_list_remove(g);
    if (generations[0].count > 0) {
        generations[0].count--;
    }
    PyObject_FREE(g);
}
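/* Illustrative only: how a GC-aware extension type (a hypothetical
 * `myobject` with one owned reference) uses the allocation API above.
 * Creation tracks the object once it is fully initialized; tp_dealloc
 * untracks it before teardown, mirroring the PyObject_GC_UnTrack()
 * requirement.
 */
#if 0
typedef struct {
    PyObject_HEAD
    PyObject *child;
} myobject;

static PyObject *
example_new(PyTypeObject *type)
{
    myobject *self = (myobject *)_PyObject_GC_New(type);
    if (self == NULL)
        return NULL;
    self->child = NULL;
    PyObject_GC_Track(self);    /* safe: all fields are initialized */
    return (PyObject *)self;
}

static void
example_dealloc(myobject *self)
{
    PyObject_GC_UnTrack(self);  /* stop the collector before teardown */
    Py_CLEAR(self->child);
    PyObject_GC_Del(self);
}
#endif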