Home | History | Annotate | Download | only in Objects
      1 #include "Python.h"
      2 
      3 #include <stdbool.h>
      4 
      5 
      6 /* Defined in tracemalloc.c */
      7 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
      8 
      9 
     10 /* Python's malloc wrappers (see pymem.h) */
     11 
     12 #undef  uint
     13 #define uint    unsigned int    /* assuming >= 16 bits */
     14 
     15 /* Forward declaration */
     16 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
     17 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
     18 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
     19 static void _PyMem_DebugRawFree(void *ctx, void *p);
     20 
     21 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
     22 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
     23 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
     24 static void _PyMem_DebugFree(void *ctx, void *p);
     25 
     26 static void _PyObject_DebugDumpAddress(const void *p);
     27 static void _PyMem_DebugCheckAddress(char api_id, const void *p);
     28 
     29 #if defined(__has_feature)  /* Clang */
     30  #if __has_feature(address_sanitizer)  /* is ASAN enabled? */
     31   #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
     32         __attribute__((no_address_safety_analysis))
     33  #else
     34   #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
     35  #endif
     36 #else
     37  #if defined(__SANITIZE_ADDRESS__)  /* GCC 4.8.x, is ASAN enabled? */
     38   #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
     39         __attribute__((no_address_safety_analysis))
     40  #else
     41   #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
     42  #endif
     43 #endif
     44 
     45 #ifdef WITH_PYMALLOC
     46 
     47 #ifdef MS_WINDOWS
     48 #  include <windows.h>
     49 #elif defined(HAVE_MMAP)
     50 #  include <sys/mman.h>
     51 #  ifdef MAP_ANONYMOUS
     52 #    define ARENAS_USE_MMAP
     53 #  endif
     54 #endif
     55 
     56 /* Forward declaration */
     57 static void* _PyObject_Malloc(void *ctx, size_t size);
     58 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
     59 static void _PyObject_Free(void *ctx, void *p);
     60 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
     61 #endif
     62 
     63 
     64 static void *
     65 _PyMem_RawMalloc(void *ctx, size_t size)
     66 {
     67     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
     68        for malloc(0), which would be treated as an error. Some platforms would
     69        return a pointer with no memory behind it, which would break pymalloc.
     70        To solve these problems, allocate an extra byte. */
     71     if (size == 0)
     72         size = 1;
     73     return malloc(size);
     74 }
     75 
     76 static void *
     77 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
     78 {
     79     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
     80        for calloc(0, 0), which would be treated as an error. Some platforms
     81        would return a pointer with no memory behind it, which would break
     82        pymalloc.  To solve these problems, allocate an extra byte. */
     83     if (nelem == 0 || elsize == 0) {
     84         nelem = 1;
     85         elsize = 1;
     86     }
     87     return calloc(nelem, elsize);
     88 }
     89 
     90 static void *
     91 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
     92 {
     93     if (size == 0)
     94         size = 1;
     95     return realloc(ptr, size);
     96 }
     97 
     98 static void
     99 _PyMem_RawFree(void *ctx, void *ptr)
    100 {
    101     free(ptr);
    102 }
    103 
    104 
    105 #ifdef MS_WINDOWS
    106 static void *
    107 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
    108 {
    109     return VirtualAlloc(NULL, size,
    110                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    111 }
    112 
    113 static void
    114 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
    115 {
    116     VirtualFree(ptr, 0, MEM_RELEASE);
    117 }
    118 
    119 #elif defined(ARENAS_USE_MMAP)
    120 static void *
    121 _PyObject_ArenaMmap(void *ctx, size_t size)
    122 {
    123     void *ptr;
    124     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
    125                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    126     if (ptr == MAP_FAILED)
    127         return NULL;
    128     assert(ptr != NULL);
    129     return ptr;
    130 }
    131 
    132 static void
    133 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
    134 {
    135     munmap(ptr, size);
    136 }
    137 
    138 #else
    139 static void *
    140 _PyObject_ArenaMalloc(void *ctx, size_t size)
    141 {
    142     return malloc(size);
    143 }
    144 
    145 static void
    146 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
    147 {
    148     free(ptr);
    149 }
    150 #endif
    151 
    152 
    153 #define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree
    154 #ifdef WITH_PYMALLOC
    155 #  define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free
    156 #else
    157 #  define PYOBJ_FUNCS PYRAW_FUNCS
    158 #endif
    159 #define PYMEM_FUNCS PYOBJ_FUNCS
    160 
    161 typedef struct {
    162     /* We tag each block with an API ID in order to tag API violations */
    163     char api_id;
    164     PyMemAllocatorEx alloc;
    165 } debug_alloc_api_t;
    166 static struct {
    167     debug_alloc_api_t raw;
    168     debug_alloc_api_t mem;
    169     debug_alloc_api_t obj;
    170 } _PyMem_Debug = {
    171     {'r', {NULL, PYRAW_FUNCS}},
    172     {'m', {NULL, PYMEM_FUNCS}},
    173     {'o', {NULL, PYOBJ_FUNCS}}
    174     };
    175 
    176 #define PYRAWDBG_FUNCS \
    177     _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree
    178 #define PYDBG_FUNCS \
    179     _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
    180 
    181 static PyMemAllocatorEx _PyMem_Raw = {
    182 #ifdef Py_DEBUG
    183     &_PyMem_Debug.raw, PYRAWDBG_FUNCS
    184 #else
    185     NULL, PYRAW_FUNCS
    186 #endif
    187     };
    188 
    189 static PyMemAllocatorEx _PyMem = {
    190 #ifdef Py_DEBUG
    191     &_PyMem_Debug.mem, PYDBG_FUNCS
    192 #else
    193     NULL, PYMEM_FUNCS
    194 #endif
    195     };
    196 
    197 static PyMemAllocatorEx _PyObject = {
    198 #ifdef Py_DEBUG
    199     &_PyMem_Debug.obj, PYDBG_FUNCS
    200 #else
    201     NULL, PYOBJ_FUNCS
    202 #endif
    203     };
    204 
    205 int
    206 _PyMem_SetupAllocators(const char *opt)
    207 {
    208     if (opt == NULL || *opt == '\0') {
    209         /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
    210            options): use default allocators */
    211 #ifdef Py_DEBUG
    212 #  ifdef WITH_PYMALLOC
    213         opt = "pymalloc_debug";
    214 #  else
    215         opt = "malloc_debug";
    216 #  endif
    217 #else
    218    /* !Py_DEBUG */
    219 #  ifdef WITH_PYMALLOC
    220         opt = "pymalloc";
    221 #  else
    222         opt = "malloc";
    223 #  endif
    224 #endif
    225     }
    226 
    227     if (strcmp(opt, "debug") == 0) {
    228         PyMem_SetupDebugHooks();
    229     }
    230     else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0)
    231     {
    232         PyMemAllocatorEx alloc = {NULL, PYRAW_FUNCS};
    233 
    234         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    235         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    236         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    237 
    238         if (strcmp(opt, "malloc_debug") == 0)
    239             PyMem_SetupDebugHooks();
    240     }
    241 #ifdef WITH_PYMALLOC
    242     else if (strcmp(opt, "pymalloc") == 0
    243              || strcmp(opt, "pymalloc_debug") == 0)
    244     {
    245         PyMemAllocatorEx raw_alloc = {NULL, PYRAW_FUNCS};
    246         PyMemAllocatorEx mem_alloc = {NULL, PYMEM_FUNCS};
    247         PyMemAllocatorEx obj_alloc = {NULL, PYOBJ_FUNCS};
    248 
    249         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &raw_alloc);
    250         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &mem_alloc);
    251         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &obj_alloc);
    252 
    253         if (strcmp(opt, "pymalloc_debug") == 0)
    254             PyMem_SetupDebugHooks();
    255     }
    256 #endif
    257     else {
    258         /* unknown allocator */
    259         return -1;
    260     }
    261     return 0;
    262 }
    263 
    264 #undef PYRAW_FUNCS
    265 #undef PYMEM_FUNCS
    266 #undef PYOBJ_FUNCS
    267 #undef PYRAWDBG_FUNCS
    268 #undef PYDBG_FUNCS
    269 
    270 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
    271 #ifdef MS_WINDOWS
    272     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
    273 #elif defined(ARENAS_USE_MMAP)
    274     _PyObject_ArenaMmap, _PyObject_ArenaMunmap
    275 #else
    276     _PyObject_ArenaMalloc, _PyObject_ArenaFree
    277 #endif
    278     };
    279 
    280 #ifdef WITH_PYMALLOC
    281 static int
    282 _PyMem_DebugEnabled(void)
    283 {
    284     return (_PyObject.malloc == _PyMem_DebugMalloc);
    285 }
    286 
    287 int
    288 _PyMem_PymallocEnabled(void)
    289 {
    290     if (_PyMem_DebugEnabled()) {
    291         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
    292     }
    293     else {
    294         return (_PyObject.malloc == _PyObject_Malloc);
    295     }
    296 }
    297 #endif
    298 
    299 void
    300 PyMem_SetupDebugHooks(void)
    301 {
    302     PyMemAllocatorEx alloc;
    303 
    304     alloc.malloc = _PyMem_DebugRawMalloc;
    305     alloc.calloc = _PyMem_DebugRawCalloc;
    306     alloc.realloc = _PyMem_DebugRawRealloc;
    307     alloc.free = _PyMem_DebugRawFree;
    308 
    309     if (_PyMem_Raw.malloc != _PyMem_DebugRawMalloc) {
    310         alloc.ctx = &_PyMem_Debug.raw;
    311         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
    312         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    313     }
    314 
    315     alloc.malloc = _PyMem_DebugMalloc;
    316     alloc.calloc = _PyMem_DebugCalloc;
    317     alloc.realloc = _PyMem_DebugRealloc;
    318     alloc.free = _PyMem_DebugFree;
    319 
    320     if (_PyMem.malloc != _PyMem_DebugMalloc) {
    321         alloc.ctx = &_PyMem_Debug.mem;
    322         PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
    323         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    324     }
    325 
    326     if (_PyObject.malloc != _PyMem_DebugMalloc) {
    327         alloc.ctx = &_PyMem_Debug.obj;
    328         PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
    329         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    330     }
    331 }
    332 
    333 void
    334 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
    335 {
    336     switch(domain)
    337     {
    338     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
    339     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
    340     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
    341     default:
    342         /* unknown domain: set all attributes to NULL */
    343         allocator->ctx = NULL;
    344         allocator->malloc = NULL;
    345         allocator->calloc = NULL;
    346         allocator->realloc = NULL;
    347         allocator->free = NULL;
    348     }
    349 }
    350 
    351 void
    352 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
    353 {
    354     switch(domain)
    355     {
    356     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
    357     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
    358     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
    359     /* ignore unknown domain */
    360     }
    361 }
    362 
    363 void
    364 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
    365 {
    366     *allocator = _PyObject_Arena;
    367 }
    368 
    369 void
    370 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
    371 {
    372     _PyObject_Arena = *allocator;
    373 }
    374 
    375 void *
    376 PyMem_RawMalloc(size_t size)
    377 {
    378     /*
    379      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
    380      * Most python internals blindly use a signed Py_ssize_t to track
    381      * things without checking for overflows or negatives.
    382      * As size_t is unsigned, checking for size < 0 is not required.
    383      */
    384     if (size > (size_t)PY_SSIZE_T_MAX)
    385         return NULL;
    386     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
    387 }
    388 
    389 void *
    390 PyMem_RawCalloc(size_t nelem, size_t elsize)
    391 {
    392     /* see PyMem_RawMalloc() */
    393     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
    394         return NULL;
    395     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
    396 }
    397 
    398 void*
    399 PyMem_RawRealloc(void *ptr, size_t new_size)
    400 {
    401     /* see PyMem_RawMalloc() */
    402     if (new_size > (size_t)PY_SSIZE_T_MAX)
    403         return NULL;
    404     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
    405 }
    406 
    407 void PyMem_RawFree(void *ptr)
    408 {
    409     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
    410 }
    411 
    412 void *
    413 PyMem_Malloc(size_t size)
    414 {
    415     /* see PyMem_RawMalloc() */
    416     if (size > (size_t)PY_SSIZE_T_MAX)
    417         return NULL;
    418     return _PyMem.malloc(_PyMem.ctx, size);
    419 }
    420 
    421 void *
    422 PyMem_Calloc(size_t nelem, size_t elsize)
    423 {
    424     /* see PyMem_RawMalloc() */
    425     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
    426         return NULL;
    427     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
    428 }
    429 
    430 void *
    431 PyMem_Realloc(void *ptr, size_t new_size)
    432 {
    433     /* see PyMem_RawMalloc() */
    434     if (new_size > (size_t)PY_SSIZE_T_MAX)
    435         return NULL;
    436     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
    437 }
    438 
    439 void
    440 PyMem_Free(void *ptr)
    441 {
    442     _PyMem.free(_PyMem.ctx, ptr);
    443 }
    444 
    445 char *
    446 _PyMem_RawStrdup(const char *str)
    447 {
    448     size_t size;
    449     char *copy;
    450 
    451     size = strlen(str) + 1;
    452     copy = PyMem_RawMalloc(size);
    453     if (copy == NULL)
    454         return NULL;
    455     memcpy(copy, str, size);
    456     return copy;
    457 }
    458 
    459 char *
    460 _PyMem_Strdup(const char *str)
    461 {
    462     size_t size;
    463     char *copy;
    464 
    465     size = strlen(str) + 1;
    466     copy = PyMem_Malloc(size);
    467     if (copy == NULL)
    468         return NULL;
    469     memcpy(copy, str, size);
    470     return copy;
    471 }
    472 
    473 void *
    474 PyObject_Malloc(size_t size)
    475 {
    476     /* see PyMem_RawMalloc() */
    477     if (size > (size_t)PY_SSIZE_T_MAX)
    478         return NULL;
    479     return _PyObject.malloc(_PyObject.ctx, size);
    480 }
    481 
    482 void *
    483 PyObject_Calloc(size_t nelem, size_t elsize)
    484 {
    485     /* see PyMem_RawMalloc() */
    486     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
    487         return NULL;
    488     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
    489 }
    490 
    491 void *
    492 PyObject_Realloc(void *ptr, size_t new_size)
    493 {
    494     /* see PyMem_RawMalloc() */
    495     if (new_size > (size_t)PY_SSIZE_T_MAX)
    496         return NULL;
    497     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
    498 }
    499 
    500 void
    501 PyObject_Free(void *ptr)
    502 {
    503     _PyObject.free(_PyObject.ctx, ptr);
    504 }
    505 
    506 
    507 #ifdef WITH_PYMALLOC
    508 
    509 #ifdef WITH_VALGRIND
    510 #include <valgrind/valgrind.h>
    511 
    512 /* If we're using GCC, use __builtin_expect() to reduce overhead of
    513    the valgrind checks */
    514 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
    515 #  define UNLIKELY(value) __builtin_expect((value), 0)
    516 #else
    517 #  define UNLIKELY(value) (value)
    518 #endif
    519 
    520 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
    521 static int running_on_valgrind = -1;
    522 #endif
    523 
    524 /* An object allocator for Python.
    525 
    526    Here is an introduction to the layers of the Python memory architecture,
    527    showing where the object allocator is actually used (layer +2), It is
    528    called for every object allocation and deallocation (PyObject_New/Del),
    529    unless the object-specific allocators implement a proprietary allocation
    530    scheme (ex.: ints use a simple free list). This is also the place where
    531    the cyclic garbage collector operates selectively on container objects.
    532 
    533 
    534     Object-specific allocators
    535     _____   ______   ______       ________
    536    [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
    537 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    538     _______________________________       |                           |
    539    [   Python's object allocator   ]      |                           |
    540 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
    541     ______________________________________________________________    |
    542    [          Python's raw memory allocator (PyMem_ API)          ]   |
    543 +1 | <----- Python memory (under PyMem manager's control) ------> |   |
    544     __________________________________________________________________
    545    [    Underlying general-purpose allocator (ex: C library malloc)   ]
    546  0 | <------ Virtual memory allocated for the python process -------> |
    547 
    548    =========================================================================
    549     _______________________________________________________________________
    550    [                OS-specific Virtual Memory Manager (VMM)               ]
    551 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    552     __________________________________   __________________________________
    553    [                                  ] [                                  ]
    554 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
    555 
    556 */
    557 /*==========================================================================*/
    558 
    559 /* A fast, special-purpose memory allocator for small blocks, to be used
    560    on top of a general-purpose malloc -- heavily based on previous art. */
    561 
    562 /* Vladimir Marangozov -- August 2000 */
    563 
    564 /*
    565  * "Memory management is where the rubber meets the road -- if we do the wrong
    566  * thing at any level, the results will not be good. And if we don't make the
    567  * levels work well together, we are in serious trouble." (1)
    568  *
    569  * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
    570  *    "Dynamic Storage Allocation: A Survey and Critical Review",
    571  *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
    572  */
    573 
    574 /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
    575 
    576 /*==========================================================================*/
    577 
    578 /*
    579  * Allocation strategy abstract:
    580  *
    581  * For small requests, the allocator sub-allocates <Big> blocks of memory.
    582  * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
    583  * system's allocator.
    584  *
    585  * Small requests are grouped in size classes spaced 8 bytes apart, due
    586  * to the required valid alignment of the returned address. Requests of
    587  * a particular size are serviced from memory pools of 4K (one VMM page).
    588  * Pools are fragmented on demand and contain free lists of blocks of one
    589  * particular size class. In other words, there is a fixed-size allocator
    590  * for each size class. Free pools are shared by the different allocators
    591  * thus minimizing the space reserved for a particular size class.
    592  *
    593  * This allocation strategy is a variant of what is known as "simple
    594  * segregated storage based on array of free lists". The main drawback of
    595  * simple segregated storage is that we might end up with lot of reserved
    596  * memory for the different free lists, which degenerate in time. To avoid
    597  * this, we partition each free list in pools and we share dynamically the
    598  * reserved space between all free lists. This technique is quite efficient
    599  * for memory intensive programs which allocate mainly small-sized blocks.
    600  *
    601  * For small requests we have the following table:
    602  *
    603  * Request in bytes     Size of allocated block      Size class idx
    604  * ----------------------------------------------------------------
    605  *        1-8                     8                       0
    606  *        9-16                   16                       1
    607  *       17-24                   24                       2
    608  *       25-32                   32                       3
    609  *       33-40                   40                       4
    610  *       41-48                   48                       5
    611  *       49-56                   56                       6
    612  *       57-64                   64                       7
    613  *       65-72                   72                       8
    614  *        ...                   ...                     ...
    615  *      497-504                 504                      62
    616  *      505-512                 512                      63
    617  *
    618  *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
    619  *      allocator.
    620  */
    621 
    622 /*==========================================================================*/
    623 
    624 /*
    625  * -- Main tunable settings section --
    626  */
    627 
    628 /*
    629  * Alignment of addresses returned to the user. 8-bytes alignment works
    630  * on most current architectures (with 32-bit or 64-bit address busses).
    631  * The alignment value is also used for grouping small requests in size
    632  * classes spaced ALIGNMENT bytes apart.
    633  *
    634  * You shouldn't change this unless you know what you are doing.
    635  */
    636 #define ALIGNMENT               8               /* must be 2^N */
    637 #define ALIGNMENT_SHIFT         3
    638 
    639 /* Return the number of bytes in size class I, as a uint. */
    640 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
    641 
    642 /*
    643  * Max size threshold below which malloc requests are considered to be
    644  * small enough in order to use preallocated memory pools. You can tune
    645  * this value according to your application behaviour and memory needs.
    646  *
    647  * Note: a size threshold of 512 guarantees that newly created dictionaries
    648  * will be allocated from preallocated memory pools on 64-bit.
    649  *
    650  * The following invariants must hold:
    651  *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
    652  *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
    653  *
    654  * Although not required, for better performance and space efficiency,
    655  * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
    656  */
    657 #define SMALL_REQUEST_THRESHOLD 512
    658 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
    659 
    660 /*
    661  * The system's VMM page size can be obtained on most unices with a
    662  * getpagesize() call or deduced from various header files. To make
    663  * things simpler, we assume that it is 4K, which is OK for most systems.
    664  * It is probably better if this is the native page size, but it doesn't
    665  * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
    666  * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
    667  * violation fault.  4K is apparently OK for all the platforms that python
    668  * currently targets.
    669  */
    670 #define SYSTEM_PAGE_SIZE        (4 * 1024)
    671 #define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
    672 
    673 /*
    674  * Maximum amount of memory managed by the allocator for small requests.
    675  */
    676 #ifdef WITH_MEMORY_LIMITS
    677 #ifndef SMALL_MEMORY_LIMIT
    678 #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
    679 #endif
    680 #endif
    681 
    682 /*
    683  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
    684  * on a page boundary. This is a reserved virtual address space for the
    685  * current process (obtained through a malloc()/mmap() call). In no way this
    686  * means that the memory arenas will be used entirely. A malloc(<Big>) is
    687  * usually an address range reservation for <Big> bytes, unless all pages within
    688  * this space are referenced subsequently. So malloc'ing big blocks and not
    689  * using them does not mean "wasting memory". It's an addressable range
    690  * wastage...
    691  *
    692  * Arenas are allocated with mmap() on systems supporting anonymous memory
    693  * mappings to reduce heap fragmentation.
    694  */
    695 #define ARENA_SIZE              (256 << 10)     /* 256KB */
    696 
    697 #ifdef WITH_MEMORY_LIMITS
    698 #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
    699 #endif
    700 
    701 /*
    702  * Size of the pools used for small blocks. Should be a power of 2,
    703  * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
    704  */
    705 #define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
    706 #define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
    707 
    708 /*
    709  * -- End of tunable settings section --
    710  */
    711 
    712 /*==========================================================================*/
    713 
    714 /*
    715  * Locking
    716  *
    717  * To reduce lock contention, it would probably be better to refine the
    718  * crude function locking with per size class locking. I'm not positive
    719  * however, whether it's worth switching to such locking policy because
    720  * of the performance penalty it might introduce.
    721  *
    722  * The following macros describe the simplest (should also be the fastest)
    723  * lock object on a particular platform and the init/fini/lock/unlock
    724  * operations on it. The locks defined here are not expected to be recursive
    725  * because it is assumed that they will always be called in the order:
    726  * INIT, [LOCK, UNLOCK]*, FINI.
    727  */
    728 
    729 /*
    730  * Python's threads are serialized, so object malloc locking is disabled.
    731  */
    732 #define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
    733 #define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
    734 #define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
    735 #define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
    736 #define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
    737 
    738 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
    739 typedef uint8_t block;
    740 
    741 /* Pool for small blocks. */
    742 struct pool_header {
    743     union { block *_padding;
    744             uint count; } ref;          /* number of allocated blocks    */
    745     block *freeblock;                   /* pool's free list head         */
    746     struct pool_header *nextpool;       /* next pool of this size class  */
    747     struct pool_header *prevpool;       /* previous pool       ""        */
    748     uint arenaindex;                    /* index into arenas of base adr */
    749     uint szidx;                         /* block size class index        */
    750     uint nextoffset;                    /* bytes to virgin block         */
    751     uint maxnextoffset;                 /* largest valid nextoffset      */
    752 };
    753 
    754 typedef struct pool_header *poolp;
    755 
    756 /* Record keeping for arenas. */
    757 struct arena_object {
    758     /* The address of the arena, as returned by malloc.  Note that 0
    759      * will never be returned by a successful malloc, and is used
    760      * here to mark an arena_object that doesn't correspond to an
    761      * allocated arena.
    762      */
    763     uintptr_t address;
    764 
    765     /* Pool-aligned pointer to the next pool to be carved off. */
    766     block* pool_address;
    767 
    768     /* The number of available pools in the arena:  free pools + never-
    769      * allocated pools.
    770      */
    771     uint nfreepools;
    772 
    773     /* The total number of pools in the arena, whether or not available. */
    774     uint ntotalpools;
    775 
    776     /* Singly-linked list of available pools. */
    777     struct pool_header* freepools;
    778 
    779     /* Whenever this arena_object is not associated with an allocated
    780      * arena, the nextarena member is used to link all unassociated
    781      * arena_objects in the singly-linked `unused_arena_objects` list.
    782      * The prevarena member is unused in this case.
    783      *
    784      * When this arena_object is associated with an allocated arena
    785      * with at least one available pool, both members are used in the
    786      * doubly-linked `usable_arenas` list, which is maintained in
    787      * increasing order of `nfreepools` values.
    788      *
    789      * Else this arena_object is associated with an allocated arena
    790      * all of whose pools are in use.  `nextarena` and `prevarena`
    791      * are both meaningless in this case.
    792      */
    793     struct arena_object* nextarena;
    794     struct arena_object* prevarena;
    795 };
    796 
    797 #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
    798 
    799 #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
    800 
    801 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
    802 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
    803 
    804 /* Return total number of blocks in pool of size index I, as a uint. */
    805 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
    806 
    807 /*==========================================================================*/
    808 
    809 /*
    810  * This malloc lock
    811  */
    812 SIMPLELOCK_DECL(_malloc_lock)
    813 #define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
    814 #define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
    815 #define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
    816 #define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
    817 
    818 /*
    819  * Pool table -- headed, circular, doubly-linked lists of partially used pools.
    820 
    821 This is involved.  For an index i, usedpools[i+i] is the header for a list of
    822 all partially used pools holding small blocks with "size class idx" i. So
    823 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
    824 16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
    825 
    826 Pools are carved off an arena's highwater mark (an arena_object's pool_address
    827 member) as needed.  Once carved off, a pool is in one of three states forever
    828 after:
    829 
    830 used == partially used, neither empty nor full
    831     At least one block in the pool is currently allocated, and at least one
    832     block in the pool is not currently allocated (note this implies a pool
    833     has room for at least two blocks).
    834     This is a pool's initial state, as a pool is created only when malloc
    835     needs space.
    836     The pool holds blocks of a fixed size, and is in the circular list headed
    837     at usedpools[i] (see above).  It's linked to the other used pools of the
    838     same size class via the pool_header's nextpool and prevpool members.
    839     If all but one block is currently allocated, a malloc can cause a
    840     transition to the full state.  If all but one block is not currently
    841     allocated, a free can cause a transition to the empty state.
    842 
    843 full == all the pool's blocks are currently allocated
    844     On transition to full, a pool is unlinked from its usedpools[] list.
    845     It's not linked to from anything then anymore, and its nextpool and
    846     prevpool members are meaningless until it transitions back to used.
    847     A free of a block in a full pool puts the pool back in the used state.
    848     Then it's linked in at the front of the appropriate usedpools[] list, so
    849     that the next allocation for its size class will reuse the freed block.
    850 
    851 empty == all the pool's blocks are currently available for allocation
    852     On transition to empty, a pool is unlinked from its usedpools[] list,
    853     and linked to the front of its arena_object's singly-linked freepools list,
    854     via its nextpool member.  The prevpool member has no meaning in this case.
    855     Empty pools have no inherent size class:  the next time a malloc finds
    856     an empty list in usedpools[], it takes the first pool off of freepools.
    857     If the size class needed happens to be the same as the size class the pool
    858     last had, some pool initialization can be skipped.
    859 
    860 
    861 Block Management
    862 
    863 Blocks within pools are again carved out as needed.  pool->freeblock points to
    864 the start of a singly-linked list of free blocks within the pool.  When a
    865 block is freed, it's inserted at the front of its pool's freeblock list.  Note
    866 that the available blocks in a pool are *not* linked all together when a pool
    867 is initialized.  Instead only "the first two" (lowest addresses) blocks are
    868 set up, returning the first such block, and setting pool->freeblock to a
    869 one-block list holding the second such block.  This is consistent with that
    870 pymalloc strives at all levels (arena, pool, and block) never to touch a piece
    871 of memory until it's actually needed.
    872 
    873 So long as a pool is in the used state, we're certain there *is* a block
    874 available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
    875 points to the end of the free list before we've carved the entire pool into
    876 blocks, that means we simply haven't yet gotten to one of the higher-address
    877 blocks.  The offset from the pool_header to the start of "the next" virgin
    878 block is stored in the pool_header nextoffset member, and the largest value
    879 of nextoffset that makes sense is stored in the maxnextoffset member when a
    880 pool is initialized.  All the blocks in a pool have been passed out at least
    881 once when and only when nextoffset > maxnextoffset.
    882 
    883 
    884 Major obscurity:  While the usedpools vector is declared to have poolp
    885 entries, it doesn't really.  It really contains two pointers per (conceptual)
    886 poolp entry, the nextpool and prevpool members of a pool_header.  The
    887 excruciating initialization code below fools C so that
    888 
    889     usedpool[i+i]
    890 
    891 "acts like" a genuine poolp, but only so long as you only reference its
    892 nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
    893 compensating for that a pool_header's nextpool and prevpool members
    894 immediately follow a pool_header's first two members:
    895 
    896     union { block *_padding;
    897             uint count; } ref;
    898     block *freeblock;
    899 
    900 each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
    901 contains is a fudged-up pointer p such that *if* C believes it's a poolp
    902 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
    903 circular list is empty).
    904 
    905 It's unclear why the usedpools setup is so convoluted.  It could be to
    906 minimize the amount of cache required to hold this heavily-referenced table
    907 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
    908 referencing code has to remember to "double the index" and doing so isn't
    909 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
    910 on that C doesn't insert any padding anywhere in a pool_header at or before
    911 the prevpool member.
    912 **************************************************************************** */
    913 
    914 #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
    915 #define PT(x)   PTA(x), PTA(x)
    916 
    917 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
    918     PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
    919 #if NB_SMALL_SIZE_CLASSES > 8
    920     , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
    921 #if NB_SMALL_SIZE_CLASSES > 16
    922     , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
    923 #if NB_SMALL_SIZE_CLASSES > 24
    924     , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
    925 #if NB_SMALL_SIZE_CLASSES > 32
    926     , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
    927 #if NB_SMALL_SIZE_CLASSES > 40
    928     , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
    929 #if NB_SMALL_SIZE_CLASSES > 48
    930     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
    931 #if NB_SMALL_SIZE_CLASSES > 56
    932     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
    933 #if NB_SMALL_SIZE_CLASSES > 64
    934 #error "NB_SMALL_SIZE_CLASSES should be less than 64"
    935 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
    936 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
    937 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
    938 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
    939 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
    940 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
    941 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
    942 #endif /* NB_SMALL_SIZE_CLASSES >  8 */
    943 };
    944 
    945 /*==========================================================================
    946 Arena management.
    947 
    948 `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
    949 which may not be currently used (== they're arena_objects that aren't
    950 currently associated with an allocated arena).  Note that arenas proper are
    951 separately malloc'ed.
    952 
    953 Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
    954 we do try to free() arenas, and use some mild heuristic strategies to increase
    955 the likelihood that arenas eventually can be freed.
    956 
    957 unused_arena_objects
    958 
    959     This is a singly-linked list of the arena_objects that are currently not
    960     being used (no arena is associated with them).  Objects are taken off the
    961     head of the list in new_arena(), and are pushed on the head of the list in
    962     PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
    963     is on this list if and only if its .address member is 0.
    964 
    965 usable_arenas
    966 
    967     This is a doubly-linked list of the arena_objects associated with arenas
    968     that have pools available.  These pools are either waiting to be reused,
    969     or have not been used before.  The list is sorted to have the most-
    970     allocated arenas first (ascending order based on the nfreepools member).
    971     This means that the next allocation will come from a heavily used arena,
    972     which gives the nearly empty arenas a chance to be returned to the system.
    973     In my unscientific tests this dramatically improved the number of arenas
    974     that could be freed.
    975 
    976 Note that an arena_object associated with an arena all of whose pools are
    977 currently in use isn't on either list.
    978 */
    979 
    980 /* Array of objects used to track chunks of memory (arenas). */
    981 static struct arena_object* arenas = NULL;
    982 /* Number of slots currently allocated in the `arenas` vector. */
    983 static uint maxarenas = 0;
    984 
    985 /* The head of the singly-linked, NULL-terminated list of available
    986  * arena_objects.
    987  */
    988 static struct arena_object* unused_arena_objects = NULL;
    989 
    990 /* The head of the doubly-linked, NULL-terminated at each end, list of
    991  * arena_objects associated with arenas that have pools available.
    992  */
    993 static struct arena_object* usable_arenas = NULL;
    994 
    995 /* How many arena_objects do we initially allocate?
    996  * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
    997  * `arenas` vector.
    998  */
    999 #define INITIAL_ARENA_OBJECTS 16
   1000 
   1001 /* Number of arenas allocated that haven't been free()'d. */
   1002 static size_t narenas_currently_allocated = 0;
   1003 
   1004 /* Total number of times malloc() called to allocate an arena. */
   1005 static size_t ntimes_arena_allocated = 0;
   1006 /* High water mark (max value ever seen) for narenas_currently_allocated. */
   1007 static size_t narenas_highwater = 0;
   1008 
   1009 static Py_ssize_t _Py_AllocatedBlocks = 0;
   1010 
   1011 Py_ssize_t
   1012 _Py_GetAllocatedBlocks(void)
   1013 {
   1014     return _Py_AllocatedBlocks;
   1015 }
   1016 
   1017 
   1018 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
   1019  * allocate a new arena, and return the address of an arena_object
   1020  * describing the new arena.  It's expected that the caller will set
   1021  * `usable_arenas` to the return value.
   1022  */
   1023 static struct arena_object*
   1024 new_arena(void)
   1025 {
   1026     struct arena_object* arenaobj;
   1027     uint excess;        /* number of bytes above pool alignment */
   1028     void *address;
   1029     static int debug_stats = -1;
   1030 
   1031     if (debug_stats == -1) {
   1032         char *opt = Py_GETENV("PYTHONMALLOCSTATS");
   1033         debug_stats = (opt != NULL && *opt != '\0');
   1034     }
   1035     if (debug_stats)
   1036         _PyObject_DebugMallocStats(stderr);
   1037 
   1038     if (unused_arena_objects == NULL) {
   1039         uint i;
   1040         uint numarenas;
   1041         size_t nbytes;
   1042 
   1043         /* Double the number of arena objects on each allocation.
   1044          * Note that it's possible for `numarenas` to overflow.
   1045          */
   1046         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
   1047         if (numarenas <= maxarenas)
   1048             return NULL;                /* overflow */
   1049 #if SIZEOF_SIZE_T <= SIZEOF_INT
   1050         if (numarenas > SIZE_MAX / sizeof(*arenas))
   1051             return NULL;                /* overflow */
   1052 #endif
   1053         nbytes = numarenas * sizeof(*arenas);
   1054         arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
   1055         if (arenaobj == NULL)
   1056             return NULL;
   1057         arenas = arenaobj;
   1058 
   1059         /* We might need to fix pointers that were copied.  However,
   1060          * new_arena only gets called when all the pages in the
   1061          * previous arenas are full.  Thus, there are *no* pointers
   1062          * into the old array. Thus, we don't have to worry about
   1063          * invalid pointers.  Just to be sure, some asserts:
   1064          */
   1065         assert(usable_arenas == NULL);
   1066         assert(unused_arena_objects == NULL);
   1067 
   1068         /* Put the new arenas on the unused_arena_objects list. */
   1069         for (i = maxarenas; i < numarenas; ++i) {
   1070             arenas[i].address = 0;              /* mark as unassociated */
   1071             arenas[i].nextarena = i < numarenas - 1 ?
   1072                                    &arenas[i+1] : NULL;
   1073         }
   1074 
   1075         /* Update globals. */
   1076         unused_arena_objects = &arenas[maxarenas];
   1077         maxarenas = numarenas;
   1078     }
   1079 
   1080     /* Take the next available arena object off the head of the list. */
   1081     assert(unused_arena_objects != NULL);
   1082     arenaobj = unused_arena_objects;
   1083     unused_arena_objects = arenaobj->nextarena;
   1084     assert(arenaobj->address == 0);
   1085     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
   1086     if (address == NULL) {
   1087         /* The allocation failed: return NULL after putting the
   1088          * arenaobj back.
   1089          */
   1090         arenaobj->nextarena = unused_arena_objects;
   1091         unused_arena_objects = arenaobj;
   1092         return NULL;
   1093     }
   1094     arenaobj->address = (uintptr_t)address;
   1095 
   1096     ++narenas_currently_allocated;
   1097     ++ntimes_arena_allocated;
   1098     if (narenas_currently_allocated > narenas_highwater)
   1099         narenas_highwater = narenas_currently_allocated;
   1100     arenaobj->freepools = NULL;
   1101     /* pool_address <- first pool-aligned address in the arena
   1102        nfreepools <- number of whole pools that fit after alignment */
   1103     arenaobj->pool_address = (block*)arenaobj->address;
   1104     arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
   1105     assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
   1106     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
   1107     if (excess != 0) {
   1108         --arenaobj->nfreepools;
   1109         arenaobj->pool_address += POOL_SIZE - excess;
   1110     }
   1111     arenaobj->ntotalpools = arenaobj->nfreepools;
   1112 
   1113     return arenaobj;
   1114 }
   1115 
   1116 /*
   1117 address_in_range(P, POOL)
   1118 
   1119 Return true if and only if P is an address that was allocated by pymalloc.
   1120 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
   1121 (the caller is asked to compute this because the macro expands POOL more than
   1122 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
   1123 variable and pass the latter to the macro; because address_in_range is
   1124 called on every alloc/realloc/free, micro-efficiency is important here).
   1125 
   1126 Tricky:  Let B be the arena base address associated with the pool, B =
   1127 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
   1128 
   1129     B <= P < B + ARENA_SIZE
   1130 
   1131 Subtracting B throughout, this is true iff
   1132 
   1133     0 <= P-B < ARENA_SIZE
   1134 
   1135 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
   1136 
   1137 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
   1138 before the first arena has been allocated.  `arenas` is still NULL in that
   1139 case.  We're relying on that maxarenas is also 0 in that case, so that
   1140 (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
   1141 into a NULL arenas.
   1142 
   1143 Details:  given P and POOL, the arena_object corresponding to P is AO =
   1144 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
   1145 stores, etc), POOL is the correct address of P's pool, AO.address is the
   1146 correct base address of the pool's arena, and P must be within ARENA_SIZE of
   1147 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
   1148 (NULL)).  Therefore address_in_range correctly reports that obmalloc
   1149 controls P.
   1150 
   1151 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
   1152 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
   1153 in this case -- it may even be uninitialized trash.  If the trash arenaindex
   1154 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
   1155 control P.
   1156 
   1157 Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
   1158 allocated arena, obmalloc controls all the memory in slice AO.address :
   1159 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
   1160 so P doesn't lie in that slice, so the macro correctly reports that P is not
   1161 controlled by obmalloc.
   1162 
   1163 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
   1164 arena_object (one not currently associated with an allocated arena),
   1165 AO.address is 0, and the second test in the macro reduces to:
   1166 
   1167     P < ARENA_SIZE
   1168 
   1169 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
   1170 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
   1171 of the test still passes, and the third clause (AO.address != 0) is necessary
   1172 to get the correct result:  AO.address is 0 in this case, so the macro
   1173 correctly reports that P is not controlled by obmalloc (despite that P lies in
   1174 slice AO.address : AO.address + ARENA_SIZE).
   1175 
   1176 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
   1177 2.5, arenas were never free()'ed, and an arenaindex < maxarena always
   1178 corresponded to a currently-allocated arena, so the "P is not controlled by
   1179 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
   1180 was impossible.
   1181 
   1182 Note that the logic is excruciating, and reading up possibly uninitialized
   1183 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
   1184 creates problems for some memory debuggers.  The overwhelming advantage is
   1185 that this test determines whether an arbitrary address is controlled by
   1186 obmalloc in a small constant time, independent of the number of arenas
   1187 obmalloc controls.  Since this test is needed at every entry point, it's
   1188 extremely desirable that it be this fast.
   1189 */
   1190 
   1191 static bool ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
   1192 address_in_range(void *p, poolp pool)
   1193 {
   1194     // Since address_in_range may be reading from memory which was not allocated
   1195     // by Python, it is important that pool->arenaindex is read only once, as
   1196     // another thread may be concurrently modifying the value without holding
   1197     // the GIL. The following dance forces the compiler to read pool->arenaindex
   1198     // only once.
   1199     uint arenaindex = *((volatile uint *)&pool->arenaindex);
   1200     return arenaindex < maxarenas &&
   1201         (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
   1202         arenas[arenaindex].address != 0;
   1203 }
   1204 
   1205 /*==========================================================================*/
   1206 
   1207 /* malloc.  Note that nbytes==0 tries to return a non-NULL pointer, distinct
   1208  * from all other currently live pointers.  This may not be possible.
   1209  */
   1210 
   1211 /*
   1212  * The basic blocks are ordered by decreasing execution frequency,
   1213  * which minimizes the number of jumps in the most common cases,
   1214  * improves branching prediction and instruction scheduling (small
   1215  * block allocations typically result in a couple of instructions).
   1216  * Unless the optimizer reorders everything, being too smart...
   1217  */
   1218 
   1219 static void *
   1220 _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
   1221 {
   1222     size_t nbytes;
   1223     block *bp;
   1224     poolp pool;
   1225     poolp next;
   1226     uint size;
   1227 
   1228     _Py_AllocatedBlocks++;
   1229 
   1230     assert(nelem <= PY_SSIZE_T_MAX / elsize);
   1231     nbytes = nelem * elsize;
   1232 
   1233 #ifdef WITH_VALGRIND
   1234     if (UNLIKELY(running_on_valgrind == -1))
   1235         running_on_valgrind = RUNNING_ON_VALGRIND;
   1236     if (UNLIKELY(running_on_valgrind))
   1237         goto redirect;
   1238 #endif
   1239 
   1240     if (nelem == 0 || elsize == 0)
   1241         goto redirect;
   1242 
   1243     if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
   1244         LOCK();
   1245         /*
   1246          * Most frequent paths first
   1247          */
   1248         size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
   1249         pool = usedpools[size + size];
   1250         if (pool != pool->nextpool) {
   1251             /*
   1252              * There is a used pool for this size class.
   1253              * Pick up the head block of its free list.
   1254              */
   1255             ++pool->ref.count;
   1256             bp = pool->freeblock;
   1257             assert(bp != NULL);
   1258             if ((pool->freeblock = *(block **)bp) != NULL) {
   1259                 UNLOCK();
   1260                 if (use_calloc)
   1261                     memset(bp, 0, nbytes);
   1262                 return (void *)bp;
   1263             }
   1264             /*
   1265              * Reached the end of the free list, try to extend it.
   1266              */
   1267             if (pool->nextoffset <= pool->maxnextoffset) {
   1268                 /* There is room for another block. */
   1269                 pool->freeblock = (block*)pool +
   1270                                   pool->nextoffset;
   1271                 pool->nextoffset += INDEX2SIZE(size);
   1272                 *(block **)(pool->freeblock) = NULL;
   1273                 UNLOCK();
   1274                 if (use_calloc)
   1275                     memset(bp, 0, nbytes);
   1276                 return (void *)bp;
   1277             }
   1278             /* Pool is full, unlink from used pools. */
   1279             next = pool->nextpool;
   1280             pool = pool->prevpool;
   1281             next->prevpool = pool;
   1282             pool->nextpool = next;
   1283             UNLOCK();
   1284             if (use_calloc)
   1285                 memset(bp, 0, nbytes);
   1286             return (void *)bp;
   1287         }
   1288 
   1289         /* There isn't a pool of the right size class immediately
   1290          * available:  use a free pool.
   1291          */
   1292         if (usable_arenas == NULL) {
   1293             /* No arena has a free pool:  allocate a new arena. */
   1294 #ifdef WITH_MEMORY_LIMITS
   1295             if (narenas_currently_allocated >= MAX_ARENAS) {
   1296                 UNLOCK();
   1297                 goto redirect;
   1298             }
   1299 #endif
   1300             usable_arenas = new_arena();
   1301             if (usable_arenas == NULL) {
   1302                 UNLOCK();
   1303                 goto redirect;
   1304             }
   1305             usable_arenas->nextarena =
   1306                 usable_arenas->prevarena = NULL;
   1307         }
   1308         assert(usable_arenas->address != 0);
   1309 
   1310         /* Try to get a cached free pool. */
   1311         pool = usable_arenas->freepools;
   1312         if (pool != NULL) {
   1313             /* Unlink from cached pools. */
   1314             usable_arenas->freepools = pool->nextpool;
   1315 
   1316             /* This arena already had the smallest nfreepools
   1317              * value, so decreasing nfreepools doesn't change
   1318              * that, and we don't need to rearrange the
   1319              * usable_arenas list.  However, if the arena has
   1320              * become wholly allocated, we need to remove its
   1321              * arena_object from usable_arenas.
   1322              */
   1323             --usable_arenas->nfreepools;
   1324             if (usable_arenas->nfreepools == 0) {
   1325                 /* Wholly allocated:  remove. */
   1326                 assert(usable_arenas->freepools == NULL);
   1327                 assert(usable_arenas->nextarena == NULL ||
   1328                        usable_arenas->nextarena->prevarena ==
   1329                        usable_arenas);
   1330 
   1331                 usable_arenas = usable_arenas->nextarena;
   1332                 if (usable_arenas != NULL) {
   1333                     usable_arenas->prevarena = NULL;
   1334                     assert(usable_arenas->address != 0);
   1335                 }
   1336             }
   1337             else {
   1338                 /* nfreepools > 0:  it must be that freepools
   1339                  * isn't NULL, or that we haven't yet carved
   1340                  * off all the arena's pools for the first
   1341                  * time.
   1342                  */
   1343                 assert(usable_arenas->freepools != NULL ||
   1344                        usable_arenas->pool_address <=
   1345                        (block*)usable_arenas->address +
   1346                            ARENA_SIZE - POOL_SIZE);
   1347             }
   1348         init_pool:
   1349             /* Frontlink to used pools. */
   1350             next = usedpools[size + size]; /* == prev */
   1351             pool->nextpool = next;
   1352             pool->prevpool = next;
   1353             next->nextpool = pool;
   1354             next->prevpool = pool;
   1355             pool->ref.count = 1;
   1356             if (pool->szidx == size) {
   1357                 /* Luckily, this pool last contained blocks
   1358                  * of the same size class, so its header
   1359                  * and free list are already initialized.
   1360                  */
   1361                 bp = pool->freeblock;
   1362                 assert(bp != NULL);
   1363                 pool->freeblock = *(block **)bp;
   1364                 UNLOCK();
   1365                 if (use_calloc)
   1366                     memset(bp, 0, nbytes);
   1367                 return (void *)bp;
   1368             }
   1369             /*
   1370              * Initialize the pool header, set up the free list to
   1371              * contain just the second block, and return the first
   1372              * block.
   1373              */
   1374             pool->szidx = size;
   1375             size = INDEX2SIZE(size);
   1376             bp = (block *)pool + POOL_OVERHEAD;
   1377             pool->nextoffset = POOL_OVERHEAD + (size << 1);
   1378             pool->maxnextoffset = POOL_SIZE - size;
   1379             pool->freeblock = bp + size;
   1380             *(block **)(pool->freeblock) = NULL;
   1381             UNLOCK();
   1382             if (use_calloc)
   1383                 memset(bp, 0, nbytes);
   1384             return (void *)bp;
   1385         }
   1386 
   1387         /* Carve off a new pool. */
   1388         assert(usable_arenas->nfreepools > 0);
   1389         assert(usable_arenas->freepools == NULL);
   1390         pool = (poolp)usable_arenas->pool_address;
   1391         assert((block*)pool <= (block*)usable_arenas->address +
   1392                                ARENA_SIZE - POOL_SIZE);
   1393         pool->arenaindex = (uint)(usable_arenas - arenas);
   1394         assert(&arenas[pool->arenaindex] == usable_arenas);
   1395         pool->szidx = DUMMY_SIZE_IDX;
   1396         usable_arenas->pool_address += POOL_SIZE;
   1397         --usable_arenas->nfreepools;
   1398 
   1399         if (usable_arenas->nfreepools == 0) {
   1400             assert(usable_arenas->nextarena == NULL ||
   1401                    usable_arenas->nextarena->prevarena ==
   1402                    usable_arenas);
   1403             /* Unlink the arena:  it is completely allocated. */
   1404             usable_arenas = usable_arenas->nextarena;
   1405             if (usable_arenas != NULL) {
   1406                 usable_arenas->prevarena = NULL;
   1407                 assert(usable_arenas->address != 0);
   1408             }
   1409         }
   1410 
   1411         goto init_pool;
   1412     }
   1413 
   1414     /* The small block allocator ends here. */
   1415 
   1416 redirect:
   1417     /* Redirect the original request to the underlying (libc) allocator.
   1418      * We jump here on bigger requests, on error in the code above (as a
   1419      * last chance to serve the request) or when the max memory limit
   1420      * has been reached.
   1421      */
   1422     {
   1423         void *result;
   1424         if (use_calloc)
   1425             result = PyMem_RawCalloc(nelem, elsize);
   1426         else
   1427             result = PyMem_RawMalloc(nbytes);
   1428         if (!result)
   1429             _Py_AllocatedBlocks--;
   1430         return result;
   1431     }
   1432 }
   1433 
   1434 static void *
   1435 _PyObject_Malloc(void *ctx, size_t nbytes)
   1436 {
   1437     return _PyObject_Alloc(0, ctx, 1, nbytes);
   1438 }
   1439 
   1440 static void *
   1441 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
   1442 {
   1443     return _PyObject_Alloc(1, ctx, nelem, elsize);
   1444 }
   1445 
   1446 /* free */
   1447 
   1448 static void
   1449 _PyObject_Free(void *ctx, void *p)
   1450 {
   1451     poolp pool;
   1452     block *lastfree;
   1453     poolp next, prev;
   1454     uint size;
   1455 
   1456     if (p == NULL)      /* free(NULL) has no effect */
   1457         return;
   1458 
   1459     _Py_AllocatedBlocks--;
   1460 
   1461 #ifdef WITH_VALGRIND
   1462     if (UNLIKELY(running_on_valgrind > 0))
   1463         goto redirect;
   1464 #endif
   1465 
   1466     pool = POOL_ADDR(p);
   1467     if (address_in_range(p, pool)) {
   1468         /* We allocated this address. */
   1469         LOCK();
   1470         /* Link p to the start of the pool's freeblock list.  Since
   1471          * the pool had at least the p block outstanding, the pool
   1472          * wasn't empty (so it's already in a usedpools[] list, or
   1473          * was full and is in no list -- it's not in the freeblocks
   1474          * list in any case).
   1475          */
   1476         assert(pool->ref.count > 0);            /* else it was empty */
   1477         *(block **)p = lastfree = pool->freeblock;
   1478         pool->freeblock = (block *)p;
   1479         if (lastfree) {
   1480             struct arena_object* ao;
   1481             uint nf;  /* ao->nfreepools */
   1482 
   1483             /* freeblock wasn't NULL, so the pool wasn't full,
   1484              * and the pool is in a usedpools[] list.
   1485              */
   1486             if (--pool->ref.count != 0) {
   1487                 /* pool isn't empty:  leave it in usedpools */
   1488                 UNLOCK();
   1489                 return;
   1490             }
   1491             /* Pool is now empty:  unlink from usedpools, and
   1492              * link to the front of freepools.  This ensures that
   1493              * previously freed pools will be allocated later
   1494              * (being not referenced, they are perhaps paged out).
   1495              */
   1496             next = pool->nextpool;
   1497             prev = pool->prevpool;
   1498             next->prevpool = prev;
   1499             prev->nextpool = next;
   1500 
   1501             /* Link the pool to freepools.  This is a singly-linked
   1502              * list, and pool->prevpool isn't used there.
   1503              */
   1504             ao = &arenas[pool->arenaindex];
   1505             pool->nextpool = ao->freepools;
   1506             ao->freepools = pool;
   1507             nf = ++ao->nfreepools;
   1508 
   1509             /* All the rest is arena management.  We just freed
   1510              * a pool, and there are 4 cases for arena mgmt:
   1511              * 1. If all the pools are free, return the arena to
   1512              *    the system free().
   1513              * 2. If this is the only free pool in the arena,
   1514              *    add the arena back to the `usable_arenas` list.
   1515              * 3. If the "next" arena has a smaller count of free
   1516              *    pools, we have to "slide this arena right" to
   1517              *    restore that usable_arenas is sorted in order of
   1518              *    nfreepools.
   1519              * 4. Else there's nothing more to do.
   1520              */
   1521             if (nf == ao->ntotalpools) {
   1522                 /* Case 1.  First unlink ao from usable_arenas.
   1523                  */
   1524                 assert(ao->prevarena == NULL ||
   1525                        ao->prevarena->address != 0);
   1526                 assert(ao ->nextarena == NULL ||
   1527                        ao->nextarena->address != 0);
   1528 
   1529                 /* Fix the pointer in the prevarena, or the
   1530                  * usable_arenas pointer.
   1531                  */
   1532                 if (ao->prevarena == NULL) {
   1533                     usable_arenas = ao->nextarena;
   1534                     assert(usable_arenas == NULL ||
   1535                            usable_arenas->address != 0);
   1536                 }
   1537                 else {
   1538                     assert(ao->prevarena->nextarena == ao);
   1539                     ao->prevarena->nextarena =
   1540                         ao->nextarena;
   1541                 }
   1542                 /* Fix the pointer in the nextarena. */
   1543                 if (ao->nextarena != NULL) {
   1544                     assert(ao->nextarena->prevarena == ao);
   1545                     ao->nextarena->prevarena =
   1546                         ao->prevarena;
   1547                 }
   1548                 /* Record that this arena_object slot is
   1549                  * available to be reused.
   1550                  */
   1551                 ao->nextarena = unused_arena_objects;
   1552                 unused_arena_objects = ao;
   1553 
   1554                 /* Free the entire arena. */
   1555                 _PyObject_Arena.free(_PyObject_Arena.ctx,
   1556                                      (void *)ao->address, ARENA_SIZE);
   1557                 ao->address = 0;                        /* mark unassociated */
   1558                 --narenas_currently_allocated;
   1559 
   1560                 UNLOCK();
   1561                 return;
   1562             }
   1563             if (nf == 1) {
   1564                 /* Case 2.  Put ao at the head of
   1565                  * usable_arenas.  Note that because
   1566                  * ao->nfreepools was 0 before, ao isn't
   1567                  * currently on the usable_arenas list.
   1568                  */
   1569                 ao->nextarena = usable_arenas;
   1570                 ao->prevarena = NULL;
   1571                 if (usable_arenas)
   1572                     usable_arenas->prevarena = ao;
   1573                 usable_arenas = ao;
   1574                 assert(usable_arenas->address != 0);
   1575 
   1576                 UNLOCK();
   1577                 return;
   1578             }
   1579             /* If this arena is now out of order, we need to keep
   1580              * the list sorted.  The list is kept sorted so that
   1581              * the "most full" arenas are used first, which allows
   1582              * the nearly empty arenas to be completely freed.  In
   1583              * a few un-scientific tests, it seems like this
   1584              * approach allowed a lot more memory to be freed.
   1585              */
   1586             if (ao->nextarena == NULL ||
   1587                          nf <= ao->nextarena->nfreepools) {
   1588                 /* Case 4.  Nothing to do. */
   1589                 UNLOCK();
   1590                 return;
   1591             }
   1592             /* Case 3:  We have to move the arena towards the end
   1593              * of the list, because it has more free pools than
   1594              * the arena to its right.
   1595              * First unlink ao from usable_arenas.
   1596              */
   1597             if (ao->prevarena != NULL) {
   1598                 /* ao isn't at the head of the list */
   1599                 assert(ao->prevarena->nextarena == ao);
   1600                 ao->prevarena->nextarena = ao->nextarena;
   1601             }
   1602             else {
   1603                 /* ao is at the head of the list */
   1604                 assert(usable_arenas == ao);
   1605                 usable_arenas = ao->nextarena;
   1606             }
   1607             ao->nextarena->prevarena = ao->prevarena;
   1608 
   1609             /* Locate the new insertion point by iterating over
   1610              * the list, using our nextarena pointer.
   1611              */
   1612             while (ao->nextarena != NULL &&
   1613                             nf > ao->nextarena->nfreepools) {
   1614                 ao->prevarena = ao->nextarena;
   1615                 ao->nextarena = ao->nextarena->nextarena;
   1616             }
   1617 
   1618             /* Insert ao at this point. */
   1619             assert(ao->nextarena == NULL ||
   1620                 ao->prevarena == ao->nextarena->prevarena);
   1621             assert(ao->prevarena->nextarena == ao->nextarena);
   1622 
   1623             ao->prevarena->nextarena = ao;
   1624             if (ao->nextarena != NULL)
   1625                 ao->nextarena->prevarena = ao;
   1626 
   1627             /* Verify that the swaps worked. */
   1628             assert(ao->nextarena == NULL ||
   1629                       nf <= ao->nextarena->nfreepools);
   1630             assert(ao->prevarena == NULL ||
   1631                       nf > ao->prevarena->nfreepools);
   1632             assert(ao->nextarena == NULL ||
   1633                 ao->nextarena->prevarena == ao);
   1634             assert((usable_arenas == ao &&
   1635                 ao->prevarena == NULL) ||
   1636                 ao->prevarena->nextarena == ao);
   1637 
   1638             UNLOCK();
   1639             return;
   1640         }
   1641         /* Pool was full, so doesn't currently live in any list:
   1642          * link it to the front of the appropriate usedpools[] list.
   1643          * This mimics LRU pool usage for new allocations and
   1644          * targets optimal filling when several pools contain
   1645          * blocks of the same size class.
   1646          */
   1647         --pool->ref.count;
   1648         assert(pool->ref.count > 0);            /* else the pool is empty */
   1649         size = pool->szidx;
   1650         next = usedpools[size + size];
   1651         prev = next->prevpool;
   1652         /* insert pool before next:   prev <-> pool <-> next */
   1653         pool->nextpool = next;
   1654         pool->prevpool = prev;
   1655         next->prevpool = pool;
   1656         prev->nextpool = pool;
   1657         UNLOCK();
   1658         return;
   1659     }
   1660 
   1661 #ifdef WITH_VALGRIND
   1662 redirect:
   1663 #endif
   1664     /* We didn't allocate this address. */
   1665     PyMem_RawFree(p);
   1666 }
   1667 
   1668 /* realloc.  If p is NULL, this acts like malloc(nbytes).  Else if nbytes==0,
   1669  * then as the Python docs promise, we do not treat this like free(p), and
   1670  * return a non-NULL result.
   1671  */
   1672 
   1673 static void *
   1674 _PyObject_Realloc(void *ctx, void *p, size_t nbytes)
   1675 {
   1676     void *bp;
   1677     poolp pool;
   1678     size_t size;
   1679 
   1680     if (p == NULL)
   1681         return _PyObject_Alloc(0, ctx, 1, nbytes);
   1682 
   1683 #ifdef WITH_VALGRIND
   1684     /* Treat running_on_valgrind == -1 the same as 0 */
   1685     if (UNLIKELY(running_on_valgrind > 0))
   1686         goto redirect;
   1687 #endif
   1688 
   1689     pool = POOL_ADDR(p);
   1690     if (address_in_range(p, pool)) {
   1691         /* We're in charge of this block */
   1692         size = INDEX2SIZE(pool->szidx);
   1693         if (nbytes <= size) {
   1694             /* The block is staying the same or shrinking.  If
   1695              * it's shrinking, there's a tradeoff:  it costs
   1696              * cycles to copy the block to a smaller size class,
   1697              * but it wastes memory not to copy it.  The
   1698              * compromise here is to copy on shrink only if at
   1699              * least 25% of size can be shaved off.
   1700              */
   1701             if (4 * nbytes > 3 * size) {
   1702                 /* It's the same,
   1703                  * or shrinking and new/old > 3/4.
   1704                  */
   1705                 return p;
   1706             }
   1707             size = nbytes;
   1708         }
   1709         bp = _PyObject_Alloc(0, ctx, 1, nbytes);
   1710         if (bp != NULL) {
   1711             memcpy(bp, p, size);
   1712             _PyObject_Free(ctx, p);
   1713         }
   1714         return bp;
   1715     }
   1716 #ifdef WITH_VALGRIND
   1717  redirect:
   1718 #endif
   1719     /* We're not managing this block.  If nbytes <=
   1720      * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
   1721      * block.  However, if we do, we need to copy the valid data from
   1722      * the C-managed block to one of our blocks, and there's no portable
   1723      * way to know how much of the memory space starting at p is valid.
   1724      * As bug 1185883 pointed out the hard way, it's possible that the
   1725      * C-managed block is "at the end" of allocated VM space, so that
   1726      * a memory fault can occur if we try to copy nbytes bytes starting
   1727      * at p.  Instead we punt:  let C continue to manage this block.
   1728      */
   1729     if (nbytes)
   1730         return PyMem_RawRealloc(p, nbytes);
   1731     /* C doesn't define the result of realloc(p, 0) (it may or may not
   1732      * return NULL then), but Python's docs promise that nbytes==0 never
   1733      * returns NULL.  We don't pass 0 to realloc(), to avoid that endcase
   1734      * to begin with.  Even then, we can't be sure that realloc() won't
   1735      * return NULL.
   1736      */
   1737     bp = PyMem_RawRealloc(p, 1);
   1738     return bp ? bp : p;
   1739 }
   1740 
   1741 #else   /* ! WITH_PYMALLOC */
   1742 
   1743 /*==========================================================================*/
   1744 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
   1745  * only be used by extensions that are compiled with pymalloc enabled. */
   1746 
   1747 Py_ssize_t
   1748 _Py_GetAllocatedBlocks(void)
   1749 {
   1750     return 0;
   1751 }
   1752 
   1753 #endif /* WITH_PYMALLOC */
   1754 
   1755 
   1756 /*==========================================================================*/
   1757 /* A x-platform debugging allocator.  This doesn't manage memory directly,
   1758  * it wraps a real allocator, adding extra debugging info to the memory blocks.
   1759  */
   1760 
   1761 /* Special bytes broadcast into debug memory blocks at appropriate times.
   1762  * Strings of these are unlikely to be valid addresses, floats, ints or
   1763  * 7-bit ASCII.
   1764  */
   1765 #undef CLEANBYTE
   1766 #undef DEADBYTE
   1767 #undef FORBIDDENBYTE
   1768 #define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
   1769 #define DEADBYTE       0xDB    /* dead (newly freed) memory */
   1770 #define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
   1771 
   1772 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
   1773 
   1774 /* serialno is always incremented via calling this routine.  The point is
   1775  * to supply a single place to set a breakpoint.
   1776  */
   1777 static void
   1778 bumpserialno(void)
   1779 {
   1780     ++serialno;
   1781 }
   1782 
   1783 #define SST SIZEOF_SIZE_T
   1784 
   1785 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
   1786 static size_t
   1787 read_size_t(const void *p)
   1788 {
   1789     const uint8_t *q = (const uint8_t *)p;
   1790     size_t result = *q++;
   1791     int i;
   1792 
   1793     for (i = SST; --i > 0; ++q)
   1794         result = (result << 8) | *q;
   1795     return result;
   1796 }
   1797 
   1798 /* Write n as a big-endian size_t, MSB at address p, LSB at
   1799  * p + sizeof(size_t) - 1.
   1800  */
   1801 static void
   1802 write_size_t(void *p, size_t n)
   1803 {
   1804     uint8_t *q = (uint8_t *)p + SST - 1;
   1805     int i;
   1806 
   1807     for (i = SST; --i >= 0; --q) {
   1808         *q = (uint8_t)(n & 0xff);
   1809         n >>= 8;
   1810     }
   1811 }
   1812 
   1813 /* Let S = sizeof(size_t).  The debug malloc asks for 4*S extra bytes and
   1814    fills them with useful stuff, here calling the underlying malloc's result p:
   1815 
   1816 p[0: S]
   1817     Number of bytes originally asked for.  This is a size_t, big-endian (easier
   1818     to read in a memory dump).
   1819 p[S]
   1820     API ID.  See PEP 445.  This is a character, but seems undocumented.
   1821 p[S+1: 2*S]
   1822     Copies of FORBIDDENBYTE.  Used to catch under- writes and reads.
   1823 p[2*S: 2*S+n]
   1824     The requested memory, filled with copies of CLEANBYTE.
   1825     Used to catch reference to uninitialized memory.
   1826     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
   1827     handled the request itself.
   1828 p[2*S+n: 2*S+n+S]
   1829     Copies of FORBIDDENBYTE.  Used to catch over- writes and reads.
   1830 p[2*S+n+S: 2*S+n+2*S]
   1831     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
   1832     and _PyMem_DebugRealloc.
   1833     This is a big-endian size_t.
   1834     If "bad memory" is detected later, the serial number gives an
   1835     excellent way to set a breakpoint on the next run, to capture the
   1836     instant at which this block was passed out.
   1837 */
   1838 
   1839 static void *
   1840 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
   1841 {
   1842     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
   1843     uint8_t *p;           /* base address of malloc'ed block */
   1844     uint8_t *tail;        /* p + 2*SST + nbytes == pointer to tail pad bytes */
   1845     size_t total;       /* nbytes + 4*SST */
   1846 
   1847     bumpserialno();
   1848     total = nbytes + 4*SST;
   1849     if (nbytes > PY_SSIZE_T_MAX - 4*SST)
   1850         /* overflow:  can't represent total as a Py_ssize_t */
   1851         return NULL;
   1852 
   1853     if (use_calloc)
   1854         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
   1855     else
   1856         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
   1857     if (p == NULL)
   1858         return NULL;
   1859 
   1860     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
   1861     write_size_t(p, nbytes);
   1862     p[SST] = (uint8_t)api->api_id;
   1863     memset(p + SST + 1, FORBIDDENBYTE, SST-1);
   1864 
   1865     if (nbytes > 0 && !use_calloc)
   1866         memset(p + 2*SST, CLEANBYTE, nbytes);
   1867 
   1868     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
   1869     tail = p + 2*SST + nbytes;
   1870     memset(tail, FORBIDDENBYTE, SST);
   1871     write_size_t(tail + SST, serialno);
   1872 
   1873     return p + 2*SST;
   1874 }
   1875 
   1876 static void *
   1877 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
   1878 {
   1879     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
   1880 }
   1881 
   1882 static void *
   1883 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
   1884 {
   1885     size_t nbytes;
   1886     assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
   1887     nbytes = nelem * elsize;
   1888     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
   1889 }
   1890 
   1891 /* The debug free first checks the 2*SST bytes on each end for sanity (in
   1892    particular, that the FORBIDDENBYTEs with the api ID are still intact).
   1893    Then fills the original bytes with DEADBYTE.
   1894    Then calls the underlying free.
   1895 */
   1896 static void
   1897 _PyMem_DebugRawFree(void *ctx, void *p)
   1898 {
   1899     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
   1900     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
   1901     size_t nbytes;
   1902 
   1903     if (p == NULL)
   1904         return;
   1905     _PyMem_DebugCheckAddress(api->api_id, p);
   1906     nbytes = read_size_t(q);
   1907     nbytes += 4*SST;
   1908     if (nbytes > 0)
   1909         memset(q, DEADBYTE, nbytes);
   1910     api->alloc.free(api->alloc.ctx, q);
   1911 }
   1912 
   1913 static void *
   1914 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
   1915 {
   1916     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
   1917     uint8_t *q = (uint8_t *)p, *oldq;
   1918     uint8_t *tail;
   1919     size_t total;       /* nbytes + 4*SST */
   1920     size_t original_nbytes;
   1921     int i;
   1922 
   1923     if (p == NULL)
   1924         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
   1925 
   1926     _PyMem_DebugCheckAddress(api->api_id, p);
   1927     bumpserialno();
   1928     original_nbytes = read_size_t(q - 2*SST);
   1929     total = nbytes + 4*SST;
   1930     if (nbytes > PY_SSIZE_T_MAX - 4*SST)
   1931         /* overflow:  can't represent total as a Py_ssize_t */
   1932         return NULL;
   1933 
   1934     /* Resize and add decorations. We may get a new pointer here, in which
   1935      * case we didn't get the chance to mark the old memory with DEADBYTE,
   1936      * but we live with that.
   1937      */
   1938     oldq = q;
   1939     q = (uint8_t *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
   1940     if (q == NULL)
   1941         return NULL;
   1942 
   1943     if (q == oldq && nbytes < original_nbytes) {
   1944         /* shrinking:  mark old extra memory dead */
   1945         memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
   1946     }
   1947 
   1948     write_size_t(q, nbytes);
   1949     assert(q[SST] == (uint8_t)api->api_id);
   1950     for (i = 1; i < SST; ++i)
   1951         assert(q[SST + i] == FORBIDDENBYTE);
   1952     q += 2*SST;
   1953 
   1954     tail = q + nbytes;
   1955     memset(tail, FORBIDDENBYTE, SST);
   1956     write_size_t(tail + SST, serialno);
   1957 
   1958     if (nbytes > original_nbytes) {
   1959         /* growing:  mark new extra memory clean */
   1960         memset(q + original_nbytes, CLEANBYTE,
   1961                nbytes - original_nbytes);
   1962     }
   1963 
   1964     return q;
   1965 }
   1966 
   1967 static void
   1968 _PyMem_DebugCheckGIL(void)
   1969 {
   1970 #ifdef WITH_THREAD
   1971     if (!PyGILState_Check())
   1972         Py_FatalError("Python memory allocator called "
   1973                       "without holding the GIL");
   1974 #endif
   1975 }
   1976 
   1977 static void *
   1978 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
   1979 {
   1980     _PyMem_DebugCheckGIL();
   1981     return _PyMem_DebugRawMalloc(ctx, nbytes);
   1982 }
   1983 
   1984 static void *
   1985 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
   1986 {
   1987     _PyMem_DebugCheckGIL();
   1988     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
   1989 }
   1990 
   1991 static void
   1992 _PyMem_DebugFree(void *ctx, void *ptr)
   1993 {
   1994     _PyMem_DebugCheckGIL();
   1995     _PyMem_DebugRawFree(ctx, ptr);
   1996 }
   1997 
   1998 static void *
   1999 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
   2000 {
   2001     _PyMem_DebugCheckGIL();
   2002     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
   2003 }
   2004 
   2005 /* Check the forbidden bytes on both ends of the memory allocated for p.
   2006  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
   2007  * and call Py_FatalError to kill the program.
   2008  * The API id, is also checked.
   2009  */
   2010 static void
   2011 _PyMem_DebugCheckAddress(char api, const void *p)
   2012 {
   2013     const uint8_t *q = (const uint8_t *)p;
   2014     char msgbuf[64];
   2015     char *msg;
   2016     size_t nbytes;
   2017     const uint8_t *tail;
   2018     int i;
   2019     char id;
   2020 
   2021     if (p == NULL) {
   2022         msg = "didn't expect a NULL pointer";
   2023         goto error;
   2024     }
   2025 
   2026     /* Check the API id */
   2027     id = (char)q[-SST];
   2028     if (id != api) {
   2029         msg = msgbuf;
   2030         snprintf(msg, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
   2031         msgbuf[sizeof(msgbuf)-1] = 0;
   2032         goto error;
   2033     }
   2034 
   2035     /* Check the stuff at the start of p first:  if there's underwrite
   2036      * corruption, the number-of-bytes field may be nuts, and checking
   2037      * the tail could lead to a segfault then.
   2038      */
   2039     for (i = SST-1; i >= 1; --i) {
   2040         if (*(q-i) != FORBIDDENBYTE) {
   2041             msg = "bad leading pad byte";
   2042             goto error;
   2043         }
   2044     }
   2045 
   2046     nbytes = read_size_t(q - 2*SST);
   2047     tail = q + nbytes;
   2048     for (i = 0; i < SST; ++i) {
   2049         if (tail[i] != FORBIDDENBYTE) {
   2050             msg = "bad trailing pad byte";
   2051             goto error;
   2052         }
   2053     }
   2054 
   2055     return;
   2056 
   2057 error:
   2058     _PyObject_DebugDumpAddress(p);
   2059     Py_FatalError(msg);
   2060 }
   2061 
   2062 /* Display info to stderr about the memory block at p. */
   2063 static void
   2064 _PyObject_DebugDumpAddress(const void *p)
   2065 {
   2066     const uint8_t *q = (const uint8_t *)p;
   2067     const uint8_t *tail;
   2068     size_t nbytes, serial;
   2069     int i;
   2070     int ok;
   2071     char id;
   2072 
   2073     fprintf(stderr, "Debug memory block at address p=%p:", p);
   2074     if (p == NULL) {
   2075         fprintf(stderr, "\n");
   2076         return;
   2077     }
   2078     id = (char)q[-SST];
   2079     fprintf(stderr, " API '%c'\n", id);
   2080 
   2081     nbytes = read_size_t(q - 2*SST);
   2082     fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
   2083                     "requested\n", nbytes);
   2084 
   2085     /* In case this is nuts, check the leading pad bytes first. */
   2086     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
   2087     ok = 1;
   2088     for (i = 1; i <= SST-1; ++i) {
   2089         if (*(q-i) != FORBIDDENBYTE) {
   2090             ok = 0;
   2091             break;
   2092         }
   2093     }
   2094     if (ok)
   2095         fputs("FORBIDDENBYTE, as expected.\n", stderr);
   2096     else {
   2097         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
   2098             FORBIDDENBYTE);
   2099         for (i = SST-1; i >= 1; --i) {
   2100             const uint8_t byte = *(q-i);
   2101             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
   2102             if (byte != FORBIDDENBYTE)
   2103                 fputs(" *** OUCH", stderr);
   2104             fputc('\n', stderr);
   2105         }
   2106 
   2107         fputs("    Because memory is corrupted at the start, the "
   2108               "count of bytes requested\n"
   2109               "       may be bogus, and checking the trailing pad "
   2110               "bytes may segfault.\n", stderr);
   2111     }
   2112 
   2113     tail = q + nbytes;
   2114     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, tail);
   2115     ok = 1;
   2116     for (i = 0; i < SST; ++i) {
   2117         if (tail[i] != FORBIDDENBYTE) {
   2118             ok = 0;
   2119             break;
   2120         }
   2121     }
   2122     if (ok)
   2123         fputs("FORBIDDENBYTE, as expected.\n", stderr);
   2124     else {
   2125         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
   2126                 FORBIDDENBYTE);
   2127         for (i = 0; i < SST; ++i) {
   2128             const uint8_t byte = tail[i];
   2129             fprintf(stderr, "        at tail+%d: 0x%02x",
   2130                     i, byte);
   2131             if (byte != FORBIDDENBYTE)
   2132                 fputs(" *** OUCH", stderr);
   2133             fputc('\n', stderr);
   2134         }
   2135     }
   2136 
   2137     serial = read_size_t(tail + SST);
   2138     fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
   2139                     "u to debug malloc/realloc.\n", serial);
   2140 
   2141     if (nbytes > 0) {
   2142         i = 0;
   2143         fputs("    Data at p:", stderr);
   2144         /* print up to 8 bytes at the start */
   2145         while (q < tail && i < 8) {
   2146             fprintf(stderr, " %02x", *q);
   2147             ++i;
   2148             ++q;
   2149         }
   2150         /* and up to 8 at the end */
   2151         if (q < tail) {
   2152             if (tail - q > 8) {
   2153                 fputs(" ...", stderr);
   2154                 q = tail - 8;
   2155             }
   2156             while (q < tail) {
   2157                 fprintf(stderr, " %02x", *q);
   2158                 ++q;
   2159             }
   2160         }
   2161         fputc('\n', stderr);
   2162     }
   2163     fputc('\n', stderr);
   2164 
   2165     fflush(stderr);
   2166     _PyMem_DumpTraceback(fileno(stderr), p);
   2167 }
   2168 
   2169 
   2170 static size_t
   2171 printone(FILE *out, const char* msg, size_t value)
   2172 {
   2173     int i, k;
   2174     char buf[100];
   2175     size_t origvalue = value;
   2176 
   2177     fputs(msg, out);
   2178     for (i = (int)strlen(msg); i < 35; ++i)
   2179         fputc(' ', out);
   2180     fputc('=', out);
   2181 
   2182     /* Write the value with commas. */
   2183     i = 22;
   2184     buf[i--] = '\0';
   2185     buf[i--] = '\n';
   2186     k = 3;
   2187     do {
   2188         size_t nextvalue = value / 10;
   2189         unsigned int digit = (unsigned int)(value - nextvalue * 10);
   2190         value = nextvalue;
   2191         buf[i--] = (char)(digit + '0');
   2192         --k;
   2193         if (k == 0 && value && i >= 0) {
   2194             k = 3;
   2195             buf[i--] = ',';
   2196         }
   2197     } while (value && i >= 0);
   2198 
   2199     while (i >= 0)
   2200         buf[i--] = ' ';
   2201     fputs(buf, out);
   2202 
   2203     return origvalue;
   2204 }
   2205 
   2206 void
   2207 _PyDebugAllocatorStats(FILE *out,
   2208                        const char *block_name, int num_blocks, size_t sizeof_block)
   2209 {
   2210     char buf1[128];
   2211     char buf2[128];
   2212     PyOS_snprintf(buf1, sizeof(buf1),
   2213                   "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
   2214                   num_blocks, block_name, sizeof_block);
   2215     PyOS_snprintf(buf2, sizeof(buf2),
   2216                   "%48s ", buf1);
   2217     (void)printone(out, buf2, num_blocks * sizeof_block);
   2218 }
   2219 
   2220 
   2221 #ifdef WITH_PYMALLOC
   2222 
   2223 #ifdef Py_DEBUG
   2224 /* Is target in the list?  The list is traversed via the nextpool pointers.
   2225  * The list may be NULL-terminated, or circular.  Return 1 if target is in
   2226  * list, else 0.
   2227  */
   2228 static int
   2229 pool_is_in_list(const poolp target, poolp list)
   2230 {
   2231     poolp origlist = list;
   2232     assert(target != NULL);
   2233     if (list == NULL)
   2234         return 0;
   2235     do {
   2236         if (target == list)
   2237             return 1;
   2238         list = list->nextpool;
   2239     } while (list != NULL && list != origlist);
   2240     return 0;
   2241 }
   2242 #endif
   2243 
   2244 /* Print summary info to "out" about the state of pymalloc's structures.
   2245  * In Py_DEBUG mode, also perform some expensive internal consistency
   2246  * checks.
   2247  */
   2248 void
   2249 _PyObject_DebugMallocStats(FILE *out)
   2250 {
   2251     uint i;
   2252     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
   2253     /* # of pools, allocated blocks, and free blocks per class index */
   2254     size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
   2255     size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
   2256     size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
   2257     /* total # of allocated bytes in used and full pools */
   2258     size_t allocated_bytes = 0;
   2259     /* total # of available bytes in used pools */
   2260     size_t available_bytes = 0;
   2261     /* # of free pools + pools not yet carved out of current arena */
   2262     uint numfreepools = 0;
   2263     /* # of bytes for arena alignment padding */
   2264     size_t arena_alignment = 0;
   2265     /* # of bytes in used and full pools used for pool_headers */
   2266     size_t pool_header_bytes = 0;
   2267     /* # of bytes in used and full pools wasted due to quantization,
   2268      * i.e. the necessarily leftover space at the ends of used and
   2269      * full pools.
   2270      */
   2271     size_t quantization = 0;
   2272     /* # of arenas actually allocated. */
   2273     size_t narenas = 0;
   2274     /* running total -- should equal narenas * ARENA_SIZE */
   2275     size_t total;
   2276     char buf[128];
   2277 
   2278     fprintf(out, "Small block threshold = %d, in %u size classes.\n",
   2279             SMALL_REQUEST_THRESHOLD, numclasses);
   2280 
   2281     for (i = 0; i < numclasses; ++i)
   2282         numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
   2283 
   2284     /* Because full pools aren't linked to from anything, it's easiest
   2285      * to march over all the arenas.  If we're lucky, most of the memory
   2286      * will be living in full pools -- would be a shame to miss them.
   2287      */
   2288     for (i = 0; i < maxarenas; ++i) {
   2289         uint j;
   2290         uintptr_t base = arenas[i].address;
   2291 
   2292         /* Skip arenas which are not allocated. */
   2293         if (arenas[i].address == (uintptr_t)NULL)
   2294             continue;
   2295         narenas += 1;
   2296 
   2297         numfreepools += arenas[i].nfreepools;
   2298 
   2299         /* round up to pool alignment */
   2300         if (base & (uintptr_t)POOL_SIZE_MASK) {
   2301             arena_alignment += POOL_SIZE;
   2302             base &= ~(uintptr_t)POOL_SIZE_MASK;
   2303             base += POOL_SIZE;
   2304         }
   2305 
   2306         /* visit every pool in the arena */
   2307         assert(base <= (uintptr_t) arenas[i].pool_address);
   2308         for (j = 0; base < (uintptr_t) arenas[i].pool_address;
   2309              ++j, base += POOL_SIZE) {
   2310             poolp p = (poolp)base;
   2311             const uint sz = p->szidx;
   2312             uint freeblocks;
   2313 
   2314             if (p->ref.count == 0) {
   2315                 /* currently unused */
   2316 #ifdef Py_DEBUG
   2317                 assert(pool_is_in_list(p, arenas[i].freepools));
   2318 #endif
   2319                 continue;
   2320             }
   2321             ++numpools[sz];
   2322             numblocks[sz] += p->ref.count;
   2323             freeblocks = NUMBLOCKS(sz) - p->ref.count;
   2324             numfreeblocks[sz] += freeblocks;
   2325 #ifdef Py_DEBUG
   2326             if (freeblocks > 0)
   2327                 assert(pool_is_in_list(p, usedpools[sz + sz]));
   2328 #endif
   2329         }
   2330     }
   2331     assert(narenas == narenas_currently_allocated);
   2332 
   2333     fputc('\n', out);
   2334     fputs("class   size   num pools   blocks in use  avail blocks\n"
   2335           "-----   ----   ---------   -------------  ------------\n",
   2336           out);
   2337 
   2338     for (i = 0; i < numclasses; ++i) {
   2339         size_t p = numpools[i];
   2340         size_t b = numblocks[i];
   2341         size_t f = numfreeblocks[i];
   2342         uint size = INDEX2SIZE(i);
   2343         if (p == 0) {
   2344             assert(b == 0 && f == 0);
   2345             continue;
   2346         }
   2347         fprintf(out, "%5u %6u "
   2348                         "%11" PY_FORMAT_SIZE_T "u "
   2349                         "%15" PY_FORMAT_SIZE_T "u "
   2350                         "%13" PY_FORMAT_SIZE_T "u\n",
   2351                 i, size, p, b, f);
   2352         allocated_bytes += b * size;
   2353         available_bytes += f * size;
   2354         pool_header_bytes += p * POOL_OVERHEAD;
   2355         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
   2356     }
   2357     fputc('\n', out);
   2358     if (_PyMem_DebugEnabled())
   2359         (void)printone(out, "# times object malloc called", serialno);
   2360     (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
   2361     (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
   2362     (void)printone(out, "# arenas highwater mark", narenas_highwater);
   2363     (void)printone(out, "# arenas allocated current", narenas);
   2364 
   2365     PyOS_snprintf(buf, sizeof(buf),
   2366         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
   2367         narenas, ARENA_SIZE);
   2368     (void)printone(out, buf, narenas * ARENA_SIZE);
   2369 
   2370     fputc('\n', out);
   2371 
   2372     total = printone(out, "# bytes in allocated blocks", allocated_bytes);
   2373     total += printone(out, "# bytes in available blocks", available_bytes);
   2374 
   2375     PyOS_snprintf(buf, sizeof(buf),
   2376         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
   2377     total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
   2378 
   2379     total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
   2380     total += printone(out, "# bytes lost to quantization", quantization);
   2381     total += printone(out, "# bytes lost to arena alignment", arena_alignment);
   2382     (void)printone(out, "Total", total);
   2383 }
   2384 
   2385 #endif /* #ifdef WITH_PYMALLOC */
   2386