      1 #include "Python.h"
      2 
      3 #include <stdbool.h>
      4 
      5 
      6 /* Defined in tracemalloc.c */
      7 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
      8 
      9 
     10 /* Python's malloc wrappers (see pymem.h) */
     11 
     12 #undef  uint
     13 #define uint    unsigned int    /* assuming >= 16 bits */
     14 
     15 /* Forward declaration */
     16 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
     17 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
     18 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
     19 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
     20 
     21 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
     22 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
     23 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
     24 static void _PyMem_DebugFree(void *ctx, void *p);
     25 
     26 static void _PyObject_DebugDumpAddress(const void *p);
     27 static void _PyMem_DebugCheckAddress(char api_id, const void *p);
     28 
     29 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
     30 
     31 #if defined(__has_feature)  /* Clang */
     32 #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
     33 #    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
     34         __attribute__((no_address_safety_analysis))
     35 #  endif
     36 #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
     37 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
     38 #  endif
     39 #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
     40 #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
     41 #  endif
     42 #elif defined(__GNUC__)
     43 #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
     44 #    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
     45         __attribute__((no_address_safety_analysis))
     46 #  endif
      47    // TSAN is supported since GCC 4.8, but the __SANITIZE_THREAD__
      48    // macro is only provided since GCC 7.
     49 #  if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
     50 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
     51 #  endif
     52 #endif
     53 
     54 #ifndef _Py_NO_ADDRESS_SAFETY_ANALYSIS
     55 #  define _Py_NO_ADDRESS_SAFETY_ANALYSIS
     56 #endif
     57 #ifndef _Py_NO_SANITIZE_THREAD
     58 #  define _Py_NO_SANITIZE_THREAD
     59 #endif
     60 #ifndef _Py_NO_SANITIZE_MEMORY
     61 #  define _Py_NO_SANITIZE_MEMORY
     62 #endif
     63 
     64 #ifdef WITH_PYMALLOC
     65 
     66 #ifdef MS_WINDOWS
     67 #  include <windows.h>
     68 #elif defined(HAVE_MMAP)
     69 #  include <sys/mman.h>
     70 #  ifdef MAP_ANONYMOUS
     71 #    define ARENAS_USE_MMAP
     72 #  endif
     73 #endif
     74 
     75 /* Forward declaration */
     76 static void* _PyObject_Malloc(void *ctx, size_t size);
     77 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
     78 static void _PyObject_Free(void *ctx, void *p);
     79 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
     80 #endif
     81 
     82 
     83 static void *
     84 _PyMem_RawMalloc(void *ctx, size_t size)
     85 {
     86     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
     87        for malloc(0), which would be treated as an error. Some platforms would
     88        return a pointer with no memory behind it, which would break pymalloc.
     89        To solve these problems, allocate an extra byte. */
     90     if (size == 0)
     91         size = 1;
     92     return malloc(size);
     93 }
     94 
     95 static void *
     96 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
     97 {
     98     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
     99        for calloc(0, 0), which would be treated as an error. Some platforms
    100        would return a pointer with no memory behind it, which would break
    101        pymalloc.  To solve these problems, allocate an extra byte. */
    102     if (nelem == 0 || elsize == 0) {
    103         nelem = 1;
    104         elsize = 1;
    105     }
    106     return calloc(nelem, elsize);
    107 }
    108 
    109 static void *
    110 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
    111 {
    112     if (size == 0)
    113         size = 1;
    114     return realloc(ptr, size);
    115 }
    116 
    117 static void
    118 _PyMem_RawFree(void *ctx, void *ptr)
    119 {
    120     free(ptr);
    121 }
    122 
    123 
    124 #ifdef MS_WINDOWS
    125 static void *
    126 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
    127 {
    128     return VirtualAlloc(NULL, size,
    129                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    130 }
    131 
    132 static void
    133 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
    134 {
    135     VirtualFree(ptr, 0, MEM_RELEASE);
    136 }
    137 
    138 #elif defined(ARENAS_USE_MMAP)
    139 static void *
    140 _PyObject_ArenaMmap(void *ctx, size_t size)
    141 {
    142     void *ptr;
    143     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
    144                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    145     if (ptr == MAP_FAILED)
    146         return NULL;
    147     assert(ptr != NULL);
    148     return ptr;
    149 }
    150 
    151 static void
    152 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
    153 {
    154     munmap(ptr, size);
    155 }
    156 
    157 #else
    158 static void *
    159 _PyObject_ArenaMalloc(void *ctx, size_t size)
    160 {
    161     return malloc(size);
    162 }
    163 
    164 static void
    165 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
    166 {
    167     free(ptr);
    168 }
    169 #endif
    170 
    171 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
    172 #ifdef WITH_PYMALLOC
    173 #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
    174 #endif
    175 
    176 #define PYRAW_ALLOC MALLOC_ALLOC
    177 #ifdef WITH_PYMALLOC
    178 #  define PYOBJ_ALLOC PYMALLOC_ALLOC
    179 #else
    180 #  define PYOBJ_ALLOC MALLOC_ALLOC
    181 #endif
    182 #define PYMEM_ALLOC PYOBJ_ALLOC
    183 
    184 typedef struct {
     185     /* We tag each block with an API ID in order to detect API violations */
    186     char api_id;
    187     PyMemAllocatorEx alloc;
    188 } debug_alloc_api_t;
    189 static struct {
    190     debug_alloc_api_t raw;
    191     debug_alloc_api_t mem;
    192     debug_alloc_api_t obj;
    193 } _PyMem_Debug = {
    194     {'r', PYRAW_ALLOC},
    195     {'m', PYMEM_ALLOC},
    196     {'o', PYOBJ_ALLOC}
    197     };
    198 
    199 #define PYDBGRAW_ALLOC \
    200     {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
    201 #define PYDBGMEM_ALLOC \
    202     {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
    203 #define PYDBGOBJ_ALLOC \
    204     {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
    205 
    206 #ifdef Py_DEBUG
    207 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
    208 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
    209 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
    210 #else
    211 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
    212 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
    213 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
    214 #endif
    215 
    216 
    217 static int
    218 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
    219                             PyMemAllocatorEx *old_alloc)
    220 {
    221     if (old_alloc != NULL) {
    222         PyMem_GetAllocator(domain, old_alloc);
    223     }
    224 
    225 
    226     PyMemAllocatorEx new_alloc;
    227     switch(domain)
    228     {
    229     case PYMEM_DOMAIN_RAW:
    230         new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
    231         break;
    232     case PYMEM_DOMAIN_MEM:
    233         new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
    234         break;
    235     case PYMEM_DOMAIN_OBJ:
    236         new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
    237         break;
    238     default:
    239         /* unknown domain */
    240         return -1;
    241     }
    242     PyMem_SetAllocator(domain, &new_alloc);
    243     if (debug) {
    244         _PyMem_SetupDebugHooksDomain(domain);
    245     }
    246     return 0;
    247 }
    248 
    249 
    250 int
    251 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
    252                            PyMemAllocatorEx *old_alloc)
    253 {
    254 #ifdef Py_DEBUG
    255     const int debug = 1;
    256 #else
    257     const int debug = 0;
    258 #endif
    259     return pymem_set_default_allocator(domain, debug, old_alloc);
    260 }
    261 
    262 
    263 int
    264 _PyMem_SetupAllocators(const char *opt)
    265 {
    266     if (opt == NULL || *opt == '\0') {
     267         /* PYTHONMALLOC is empty, not set, or ignored (-E/-I command line
     268            options): use the default memory allocators */
    269         opt = "default";
    270     }
    271 
    272     if (strcmp(opt, "default") == 0) {
    273         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
    274         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
    275         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
    276     }
    277     else if (strcmp(opt, "debug") == 0) {
    278         (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
    279         (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
    280         (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
    281     }
    282 #ifdef WITH_PYMALLOC
    283     else if (strcmp(opt, "pymalloc") == 0 || strcmp(opt, "pymalloc_debug") == 0) {
    284         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
    285         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
    286 
    287         PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
    288         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
    289         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
    290 
    291         if (strcmp(opt, "pymalloc_debug") == 0) {
    292             PyMem_SetupDebugHooks();
    293         }
    294     }
    295 #endif
    296     else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0) {
    297         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
    298         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
    299         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
    300         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
    301 
    302         if (strcmp(opt, "malloc_debug") == 0) {
    303             PyMem_SetupDebugHooks();
    304         }
    305     }
    306     else {
    307         /* unknown allocator */
    308         return -1;
    309     }
    310     return 0;
    311 }
    312 
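         /*
          * Illustrative sketch, not part of the build: the names recognized above
          * are the values accepted in PYTHONMALLOC (see the comment at the top of
          * this function).  For example, a call such as
          *
          *     if (_PyMem_SetupAllocators("malloc_debug") < 0) {
          *         // -1: the allocator name was not recognized
          *     }
          *
          * routes all three domains to the C library allocator and then installs
          * the debug hooks, exactly as the "malloc_debug" branch above does.
          */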
    313 
    314 static int
    315 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
    316 {
    317     return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
    318 }
    319 
    320 
    321 const char*
    322 _PyMem_GetAllocatorsName(void)
    323 {
    324     PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
    325 #ifdef WITH_PYMALLOC
    326     PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
    327 #endif
    328 
    329     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
    330         pymemallocator_eq(&_PyMem, &malloc_alloc) &&
    331         pymemallocator_eq(&_PyObject, &malloc_alloc))
    332     {
    333         return "malloc";
    334     }
    335 #ifdef WITH_PYMALLOC
    336     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
    337         pymemallocator_eq(&_PyMem, &pymalloc) &&
    338         pymemallocator_eq(&_PyObject, &pymalloc))
    339     {
    340         return "pymalloc";
    341     }
    342 #endif
    343 
    344     PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
    345     PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
    346     PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
    347 
    348     if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
    349         pymemallocator_eq(&_PyMem, &dbg_mem) &&
    350         pymemallocator_eq(&_PyObject, &dbg_obj))
    351     {
    352         /* Debug hooks installed */
    353         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
    354             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
    355             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
    356         {
    357             return "malloc_debug";
    358         }
    359 #ifdef WITH_PYMALLOC
    360         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
    361             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
    362             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
    363         {
    364             return "pymalloc_debug";
    365         }
    366 #endif
    367     }
    368     return NULL;
    369 }
    370 
    371 
    372 #undef MALLOC_ALLOC
    373 #undef PYMALLOC_ALLOC
    374 #undef PYRAW_ALLOC
    375 #undef PYMEM_ALLOC
    376 #undef PYOBJ_ALLOC
    377 #undef PYDBGRAW_ALLOC
    378 #undef PYDBGMEM_ALLOC
    379 #undef PYDBGOBJ_ALLOC
    380 
    381 
    382 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
    383 #ifdef MS_WINDOWS
    384     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
    385 #elif defined(ARENAS_USE_MMAP)
    386     _PyObject_ArenaMmap, _PyObject_ArenaMunmap
    387 #else
    388     _PyObject_ArenaMalloc, _PyObject_ArenaFree
    389 #endif
    390     };
    391 
    392 #ifdef WITH_PYMALLOC
    393 static int
    394 _PyMem_DebugEnabled(void)
    395 {
    396     return (_PyObject.malloc == _PyMem_DebugMalloc);
    397 }
    398 
    399 static int
    400 _PyMem_PymallocEnabled(void)
    401 {
    402     if (_PyMem_DebugEnabled()) {
    403         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
    404     }
    405     else {
    406         return (_PyObject.malloc == _PyObject_Malloc);
    407     }
    408 }
    409 #endif
    410 
    411 
    412 static void
    413 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
    414 {
    415     PyMemAllocatorEx alloc;
    416 
    417     if (domain == PYMEM_DOMAIN_RAW) {
    418         if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
    419             return;
    420         }
    421 
    422         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
    423         alloc.ctx = &_PyMem_Debug.raw;
    424         alloc.malloc = _PyMem_DebugRawMalloc;
    425         alloc.calloc = _PyMem_DebugRawCalloc;
    426         alloc.realloc = _PyMem_DebugRawRealloc;
    427         alloc.free = _PyMem_DebugRawFree;
    428         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    429     }
    430     else if (domain == PYMEM_DOMAIN_MEM) {
    431         if (_PyMem.malloc == _PyMem_DebugMalloc) {
    432             return;
    433         }
    434 
    435         PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
    436         alloc.ctx = &_PyMem_Debug.mem;
    437         alloc.malloc = _PyMem_DebugMalloc;
    438         alloc.calloc = _PyMem_DebugCalloc;
    439         alloc.realloc = _PyMem_DebugRealloc;
    440         alloc.free = _PyMem_DebugFree;
    441         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    442     }
    443     else if (domain == PYMEM_DOMAIN_OBJ)  {
    444         if (_PyObject.malloc == _PyMem_DebugMalloc) {
    445             return;
    446         }
    447 
    448         PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
    449         alloc.ctx = &_PyMem_Debug.obj;
    450         alloc.malloc = _PyMem_DebugMalloc;
    451         alloc.calloc = _PyMem_DebugCalloc;
    452         alloc.realloc = _PyMem_DebugRealloc;
    453         alloc.free = _PyMem_DebugFree;
    454         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    455     }
    456 }
    457 
    458 
    459 void
    460 PyMem_SetupDebugHooks(void)
    461 {
    462     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
    463     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
    464     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
    465 }
    466 
    467 void
    468 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
    469 {
    470     switch(domain)
    471     {
    472     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
    473     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
    474     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
    475     default:
    476         /* unknown domain: set all attributes to NULL */
    477         allocator->ctx = NULL;
    478         allocator->malloc = NULL;
    479         allocator->calloc = NULL;
    480         allocator->realloc = NULL;
    481         allocator->free = NULL;
    482     }
    483 }
    484 
    485 void
    486 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
    487 {
    488     switch(domain)
    489     {
    490     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
    491     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
    492     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
    493     /* ignore unknown domain */
    494     }
    495 }
    496 
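         /*
          * Illustrative sketch, not part of the build: PyMem_GetAllocator() and
          * PyMem_SetAllocator() are the public hook points for replacing a domain's
          * allocator.  A hypothetical counting hook for the raw domain, installed
          * before the interpreter starts allocating, could look like:
          *
          *     static PyMemAllocatorEx orig_raw;
          *     static size_t raw_malloc_calls;       // made up for the example
          *
          *     static void *
          *     counting_raw_malloc(void *ctx, size_t size)
          *     {
          *         raw_malloc_calls++;
          *         return orig_raw.malloc(orig_raw.ctx, size);
          *     }
          *
          *     PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &orig_raw);
          *     PyMemAllocatorEx hook = orig_raw;     // keep ctx and the other slots
          *     hook.malloc = counting_raw_malloc;
          *     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook);
          *
          * Only the malloc slot is wrapped here; calloc/realloc/free still point at
          * the original allocator, which is why ctx is left unchanged.
          */
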
    497 void
    498 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
    499 {
    500     *allocator = _PyObject_Arena;
    501 }
    502 
    503 void
    504 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
    505 {
    506     _PyObject_Arena = *allocator;
    507 }
    508 
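         /*
          * Illustrative sketch, not part of the build: arena allocation can be
          * hooked the same way through the pair of functions above.  A hypothetical
          * tracing hook that reuses the default callbacks:
          *
          *     static PyObjectArenaAllocator orig_arena;
          *
          *     static void *
          *     tracing_arena_alloc(void *ctx, size_t size)
          *     {
          *         fprintf(stderr, "arena alloc: %zu bytes\n", size);
          *         return orig_arena.alloc(orig_arena.ctx, size);
          *     }
          *
          *     PyObject_GetArenaAllocator(&orig_arena);
          *     PyObjectArenaAllocator hook = orig_arena;
          *     hook.alloc = tracing_arena_alloc;
          *     PyObject_SetArenaAllocator(&hook);
          *
          * The free callback is deliberately left pointing at the original one.
          */
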
    509 void *
    510 PyMem_RawMalloc(size_t size)
    511 {
    512     /*
    513      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
    514      * Most python internals blindly use a signed Py_ssize_t to track
    515      * things without checking for overflows or negatives.
    516      * As size_t is unsigned, checking for size < 0 is not required.
    517      */
    518     if (size > (size_t)PY_SSIZE_T_MAX)
    519         return NULL;
    520     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
    521 }
    522 
    523 void *
    524 PyMem_RawCalloc(size_t nelem, size_t elsize)
    525 {
    526     /* see PyMem_RawMalloc() */
    527     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
    528         return NULL;
    529     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
    530 }
    531 
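         /*
          * Worked example of the overflow guard above (illustrative only): with a
          * 64-bit Py_ssize_t, PY_SSIZE_T_MAX is 2**63 - 1.  For elsize == 16 the
          * test rejects any nelem > (2**63 - 1) / 16, i.e. exactly the requests
          * whose product nelem * elsize would exceed PY_SSIZE_T_MAX -- and it does
          * so without computing the product, so the check itself cannot overflow.
          */
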
    532 void*
    533 PyMem_RawRealloc(void *ptr, size_t new_size)
    534 {
    535     /* see PyMem_RawMalloc() */
    536     if (new_size > (size_t)PY_SSIZE_T_MAX)
    537         return NULL;
    538     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
    539 }
    540 
    541 void PyMem_RawFree(void *ptr)
    542 {
    543     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
    544 }
    545 
    546 
    547 void *
    548 PyMem_Malloc(size_t size)
    549 {
    550     /* see PyMem_RawMalloc() */
    551     if (size > (size_t)PY_SSIZE_T_MAX)
    552         return NULL;
    553     return _PyMem.malloc(_PyMem.ctx, size);
    554 }
    555 
    556 void *
    557 PyMem_Calloc(size_t nelem, size_t elsize)
    558 {
    559     /* see PyMem_RawMalloc() */
    560     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
    561         return NULL;
    562     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
    563 }
    564 
    565 void *
    566 PyMem_Realloc(void *ptr, size_t new_size)
    567 {
    568     /* see PyMem_RawMalloc() */
    569     if (new_size > (size_t)PY_SSIZE_T_MAX)
    570         return NULL;
    571     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
    572 }
    573 
    574 void
    575 PyMem_Free(void *ptr)
    576 {
    577     _PyMem.free(_PyMem.ctx, ptr);
    578 }
    579 
    580 
    581 wchar_t*
    582 _PyMem_RawWcsdup(const wchar_t *str)
    583 {
    584     assert(str != NULL);
    585 
    586     size_t len = wcslen(str);
    587     if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
    588         return NULL;
    589     }
    590 
    591     size_t size = (len + 1) * sizeof(wchar_t);
    592     wchar_t *str2 = PyMem_RawMalloc(size);
    593     if (str2 == NULL) {
    594         return NULL;
    595     }
    596 
    597     memcpy(str2, str, size);
    598     return str2;
    599 }
    600 
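         /*
          * Worked example of the length guard above (illustrative only): with a
          * 4-byte wchar_t and a 64-bit Py_ssize_t, the guard rejects any len
          * greater than (2**63 - 1) / 4 - 1, which is precisely what guarantees
          * that size = (len + 1) * sizeof(wchar_t) stays within PY_SSIZE_T_MAX and
          * cannot wrap around size_t before being handed to PyMem_RawMalloc().
          */
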
    601 char *
    602 _PyMem_RawStrdup(const char *str)
    603 {
    604     assert(str != NULL);
    605     size_t size = strlen(str) + 1;
    606     char *copy = PyMem_RawMalloc(size);
    607     if (copy == NULL) {
    608         return NULL;
    609     }
    610     memcpy(copy, str, size);
    611     return copy;
    612 }
    613 
    614 char *
    615 _PyMem_Strdup(const char *str)
    616 {
    617     assert(str != NULL);
    618     size_t size = strlen(str) + 1;
    619     char *copy = PyMem_Malloc(size);
    620     if (copy == NULL) {
    621         return NULL;
    622     }
    623     memcpy(copy, str, size);
    624     return copy;
    625 }
    626 
    627 void *
    628 PyObject_Malloc(size_t size)
    629 {
    630     /* see PyMem_RawMalloc() */
    631     if (size > (size_t)PY_SSIZE_T_MAX)
    632         return NULL;
    633     return _PyObject.malloc(_PyObject.ctx, size);
    634 }
    635 
    636 void *
    637 PyObject_Calloc(size_t nelem, size_t elsize)
    638 {
    639     /* see PyMem_RawMalloc() */
    640     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
    641         return NULL;
    642     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
    643 }
    644 
    645 void *
    646 PyObject_Realloc(void *ptr, size_t new_size)
    647 {
    648     /* see PyMem_RawMalloc() */
    649     if (new_size > (size_t)PY_SSIZE_T_MAX)
    650         return NULL;
    651     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
    652 }
    653 
    654 void
    655 PyObject_Free(void *ptr)
    656 {
    657     _PyObject.free(_PyObject.ctx, ptr);
    658 }
    659 
    660 
    661 #ifdef WITH_PYMALLOC
    662 
    663 #ifdef WITH_VALGRIND
    664 #include <valgrind/valgrind.h>
    665 
    666 /* If we're using GCC, use __builtin_expect() to reduce overhead of
    667    the valgrind checks */
    668 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
    669 #  define UNLIKELY(value) __builtin_expect((value), 0)
    670 #else
    671 #  define UNLIKELY(value) (value)
    672 #endif
    673 
    674 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
    675 static int running_on_valgrind = -1;
    676 #endif
    677 
    678 
    679 /* An object allocator for Python.
    680 
    681    Here is an introduction to the layers of the Python memory architecture,
     682    showing where the object allocator is actually used (layer +2). It is
    683    called for every object allocation and deallocation (PyObject_New/Del),
    684    unless the object-specific allocators implement a proprietary allocation
    685    scheme (ex.: ints use a simple free list). This is also the place where
    686    the cyclic garbage collector operates selectively on container objects.
    687 
    688 
    689     Object-specific allocators
    690     _____   ______   ______       ________
    691    [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
    692 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    693     _______________________________       |                           |
    694    [   Python's object allocator   ]      |                           |
    695 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
    696     ______________________________________________________________    |
    697    [          Python's raw memory allocator (PyMem_ API)          ]   |
    698 +1 | <----- Python memory (under PyMem manager's control) ------> |   |
    699     __________________________________________________________________
    700    [    Underlying general-purpose allocator (ex: C library malloc)   ]
    701  0 | <------ Virtual memory allocated for the python process -------> |
    702 
    703    =========================================================================
    704     _______________________________________________________________________
    705    [                OS-specific Virtual Memory Manager (VMM)               ]
    706 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    707     __________________________________   __________________________________
    708    [                                  ] [                                  ]
    709 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
    710 
    711 */
    712 /*==========================================================================*/
    713 
    714 /* A fast, special-purpose memory allocator for small blocks, to be used
    715    on top of a general-purpose malloc -- heavily based on previous art. */
    716 
    717 /* Vladimir Marangozov -- August 2000 */
    718 
    719 /*
    720  * "Memory management is where the rubber meets the road -- if we do the wrong
    721  * thing at any level, the results will not be good. And if we don't make the
    722  * levels work well together, we are in serious trouble." (1)
    723  *
    724  * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
    725  *    "Dynamic Storage Allocation: A Survey and Critical Review",
    726  *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
    727  */
    728 
    729 /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
    730 
    731 /*==========================================================================*/
    732 
    733 /*
    734  * Allocation strategy abstract:
    735  *
    736  * For small requests, the allocator sub-allocates <Big> blocks of memory.
    737  * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
    738  * system's allocator.
    739  *
    740  * Small requests are grouped in size classes spaced 8 bytes apart, due
    741  * to the required valid alignment of the returned address. Requests of
    742  * a particular size are serviced from memory pools of 4K (one VMM page).
    743  * Pools are fragmented on demand and contain free lists of blocks of one
    744  * particular size class. In other words, there is a fixed-size allocator
    745  * for each size class. Free pools are shared by the different allocators
    746  * thus minimizing the space reserved for a particular size class.
    747  *
    748  * This allocation strategy is a variant of what is known as "simple
    749  * segregated storage based on array of free lists". The main drawback of
     750  * simple segregated storage is that we might end up with a lot of reserved
     751  * memory for the different free lists, which degenerates over time. To avoid
     752  * this, we partition each free list into pools and dynamically share the
     753  * reserved space between all free lists. This technique is quite efficient
     754  * for memory-intensive programs which allocate mainly small-sized blocks.
    755  *
    756  * For small requests we have the following table:
    757  *
    758  * Request in bytes     Size of allocated block      Size class idx
    759  * ----------------------------------------------------------------
    760  *        1-8                     8                       0
    761  *        9-16                   16                       1
    762  *       17-24                   24                       2
    763  *       25-32                   32                       3
    764  *       33-40                   40                       4
    765  *       41-48                   48                       5
    766  *       49-56                   56                       6
    767  *       57-64                   64                       7
    768  *       65-72                   72                       8
    769  *        ...                   ...                     ...
    770  *      497-504                 504                      62
    771  *      505-512                 512                      63
    772  *
    773  *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
    774  *      allocator.
    775  */
    776 
    777 /*==========================================================================*/
    778 
    779 /*
    780  * -- Main tunable settings section --
    781  */
    782 
    783 /*
     784  * Alignment of addresses returned to the user. 8-byte alignment works
     785  * on most current architectures (with 32-bit or 64-bit address buses).
    786  * The alignment value is also used for grouping small requests in size
    787  * classes spaced ALIGNMENT bytes apart.
    788  *
    789  * You shouldn't change this unless you know what you are doing.
    790  */
    791 #define ALIGNMENT               8               /* must be 2^N */
    792 #define ALIGNMENT_SHIFT         3
    793 
    794 /* Return the number of bytes in size class I, as a uint. */
    795 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
    796 
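         /*
          * Worked example (illustrative only): a 20-byte request falls in the 17-24
          * row of the table above, i.e. size class index 2, and the caller actually
          * receives a 24-byte block.  The allocator below computes the index as
          *
          *     size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;   // (20-1) >> 3 == 2
          *
          * and INDEX2SIZE(2) == (2 + 1) << 3 == 24.
          */
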
    797 /*
    798  * Max size threshold below which malloc requests are considered to be
     799  * small enough to be served from preallocated memory pools. You can tune
    800  * this value according to your application behaviour and memory needs.
    801  *
    802  * Note: a size threshold of 512 guarantees that newly created dictionaries
    803  * will be allocated from preallocated memory pools on 64-bit.
    804  *
    805  * The following invariants must hold:
    806  *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
    807  *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
    808  *
    809  * Although not required, for better performance and space efficiency,
    810  * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
    811  */
    812 #define SMALL_REQUEST_THRESHOLD 512
    813 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
    814 
    815 /*
    816  * The system's VMM page size can be obtained on most unices with a
    817  * getpagesize() call or deduced from various header files. To make
    818  * things simpler, we assume that it is 4K, which is OK for most systems.
    819  * It is probably better if this is the native page size, but it doesn't
    820  * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
    821  * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
    822  * violation fault.  4K is apparently OK for all the platforms that python
    823  * currently targets.
    824  */
    825 #define SYSTEM_PAGE_SIZE        (4 * 1024)
    826 #define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
    827 
    828 /*
    829  * Maximum amount of memory managed by the allocator for small requests.
    830  */
    831 #ifdef WITH_MEMORY_LIMITS
    832 #ifndef SMALL_MEMORY_LIMIT
    833 #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
    834 #endif
    835 #endif
    836 
    837 /*
    838  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
    839  * on a page boundary. This is a reserved virtual address space for the
     840  * current process (obtained through a malloc()/mmap() call). This in no way
     841  * means that the memory arenas will be used entirely. A malloc(<Big>) is
     842  * usually just an address range reservation for <Big> bytes; physical pages
     843  * are only consumed as they are actually referenced. So malloc'ing big
     844  * blocks and not using them does not mean "wasting memory": it only wastes
     845  * address space.
    846  *
    847  * Arenas are allocated with mmap() on systems supporting anonymous memory
    848  * mappings to reduce heap fragmentation.
    849  */
    850 #define ARENA_SIZE              (256 << 10)     /* 256KB */
    851 
    852 #ifdef WITH_MEMORY_LIMITS
    853 #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
    854 #endif
    855 
    856 /*
    857  * Size of the pools used for small blocks. Should be a power of 2,
    858  * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
    859  */
    860 #define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
    861 #define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
    862 
    863 /*
    864  * -- End of tunable settings section --
    865  */
    866 
    867 /*==========================================================================*/
    868 
    869 /*
    870  * Locking
    871  *
    872  * To reduce lock contention, it would probably be better to refine the
     873  * crude function locking with per-size-class locking. I'm not positive,
     874  * however, whether it's worth switching to such a locking policy because
    875  * of the performance penalty it might introduce.
    876  *
    877  * The following macros describe the simplest (should also be the fastest)
    878  * lock object on a particular platform and the init/fini/lock/unlock
    879  * operations on it. The locks defined here are not expected to be recursive
    880  * because it is assumed that they will always be called in the order:
    881  * INIT, [LOCK, UNLOCK]*, FINI.
    882  */
    883 
    884 /*
    885  * Python's threads are serialized, so object malloc locking is disabled.
    886  */
    887 #define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
    888 #define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
    889 #define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
    890 #define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
    891 #define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
    892 
    893 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
    894 typedef uint8_t block;
    895 
    896 /* Pool for small blocks. */
    897 struct pool_header {
    898     union { block *_padding;
    899             uint count; } ref;          /* number of allocated blocks    */
    900     block *freeblock;                   /* pool's free list head         */
    901     struct pool_header *nextpool;       /* next pool of this size class  */
    902     struct pool_header *prevpool;       /* previous pool       ""        */
    903     uint arenaindex;                    /* index into arenas of base adr */
    904     uint szidx;                         /* block size class index        */
    905     uint nextoffset;                    /* bytes to virgin block         */
    906     uint maxnextoffset;                 /* largest valid nextoffset      */
    907 };
    908 
    909 typedef struct pool_header *poolp;
    910 
    911 /* Record keeping for arenas. */
    912 struct arena_object {
    913     /* The address of the arena, as returned by malloc.  Note that 0
    914      * will never be returned by a successful malloc, and is used
    915      * here to mark an arena_object that doesn't correspond to an
    916      * allocated arena.
    917      */
    918     uintptr_t address;
    919 
    920     /* Pool-aligned pointer to the next pool to be carved off. */
    921     block* pool_address;
    922 
    923     /* The number of available pools in the arena:  free pools + never-
    924      * allocated pools.
    925      */
    926     uint nfreepools;
    927 
    928     /* The total number of pools in the arena, whether or not available. */
    929     uint ntotalpools;
    930 
    931     /* Singly-linked list of available pools. */
    932     struct pool_header* freepools;
    933 
    934     /* Whenever this arena_object is not associated with an allocated
    935      * arena, the nextarena member is used to link all unassociated
    936      * arena_objects in the singly-linked `unused_arena_objects` list.
    937      * The prevarena member is unused in this case.
    938      *
    939      * When this arena_object is associated with an allocated arena
    940      * with at least one available pool, both members are used in the
    941      * doubly-linked `usable_arenas` list, which is maintained in
    942      * increasing order of `nfreepools` values.
    943      *
    944      * Else this arena_object is associated with an allocated arena
    945      * all of whose pools are in use.  `nextarena` and `prevarena`
    946      * are both meaningless in this case.
    947      */
    948     struct arena_object* nextarena;
    949     struct arena_object* prevarena;
    950 };
    951 
    952 #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
    953 
    954 #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
    955 
    956 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
    957 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
    958 
    959 /* Return total number of blocks in pool of size index I, as a uint. */
    960 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
    961 
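         /*
          * Worked example (illustrative only, assuming a typical 64-bit build where
          * sizeof(struct pool_header) is 48): POOL_OVERHEAD rounds 48 up to a
          * multiple of ALIGNMENT, which leaves it at 48, so the smallest size class
          * packs
          *
          *     NUMBLOCKS(0) == (uint)(4096 - 48) / 8 == 506
          *
          * blocks into one 4K pool, while the largest class (512-byte blocks) packs
          * NUMBLOCKS(63) == 4048 / 512 == 7.
          */
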
    962 /*==========================================================================*/
    963 
    964 /*
    965  * This malloc lock
    966  */
    967 SIMPLELOCK_DECL(_malloc_lock)
    968 #define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
    969 #define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
    970 #define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
    971 #define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
    972 
    973 /*
    974  * Pool table -- headed, circular, doubly-linked lists of partially used pools.
    975 
    976 This is involved.  For an index i, usedpools[i+i] is the header for a list of
    977 all partially used pools holding small blocks with "size class idx" i. So
    978 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
    979 16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
    980 
    981 Pools are carved off an arena's highwater mark (an arena_object's pool_address
    982 member) as needed.  Once carved off, a pool is in one of three states forever
    983 after:
    984 
    985 used == partially used, neither empty nor full
    986     At least one block in the pool is currently allocated, and at least one
    987     block in the pool is not currently allocated (note this implies a pool
    988     has room for at least two blocks).
    989     This is a pool's initial state, as a pool is created only when malloc
    990     needs space.
    991     The pool holds blocks of a fixed size, and is in the circular list headed
    992     at usedpools[i] (see above).  It's linked to the other used pools of the
    993     same size class via the pool_header's nextpool and prevpool members.
    994     If all but one block is currently allocated, a malloc can cause a
    995     transition to the full state.  If all but one block is not currently
    996     allocated, a free can cause a transition to the empty state.
    997 
    998 full == all the pool's blocks are currently allocated
    999     On transition to full, a pool is unlinked from its usedpools[] list.
   1000     It's not linked to from anything then anymore, and its nextpool and
   1001     prevpool members are meaningless until it transitions back to used.
   1002     A free of a block in a full pool puts the pool back in the used state.
   1003     Then it's linked in at the front of the appropriate usedpools[] list, so
   1004     that the next allocation for its size class will reuse the freed block.
   1005 
   1006 empty == all the pool's blocks are currently available for allocation
   1007     On transition to empty, a pool is unlinked from its usedpools[] list,
   1008     and linked to the front of its arena_object's singly-linked freepools list,
   1009     via its nextpool member.  The prevpool member has no meaning in this case.
   1010     Empty pools have no inherent size class:  the next time a malloc finds
   1011     an empty list in usedpools[], it takes the first pool off of freepools.
   1012     If the size class needed happens to be the same as the size class the pool
   1013     last had, some pool initialization can be skipped.
   1014 
   1015 
   1016 Block Management
   1017 
   1018 Blocks within pools are again carved out as needed.  pool->freeblock points to
   1019 the start of a singly-linked list of free blocks within the pool.  When a
   1020 block is freed, it's inserted at the front of its pool's freeblock list.  Note
   1021 that the available blocks in a pool are *not* linked all together when a pool
   1022 is initialized.  Instead only "the first two" (lowest addresses) blocks are
   1023 set up, returning the first such block, and setting pool->freeblock to a
    1024 one-block list holding the second such block.  This is consistent with the
    1025 fact that pymalloc strives at all levels (arena, pool, and block) never to
    1026 touch a piece of memory until it's actually needed.
   1027 
   1028 So long as a pool is in the used state, we're certain there *is* a block
   1029 available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
   1030 points to the end of the free list before we've carved the entire pool into
   1031 blocks, that means we simply haven't yet gotten to one of the higher-address
   1032 blocks.  The offset from the pool_header to the start of "the next" virgin
   1033 block is stored in the pool_header nextoffset member, and the largest value
   1034 of nextoffset that makes sense is stored in the maxnextoffset member when a
   1035 pool is initialized.  All the blocks in a pool have been passed out at least
   1036 once when and only when nextoffset > maxnextoffset.
   1037 
   1038 
   1039 Major obscurity:  While the usedpools vector is declared to have poolp
   1040 entries, it doesn't really.  It really contains two pointers per (conceptual)
   1041 poolp entry, the nextpool and prevpool members of a pool_header.  The
   1042 excruciating initialization code below fools C so that
   1043 
    1044     usedpools[i+i]
   1045 
   1046 "acts like" a genuine poolp, but only so long as you only reference its
   1047 nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
   1048 compensating for that a pool_header's nextpool and prevpool members
   1049 immediately follow a pool_header's first two members:
   1050 
   1051     union { block *_padding;
   1052             uint count; } ref;
   1053     block *freeblock;
   1054 
   1055 each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
   1056 contains is a fudged-up pointer p such that *if* C believes it's a poolp
   1057 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
   1058 circular list is empty).
   1059 
   1060 It's unclear why the usedpools setup is so convoluted.  It could be to
   1061 minimize the amount of cache required to hold this heavily-referenced table
   1062 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
   1063 referencing code has to remember to "double the index" and doing so isn't
   1064 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
    1065 on the fact that C doesn't insert any padding anywhere in a pool_header at
    1066 or before the prevpool member.
   1067 **************************************************************************** */
   1068 
   1069 #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
   1070 #define PT(x)   PTA(x), PTA(x)
   1071 
   1072 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
   1073     PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
   1074 #if NB_SMALL_SIZE_CLASSES > 8
   1075     , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
   1076 #if NB_SMALL_SIZE_CLASSES > 16
   1077     , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
   1078 #if NB_SMALL_SIZE_CLASSES > 24
   1079     , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
   1080 #if NB_SMALL_SIZE_CLASSES > 32
   1081     , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
   1082 #if NB_SMALL_SIZE_CLASSES > 40
   1083     , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
   1084 #if NB_SMALL_SIZE_CLASSES > 48
   1085     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
   1086 #if NB_SMALL_SIZE_CLASSES > 56
   1087     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
   1088 #if NB_SMALL_SIZE_CLASSES > 64
    1089 #error "NB_SMALL_SIZE_CLASSES should be at most 64"
   1090 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
   1091 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
   1092 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
   1093 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
   1094 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
   1095 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
   1096 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
   1097 #endif /* NB_SMALL_SIZE_CLASSES >  8 */
   1098 };
   1099 
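         /*
          * Illustrative sketch, not part of the build: given the fudged pointers
          * described above, the "is there a partially used pool for size class
          * `size`?" check that the allocator performs boils down to
          *
          *     poolp pool = usedpools[size + size];
          *     if (pool != pool->nextpool) {
          *         // non-empty list: pool points at a usable pool_header
          *     }
          *
          * because an empty headed list is exactly one whose header points back at
          * itself.
          */
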
   1100 /*==========================================================================
   1101 Arena management.
   1102 
   1103 `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
   1104 which may not be currently used (== they're arena_objects that aren't
   1105 currently associated with an allocated arena).  Note that arenas proper are
   1106 separately malloc'ed.
   1107 
   1108 Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
   1109 we do try to free() arenas, and use some mild heuristic strategies to increase
   1110 the likelihood that arenas eventually can be freed.
   1111 
   1112 unused_arena_objects
   1113 
   1114     This is a singly-linked list of the arena_objects that are currently not
   1115     being used (no arena is associated with them).  Objects are taken off the
   1116     head of the list in new_arena(), and are pushed on the head of the list in
   1117     PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
   1118     is on this list if and only if its .address member is 0.
   1119 
   1120 usable_arenas
   1121 
   1122     This is a doubly-linked list of the arena_objects associated with arenas
   1123     that have pools available.  These pools are either waiting to be reused,
   1124     or have not been used before.  The list is sorted to have the most-
   1125     allocated arenas first (ascending order based on the nfreepools member).
   1126     This means that the next allocation will come from a heavily used arena,
   1127     which gives the nearly empty arenas a chance to be returned to the system.
   1128     In my unscientific tests this dramatically improved the number of arenas
   1129     that could be freed.
   1130 
   1131 Note that an arena_object associated with an arena all of whose pools are
   1132 currently in use isn't on either list.
   1133 */
   1134 
   1135 /* Array of objects used to track chunks of memory (arenas). */
   1136 static struct arena_object* arenas = NULL;
   1137 /* Number of slots currently allocated in the `arenas` vector. */
   1138 static uint maxarenas = 0;
   1139 
   1140 /* The head of the singly-linked, NULL-terminated list of available
   1141  * arena_objects.
   1142  */
   1143 static struct arena_object* unused_arena_objects = NULL;
   1144 
    1145 /* The head of the doubly-linked list (NULL-terminated at each end) of
   1146  * arena_objects associated with arenas that have pools available.
   1147  */
   1148 static struct arena_object* usable_arenas = NULL;
   1149 
   1150 /* How many arena_objects do we initially allocate?
   1151  * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
   1152  * `arenas` vector.
   1153  */
   1154 #define INITIAL_ARENA_OBJECTS 16
   1155 
   1156 /* Number of arenas allocated that haven't been free()'d. */
   1157 static size_t narenas_currently_allocated = 0;
   1158 
    1159 /* Total number of times malloc() was called to allocate an arena. */
   1160 static size_t ntimes_arena_allocated = 0;
   1161 /* High water mark (max value ever seen) for narenas_currently_allocated. */
   1162 static size_t narenas_highwater = 0;
   1163 
   1164 static Py_ssize_t _Py_AllocatedBlocks = 0;
   1165 
   1166 Py_ssize_t
   1167 _Py_GetAllocatedBlocks(void)
   1168 {
   1169     return _Py_AllocatedBlocks;
   1170 }
   1171 
   1172 
    1173 /* Allocate a new arena.  If we run out of memory, return NULL.  Otherwise,
    1174  * return the address of an arena_object
   1175  * describing the new arena.  It's expected that the caller will set
   1176  * `usable_arenas` to the return value.
   1177  */
   1178 static struct arena_object*
   1179 new_arena(void)
   1180 {
   1181     struct arena_object* arenaobj;
   1182     uint excess;        /* number of bytes above pool alignment */
   1183     void *address;
   1184     static int debug_stats = -1;
   1185 
   1186     if (debug_stats == -1) {
   1187         const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
   1188         debug_stats = (opt != NULL && *opt != '\0');
   1189     }
   1190     if (debug_stats)
   1191         _PyObject_DebugMallocStats(stderr);
   1192 
   1193     if (unused_arena_objects == NULL) {
   1194         uint i;
   1195         uint numarenas;
   1196         size_t nbytes;
   1197 
   1198         /* Double the number of arena objects on each allocation.
   1199          * Note that it's possible for `numarenas` to overflow.
   1200          */
   1201         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
   1202         if (numarenas <= maxarenas)
   1203             return NULL;                /* overflow */
   1204 #if SIZEOF_SIZE_T <= SIZEOF_INT
   1205         if (numarenas > SIZE_MAX / sizeof(*arenas))
   1206             return NULL;                /* overflow */
   1207 #endif
   1208         nbytes = numarenas * sizeof(*arenas);
   1209         arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
   1210         if (arenaobj == NULL)
   1211             return NULL;
   1212         arenas = arenaobj;
   1213 
   1214         /* We might need to fix pointers that were copied.  However,
   1215          * new_arena only gets called when all the pages in the
   1216          * previous arenas are full.  Thus, there are *no* pointers
   1217          * into the old array. Thus, we don't have to worry about
   1218          * invalid pointers.  Just to be sure, some asserts:
   1219          */
   1220         assert(usable_arenas == NULL);
   1221         assert(unused_arena_objects == NULL);
   1222 
   1223         /* Put the new arenas on the unused_arena_objects list. */
   1224         for (i = maxarenas; i < numarenas; ++i) {
   1225             arenas[i].address = 0;              /* mark as unassociated */
   1226             arenas[i].nextarena = i < numarenas - 1 ?
   1227                                    &arenas[i+1] : NULL;
   1228         }
   1229 
   1230         /* Update globals. */
   1231         unused_arena_objects = &arenas[maxarenas];
   1232         maxarenas = numarenas;
   1233     }
   1234 
   1235     /* Take the next available arena object off the head of the list. */
   1236     assert(unused_arena_objects != NULL);
   1237     arenaobj = unused_arena_objects;
   1238     unused_arena_objects = arenaobj->nextarena;
   1239     assert(arenaobj->address == 0);
   1240     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
   1241     if (address == NULL) {
   1242         /* The allocation failed: return NULL after putting the
   1243          * arenaobj back.
   1244          */
   1245         arenaobj->nextarena = unused_arena_objects;
   1246         unused_arena_objects = arenaobj;
   1247         return NULL;
   1248     }
   1249     arenaobj->address = (uintptr_t)address;
   1250 
   1251     ++narenas_currently_allocated;
   1252     ++ntimes_arena_allocated;
   1253     if (narenas_currently_allocated > narenas_highwater)
   1254         narenas_highwater = narenas_currently_allocated;
   1255     arenaobj->freepools = NULL;
   1256     /* pool_address <- first pool-aligned address in the arena
   1257        nfreepools <- number of whole pools that fit after alignment */
   1258     arenaobj->pool_address = (block*)arenaobj->address;
   1259     arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
   1260     assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
   1261     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
   1262     if (excess != 0) {
   1263         --arenaobj->nfreepools;
   1264         arenaobj->pool_address += POOL_SIZE - excess;
   1265     }
   1266     arenaobj->ntotalpools = arenaobj->nfreepools;
   1267 
   1268     return arenaobj;
   1269 }
   1270 
   1271 
   1272 /*
   1273 address_in_range(P, POOL)
   1274 
   1275 Return true if and only if P is an address that was allocated by pymalloc.
   1276 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
   1277 (the caller is asked to compute this because the macro expands POOL more than
   1278 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
   1279 variable and pass the latter to the macro; because address_in_range is
   1280 called on every alloc/realloc/free, micro-efficiency is important here).
   1281 
   1282 Tricky:  Let B be the arena base address associated with the pool, B =
   1283 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
   1284 
   1285     B <= P < B + ARENA_SIZE
   1286 
   1287 Subtracting B throughout, this is true iff
   1288 
   1289     0 <= P-B < ARENA_SIZE
   1290 
   1291 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
   1292 
   1293 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
   1294 before the first arena has been allocated.  `arenas` is still NULL in that
    1295 case.  We're relying on the fact that maxarenas is also 0 in that case, so
    1296 that (POOL)->arenaindex < maxarenas must be false, saving us from trying to
    1297 index into a NULL `arenas`.
   1298 
   1299 Details:  given P and POOL, the arena_object corresponding to P is AO =
   1300 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
   1301 stores, etc), POOL is the correct address of P's pool, AO.address is the
   1302 correct base address of the pool's arena, and P must be within ARENA_SIZE of
   1303 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
   1304 (NULL)).  Therefore address_in_range correctly reports that obmalloc
   1305 controls P.
   1306 
   1307 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
   1308 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
   1309 in this case -- it may even be uninitialized trash.  If the trash arenaindex
   1310 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
   1311 control P.
   1312 
    1313 Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
   1314 allocated arena, obmalloc controls all the memory in slice AO.address :
   1315 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
   1316 so P doesn't lie in that slice, so the macro correctly reports that P is not
   1317 controlled by obmalloc.
   1318 
   1319 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
   1320 arena_object (one not currently associated with an allocated arena),
   1321 AO.address is 0, and the second test in the macro reduces to:
   1322 
   1323     P < ARENA_SIZE
   1324 
   1325 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
   1326 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
   1327 of the test still passes, and the third clause (AO.address != 0) is necessary
   1328 to get the correct result:  AO.address is 0 in this case, so the macro
   1329 correctly reports that P is not controlled by obmalloc (despite that P lies in
   1330 slice AO.address : AO.address + ARENA_SIZE).
   1331 
   1332 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
   1333 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
   1334 corresponded to a currently-allocated arena, so the "P is not controlled by
   1335 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
   1336 was impossible.
   1337 
   1338 Note that the logic is excruciating, and reading up possibly uninitialized
   1339 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
   1340 creates problems for some memory debuggers.  The overwhelming advantage is
   1341 that this test determines whether an arbitrary address is controlled by
   1342 obmalloc in a small constant time, independent of the number of arenas
   1343 obmalloc controls.  Since this test is needed at every entry point, it's
   1344 extremely desirable that it be this fast.
   1345 */
   1346 
   1347 static bool _Py_NO_ADDRESS_SAFETY_ANALYSIS
   1348             _Py_NO_SANITIZE_THREAD
   1349             _Py_NO_SANITIZE_MEMORY
   1350 address_in_range(void *p, poolp pool)
   1351 {
   1352     // Since address_in_range may be reading from memory which was not allocated
   1353     // by Python, it is important that pool->arenaindex is read only once, as
   1354     // another thread may be concurrently modifying the value without holding
   1355     // the GIL. The following dance forces the compiler to read pool->arenaindex
   1356     // only once.
   1357     uint arenaindex = *((volatile uint *)&pool->arenaindex);
   1358     return arenaindex < maxarenas &&
   1359         (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
   1360         arenas[arenaindex].address != 0;
   1361 }
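/*
 * Editor's illustrative sketch (not part of obmalloc): the single unsigned
 * comparison above folds both halves of  B <= P < B + ARENA_SIZE  into one
 * test.  in_unsigned_range() is a hypothetical name used only here.
 */
static int
in_unsigned_range(uintptr_t p, uintptr_t base, size_t size)
{
    /* If p < base, the subtraction wraps around to a huge unsigned value
       that is necessarily >= size, so the "0 <=" half is implicit. */
    return p - base < size;
}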
   1362 
   1363 
   1364 /*==========================================================================*/
   1365 
   1366 /* pymalloc allocator
   1367 
   1368    The basic blocks are ordered by decreasing execution frequency,
   1369    which minimizes the number of jumps in the most common cases,
   1370    improves branch prediction and instruction scheduling (small
   1371    block allocations typically result in a couple of instructions),
   1372    unless the optimizer is too smart and reorders everything...
   1373 
   1374    Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
   1375 
   1376    Return 0 if pymalloc failed to allocate the memory block: the request
   1377    was too big, an error occurred in the code below (giving the caller a last
   1378    chance to serve the request another way), or the max memory limit was reached. */
   1379 static int
   1380 pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
   1381 {
   1382     block *bp;
   1383     poolp pool;
   1384     poolp next;
   1385     uint size;
   1386 
   1387 #ifdef WITH_VALGRIND
   1388     if (UNLIKELY(running_on_valgrind == -1)) {
   1389         running_on_valgrind = RUNNING_ON_VALGRIND;
   1390     }
   1391     if (UNLIKELY(running_on_valgrind)) {
   1392         return 0;
   1393     }
   1394 #endif
   1395 
   1396     if (nbytes == 0) {
   1397         return 0;
   1398     }
   1399     if (nbytes > SMALL_REQUEST_THRESHOLD) {
   1400         return 0;
   1401     }
   1402 
   1403     LOCK();
   1404     /*
   1405      * Most frequent paths first
   1406      */
   1407     size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
   1408     pool = usedpools[size + size];
   1409     if (pool != pool->nextpool) {
   1410         /*
   1411          * There is a used pool for this size class.
   1412          * Pick up the head block of its free list.
   1413          */
   1414         ++pool->ref.count;
   1415         bp = pool->freeblock;
   1416         assert(bp != NULL);
   1417         if ((pool->freeblock = *(block **)bp) != NULL) {
   1418             goto success;
   1419         }
   1420 
   1421         /*
   1422          * Reached the end of the free list, try to extend it.
   1423          */
   1424         if (pool->nextoffset <= pool->maxnextoffset) {
   1425             /* There is room for another block. */
   1426             pool->freeblock = (block*)pool +
   1427                               pool->nextoffset;
   1428             pool->nextoffset += INDEX2SIZE(size);
   1429             *(block **)(pool->freeblock) = NULL;
   1430             goto success;
   1431         }
   1432 
   1433         /* Pool is full, unlink from used pools. */
   1434         next = pool->nextpool;
   1435         pool = pool->prevpool;
   1436         next->prevpool = pool;
   1437         pool->nextpool = next;
   1438         goto success;
   1439     }
   1440 
   1441     /* There isn't a pool of the right size class immediately
   1442      * available:  use a free pool.
   1443      */
   1444     if (usable_arenas == NULL) {
   1445         /* No arena has a free pool:  allocate a new arena. */
   1446 #ifdef WITH_MEMORY_LIMITS
   1447         if (narenas_currently_allocated >= MAX_ARENAS) {
   1448             goto failed;
   1449         }
   1450 #endif
   1451         usable_arenas = new_arena();
   1452         if (usable_arenas == NULL) {
   1453             goto failed;
   1454         }
   1455         usable_arenas->nextarena =
   1456             usable_arenas->prevarena = NULL;
   1457     }
   1458     assert(usable_arenas->address != 0);
   1459 
   1460     /* Try to get a cached free pool. */
   1461     pool = usable_arenas->freepools;
   1462     if (pool != NULL) {
   1463         /* Unlink from cached pools. */
   1464         usable_arenas->freepools = pool->nextpool;
   1465 
   1466         /* This arena already had the smallest nfreepools
   1467          * value, so decreasing nfreepools doesn't change
   1468          * that, and we don't need to rearrange the
   1469          * usable_arenas list.  However, if the arena has
   1470          * become wholly allocated, we need to remove its
   1471          * arena_object from usable_arenas.
   1472          */
   1473         --usable_arenas->nfreepools;
   1474         if (usable_arenas->nfreepools == 0) {
   1475             /* Wholly allocated:  remove. */
   1476             assert(usable_arenas->freepools == NULL);
   1477             assert(usable_arenas->nextarena == NULL ||
   1478                    usable_arenas->nextarena->prevarena ==
   1479                    usable_arenas);
   1480 
   1481             usable_arenas = usable_arenas->nextarena;
   1482             if (usable_arenas != NULL) {
   1483                 usable_arenas->prevarena = NULL;
   1484                 assert(usable_arenas->address != 0);
   1485             }
   1486         }
   1487         else {
   1488             /* nfreepools > 0:  it must be that freepools
   1489              * isn't NULL, or that we haven't yet carved
   1490              * off all the arena's pools for the first
   1491              * time.
   1492              */
   1493             assert(usable_arenas->freepools != NULL ||
   1494                    usable_arenas->pool_address <=
   1495                    (block*)usable_arenas->address +
   1496                        ARENA_SIZE - POOL_SIZE);
   1497         }
   1498 
   1499     init_pool:
   1500         /* Frontlink to used pools. */
   1501         next = usedpools[size + size]; /* == prev */
   1502         pool->nextpool = next;
   1503         pool->prevpool = next;
   1504         next->nextpool = pool;
   1505         next->prevpool = pool;
   1506         pool->ref.count = 1;
   1507         if (pool->szidx == size) {
   1508             /* Luckily, this pool last contained blocks
   1509              * of the same size class, so its header
   1510              * and free list are already initialized.
   1511              */
   1512             bp = pool->freeblock;
   1513             assert(bp != NULL);
   1514             pool->freeblock = *(block **)bp;
   1515             goto success;
   1516         }
   1517         /*
   1518          * Initialize the pool header, set up the free list to
   1519          * contain just the second block, and return the first
   1520          * block.
   1521          */
   1522         pool->szidx = size;
   1523         size = INDEX2SIZE(size);
   1524         bp = (block *)pool + POOL_OVERHEAD;
   1525         pool->nextoffset = POOL_OVERHEAD + (size << 1);
   1526         pool->maxnextoffset = POOL_SIZE - size;
   1527         pool->freeblock = bp + size;
   1528         *(block **)(pool->freeblock) = NULL;
   1529         goto success;
   1530     }
   1531 
   1532     /* Carve off a new pool. */
   1533     assert(usable_arenas->nfreepools > 0);
   1534     assert(usable_arenas->freepools == NULL);
   1535     pool = (poolp)usable_arenas->pool_address;
   1536     assert((block*)pool <= (block*)usable_arenas->address +
   1537                              ARENA_SIZE - POOL_SIZE);
   1538     pool->arenaindex = (uint)(usable_arenas - arenas);
   1539     assert(&arenas[pool->arenaindex] == usable_arenas);
   1540     pool->szidx = DUMMY_SIZE_IDX;
   1541     usable_arenas->pool_address += POOL_SIZE;
   1542     --usable_arenas->nfreepools;
   1543 
   1544     if (usable_arenas->nfreepools == 0) {
   1545         assert(usable_arenas->nextarena == NULL ||
   1546                usable_arenas->nextarena->prevarena ==
   1547                usable_arenas);
   1548         /* Unlink the arena:  it is completely allocated. */
   1549         usable_arenas = usable_arenas->nextarena;
   1550         if (usable_arenas != NULL) {
   1551             usable_arenas->prevarena = NULL;
   1552             assert(usable_arenas->address != 0);
   1553         }
   1554     }
   1555 
   1556     goto init_pool;
   1557 
   1558 success:
   1559     UNLOCK();
   1560     assert(bp != NULL);
   1561     *ptr_p = (void *)bp;
   1562     return 1;
   1563 
   1564 failed:
   1565     UNLOCK();
   1566     return 0;
   1567 }
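/*
 * Editor's sketch of the size-class arithmetic used above (not part of
 * obmalloc), assuming the default ALIGNMENT of 8 (ALIGNMENT_SHIFT == 3).
 * size_class_index() and class_block_size() are hypothetical names.
 */
static unsigned int
size_class_index(size_t nbytes)
{
    /* Requests of 1..8 bytes map to index 0, 9..16 to index 1, ... */
    return (unsigned int)((nbytes - 1) >> 3);
}

static size_t
class_block_size(unsigned int index)
{
    /* Mirrors INDEX2SIZE(): index 0 -> 8-byte blocks, index 2 -> 24, ... */
    return ((size_t)index + 1) << 3;
}
/* Example: a 20-byte request maps to index 2 and is served from 24-byte
   blocks; usedpools[2 * 2] heads the list of used pools for that class. */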
   1568 
   1569 
   1570 static void *
   1571 _PyObject_Malloc(void *ctx, size_t nbytes)
   1572 {
   1573     void* ptr;
   1574     if (pymalloc_alloc(ctx, &ptr, nbytes)) {
   1575         _Py_AllocatedBlocks++;
   1576         return ptr;
   1577     }
   1578 
   1579     ptr = PyMem_RawMalloc(nbytes);
   1580     if (ptr != NULL) {
   1581         _Py_AllocatedBlocks++;
   1582     }
   1583     return ptr;
   1584 }
   1585 
   1586 
   1587 static void *
   1588 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
   1589 {
   1590     void* ptr;
   1591 
   1592     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
   1593     size_t nbytes = nelem * elsize;
   1594 
   1595     if (pymalloc_alloc(ctx, &ptr, nbytes)) {
   1596         memset(ptr, 0, nbytes);
   1597         _Py_AllocatedBlocks++;
   1598         return ptr;
   1599     }
   1600 
   1601     ptr = PyMem_RawCalloc(nelem, elsize);
   1602     if (ptr != NULL) {
   1603         _Py_AllocatedBlocks++;
   1604     }
   1605     return ptr;
   1606 }
   1607 
   1608 
   1609 /* Free a memory block allocated by pymalloc_alloc().
   1610    Return 1 if it was freed.
   1611    Return 0 if the block was not allocated by pymalloc_alloc(). */
   1612 static int
   1613 pymalloc_free(void *ctx, void *p)
   1614 {
   1615     poolp pool;
   1616     block *lastfree;
   1617     poolp next, prev;
   1618     uint size;
   1619 
   1620     assert(p != NULL);
   1621 
   1622 #ifdef WITH_VALGRIND
   1623     if (UNLIKELY(running_on_valgrind > 0)) {
   1624         return 0;
   1625     }
   1626 #endif
   1627 
   1628     pool = POOL_ADDR(p);
   1629     if (!address_in_range(p, pool)) {
   1630         return 0;
   1631     }
   1632     /* We allocated this address. */
   1633 
   1634     LOCK();
   1635 
   1636     /* Link p to the start of the pool's freeblock list.  Since
   1637      * the pool had at least the p block outstanding, the pool
   1638      * wasn't empty (so it's already in a usedpools[] list, or
   1639      * was full and is in no list -- it's not in the freeblocks
   1640      * list in any case).
   1641      */
   1642     assert(pool->ref.count > 0);            /* else it was empty */
   1643     *(block **)p = lastfree = pool->freeblock;
   1644     pool->freeblock = (block *)p;
   1645     if (!lastfree) {
   1646         /* Pool was full, so doesn't currently live in any list:
   1647          * link it to the front of the appropriate usedpools[] list.
   1648          * This mimics LRU pool usage for new allocations and
   1649          * targets optimal filling when several pools contain
   1650          * blocks of the same size class.
   1651          */
   1652         --pool->ref.count;
   1653         assert(pool->ref.count > 0);            /* else the pool is empty */
   1654         size = pool->szidx;
   1655         next = usedpools[size + size];
   1656         prev = next->prevpool;
   1657 
   1658         /* insert pool before next:   prev <-> pool <-> next */
   1659         pool->nextpool = next;
   1660         pool->prevpool = prev;
   1661         next->prevpool = pool;
   1662         prev->nextpool = pool;
   1663         goto success;
   1664     }
   1665 
   1666     struct arena_object* ao;
   1667     uint nf;  /* ao->nfreepools */
   1668 
   1669     /* freeblock wasn't NULL, so the pool wasn't full,
   1670      * and the pool is in a usedpools[] list.
   1671      */
   1672     if (--pool->ref.count != 0) {
   1673         /* pool isn't empty:  leave it in usedpools */
   1674         goto success;
   1675     }
   1676     /* Pool is now empty:  unlink from usedpools, and
   1677      * link to the front of freepools.  This ensures that
   1678      * previously freed pools will be allocated later
   1679      * (since they are no longer referenced, they may have been paged out).
   1680      */
   1681     next = pool->nextpool;
   1682     prev = pool->prevpool;
   1683     next->prevpool = prev;
   1684     prev->nextpool = next;
   1685 
   1686     /* Link the pool to freepools.  This is a singly-linked
   1687      * list, and pool->prevpool isn't used there.
   1688      */
   1689     ao = &arenas[pool->arenaindex];
   1690     pool->nextpool = ao->freepools;
   1691     ao->freepools = pool;
   1692     nf = ++ao->nfreepools;
   1693 
   1694     /* All the rest is arena management.  We just freed
   1695      * a pool, and there are 4 cases for arena mgmt:
   1696      * 1. If all the pools are free, return the arena to
   1697      *    the system free().
   1698      * 2. If this is the only free pool in the arena,
   1699      *    add the arena back to the `usable_arenas` list.
   1700      * 3. If the "next" arena has a smaller count of free
   1701      *    pools, we have to "slide this arena right" to
   1702      *    restore the invariant that usable_arenas is sorted
   1703      *    in order of nfreepools.
   1704      * 4. Else there's nothing more to do.
   1705      */
   1706     if (nf == ao->ntotalpools) {
   1707         /* Case 1.  First unlink ao from usable_arenas.
   1708          */
   1709         assert(ao->prevarena == NULL ||
   1710                ao->prevarena->address != 0);
   1711         assert(ao->nextarena == NULL ||
   1712                ao->nextarena->address != 0);
   1713 
   1714         /* Fix the pointer in the prevarena, or the
   1715          * usable_arenas pointer.
   1716          */
   1717         if (ao->prevarena == NULL) {
   1718             usable_arenas = ao->nextarena;
   1719             assert(usable_arenas == NULL ||
   1720                    usable_arenas->address != 0);
   1721         }
   1722         else {
   1723             assert(ao->prevarena->nextarena == ao);
   1724             ao->prevarena->nextarena =
   1725                 ao->nextarena;
   1726         }
   1727         /* Fix the pointer in the nextarena. */
   1728         if (ao->nextarena != NULL) {
   1729             assert(ao->nextarena->prevarena == ao);
   1730             ao->nextarena->prevarena =
   1731                 ao->prevarena;
   1732         }
   1733         /* Record that this arena_object slot is
   1734          * available to be reused.
   1735          */
   1736         ao->nextarena = unused_arena_objects;
   1737         unused_arena_objects = ao;
   1738 
   1739         /* Free the entire arena. */
   1740         _PyObject_Arena.free(_PyObject_Arena.ctx,
   1741                              (void *)ao->address, ARENA_SIZE);
   1742         ao->address = 0;                        /* mark unassociated */
   1743         --narenas_currently_allocated;
   1744 
   1745         goto success;
   1746     }
   1747 
   1748     if (nf == 1) {
   1749         /* Case 2.  Put ao at the head of
   1750          * usable_arenas.  Note that because
   1751          * ao->nfreepools was 0 before, ao isn't
   1752          * currently on the usable_arenas list.
   1753          */
   1754         ao->nextarena = usable_arenas;
   1755         ao->prevarena = NULL;
   1756         if (usable_arenas)
   1757             usable_arenas->prevarena = ao;
   1758         usable_arenas = ao;
   1759         assert(usable_arenas->address != 0);
   1760 
   1761         goto success;
   1762     }
   1763 
   1764     /* If this arena is now out of order, we need to keep
   1765      * the list sorted.  The list is kept sorted so that
   1766      * the "most full" arenas are used first, which allows
   1767      * the nearly empty arenas to be completely freed.  In
   1768      * a few un-scientific tests, it seems like this
   1769      * approach allowed a lot more memory to be freed.
   1770      */
   1771     if (ao->nextarena == NULL ||
   1772                  nf <= ao->nextarena->nfreepools) {
   1773         /* Case 4.  Nothing to do. */
   1774         goto success;
   1775     }
   1776     /* Case 3:  We have to move the arena towards the end
   1777      * of the list, because it has more free pools than
   1778      * the arena to its right.
   1779      * First unlink ao from usable_arenas.
   1780      */
   1781     if (ao->prevarena != NULL) {
   1782         /* ao isn't at the head of the list */
   1783         assert(ao->prevarena->nextarena == ao);
   1784         ao->prevarena->nextarena = ao->nextarena;
   1785     }
   1786     else {
   1787         /* ao is at the head of the list */
   1788         assert(usable_arenas == ao);
   1789         usable_arenas = ao->nextarena;
   1790     }
   1791     ao->nextarena->prevarena = ao->prevarena;
   1792 
   1793     /* Locate the new insertion point by iterating over
   1794      * the list, using our nextarena pointer.
   1795      */
   1796     while (ao->nextarena != NULL && nf > ao->nextarena->nfreepools) {
   1797         ao->prevarena = ao->nextarena;
   1798         ao->nextarena = ao->nextarena->nextarena;
   1799     }
   1800 
   1801     /* Insert ao at this point. */
   1802     assert(ao->nextarena == NULL || ao->prevarena == ao->nextarena->prevarena);
   1803     assert(ao->prevarena->nextarena == ao->nextarena);
   1804 
   1805     ao->prevarena->nextarena = ao;
   1806     if (ao->nextarena != NULL) {
   1807         ao->nextarena->prevarena = ao;
   1808     }
   1809 
   1810     /* Verify that the swaps worked. */
   1811     assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
   1812     assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
   1813     assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
   1814     assert((usable_arenas == ao && ao->prevarena == NULL)
   1815            || ao->prevarena->nextarena == ao);
   1816 
   1817     goto success;
   1818 
   1819 success:
   1820     UNLOCK();
   1821     return 1;
   1822 }
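/*
 * Editor's illustrative sketch (not part of obmalloc): the relinking above
 * is the standard circular doubly-linked-list insertion, spelled out with a
 * hypothetical example_node type.  The real usedpools[] array packs its
 * list headers with an address trick, but the four pointer stores are the
 * same.
 */
typedef struct example_node {
    struct example_node *nextpool;
    struct example_node *prevpool;
} example_node;

static void
example_insert_before(example_node *next, example_node *pool)
{
    example_node *prev = next->prevpool;
    pool->nextpool = next;           /* prev <-> pool <-> next */
    pool->prevpool = prev;
    next->prevpool = pool;
    prev->nextpool = pool;
}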
   1823 
   1824 
   1825 static void
   1826 _PyObject_Free(void *ctx, void *p)
   1827 {
   1828     /* PyObject_Free(NULL) has no effect */
   1829     if (p == NULL) {
   1830         return;
   1831     }
   1832 
   1833     _Py_AllocatedBlocks--;
   1834     if (!pymalloc_free(ctx, p)) {
   1835         /* pymalloc didn't allocate this address */
   1836         PyMem_RawFree(p);
   1837     }
   1838 }
   1839 
   1840 
   1841 /* pymalloc realloc.
   1842 
   1843    If nbytes==0, then as the Python docs promise, we do not treat this like
   1844    free(p), and return a non-NULL result.
   1845 
   1846    Return 1 if pymalloc reallocated memory and wrote the new pointer into
   1847    newptr_p.
   1848 
   1849    Return 0 if pymalloc didn't allocate p. */
   1850 static int
   1851 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
   1852 {
   1853     void *bp;
   1854     poolp pool;
   1855     size_t size;
   1856 
   1857     assert(p != NULL);
   1858 
   1859 #ifdef WITH_VALGRIND
   1860     /* Treat running_on_valgrind == -1 the same as 0 */
   1861     if (UNLIKELY(running_on_valgrind > 0)) {
   1862         return 0;
   1863     }
   1864 #endif
   1865 
   1866     pool = POOL_ADDR(p);
   1867     if (!address_in_range(p, pool)) {
   1868         /* pymalloc is not managing this block.
   1869 
   1870            If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
   1871            over this block.  However, if we do, we need to copy the valid data
   1872            from the C-managed block to one of our blocks, and there's no
   1873            portable way to know how much of the memory space starting at p is
   1874            valid.
   1875 
   1876            As bug 1185883 pointed out the hard way, it's possible that the
   1877            C-managed block is "at the end" of allocated VM space, so that a
   1878            memory fault can occur if we try to copy nbytes bytes starting at p.
   1879            Instead we punt: let C continue to manage this block. */
   1880         return 0;
   1881     }
   1882 
   1883     /* pymalloc is in charge of this block */
   1884     size = INDEX2SIZE(pool->szidx);
   1885     if (nbytes <= size) {
   1886         /* The block is staying the same or shrinking.
   1887 
   1888            If it's shrinking, there's a tradeoff: it costs cycles to copy the
   1889            block to a smaller size class, but it wastes memory not to copy it.
   1890 
   1891            The compromise here is to copy on shrink only if at least 25% of
   1892            size can be shaved off. */
   1893         if (4 * nbytes > 3 * size) {
   1894             /* It's the same, or shrinking and new/old > 3/4. */
   1895             *newptr_p = p;
   1896             return 1;
   1897         }
   1898         size = nbytes;
   1899     }
   1900 
   1901     bp = _PyObject_Malloc(ctx, nbytes);
   1902     if (bp != NULL) {
   1903         memcpy(bp, p, size);
   1904         _PyObject_Free(ctx, p);
   1905     }
   1906     *newptr_p = bp;
   1907     return 1;
   1908 }
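/* Editor's worked example of the 25% rule above, assuming a 512-byte size
   class: shrinking a 512-byte block to 400 bytes keeps it in place
   (4*400 > 3*512), while shrinking it to 384 bytes or fewer copies it to a
   smaller size class (4*384 == 3*512, so the "keep" test fails). */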
   1909 
   1910 
   1911 static void *
   1912 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
   1913 {
   1914     void *ptr2;
   1915 
   1916     if (ptr == NULL) {
   1917         return _PyObject_Malloc(ctx, nbytes);
   1918     }
   1919 
   1920     if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
   1921         return ptr2;
   1922     }
   1923 
   1924     return PyMem_RawRealloc(ptr, nbytes);
   1925 }
   1926 
   1927 #else   /* ! WITH_PYMALLOC */
   1928 
   1929 /*==========================================================================*/
   1930 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
   1931  * only be used by extensions that are compiled with pymalloc enabled. */
   1932 
   1933 Py_ssize_t
   1934 _Py_GetAllocatedBlocks(void)
   1935 {
   1936     return 0;
   1937 }
   1938 
   1939 #endif /* WITH_PYMALLOC */
   1940 
   1941 
   1942 /*==========================================================================*/
   1943 /* A cross-platform debugging allocator.  This doesn't manage memory directly;
   1944  * it wraps a real allocator, adding extra debugging info to the memory blocks.
   1945  */
   1946 
   1947 /* Special bytes broadcast into debug memory blocks at appropriate times.
   1948  * Strings of these are unlikely to be valid addresses, floats, ints or
   1949  * 7-bit ASCII.
   1950  */
   1951 #undef CLEANBYTE
   1952 #undef DEADBYTE
   1953 #undef FORBIDDENBYTE
   1954 #define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
   1955 #define DEADBYTE       0xDB    /* dead (newly freed) memory */
   1956 #define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
   1957 
   1958 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
   1959 
   1960 /* serialno is always incremented via calling this routine.  The point is
   1961  * to supply a single place to set a breakpoint.
   1962  */
   1963 static void
   1964 bumpserialno(void)
   1965 {
   1966     ++serialno;
   1967 }
   1968 
   1969 #define SST SIZEOF_SIZE_T
   1970 
   1971 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
   1972 static size_t
   1973 read_size_t(const void *p)
   1974 {
   1975     const uint8_t *q = (const uint8_t *)p;
   1976     size_t result = *q++;
   1977     int i;
   1978 
   1979     for (i = SST; --i > 0; ++q)
   1980         result = (result << 8) | *q;
   1981     return result;
   1982 }
   1983 
   1984 /* Write n as a big-endian size_t, MSB at address p, LSB at
   1985  * p + sizeof(size_t) - 1.
   1986  */
   1987 static void
   1988 write_size_t(void *p, size_t n)
   1989 {
   1990     uint8_t *q = (uint8_t *)p + SST - 1;
   1991     int i;
   1992 
   1993     for (i = SST; --i >= 0; --q) {
   1994         *q = (uint8_t)(n & 0xff);
   1995         n >>= 8;
   1996     }
   1997 }
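/* Editor's worked example (assuming a 64-bit build, so SST == 8):
   write_size_t(p, 300) stores the bytes 00 00 00 00 00 00 01 2C at p, and
   read_size_t(p) recovers 300 regardless of the host byte order. */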
   1998 
   1999 /* Let S = sizeof(size_t).  The debug malloc asks for 4*S extra bytes and
   2000    fills them with useful stuff, here calling the underlying malloc's result p:
   2001 
   2002 p[0: S]
   2003     Number of bytes originally asked for.  This is a size_t, big-endian (easier
   2004     to read in a memory dump).
   2005 p[S]
   2006     API ID.  See PEP 445.  This is a single character identifying the
   2006     allocator domain ('r', 'm' or 'o').
   2007 p[S+1: 2*S]
   2008     Copies of FORBIDDENBYTE.  Used to catch under-writes and reads.
   2009 p[2*S: 2*S+n]
   2010     The requested memory, filled with copies of CLEANBYTE.
   2011     Used to catch reference to uninitialized memory.
   2012     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
   2013     handled the request itself.
   2014 p[2*S+n: 2*S+n+S]
   2015     Copies of FORBIDDENBYTE.  Used to catch over-writes and reads.
   2016 p[2*S+n+S: 2*S+n+2*S]
   2017     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
   2018     and _PyMem_DebugRealloc.
   2019     This is a big-endian size_t.
   2020     If "bad memory" is detected later, the serial number gives an
   2021     excellent way to set a breakpoint on the next run, to capture the
   2022     instant at which this block was passed out.
   2023 */
   2024 
   2025 static void *
   2026 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
   2027 {
   2028     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
   2029     uint8_t *p;           /* base address of malloc'ed pad block */
   2030     uint8_t *data;        /* p + 2*SST == pointer to data bytes */
   2031     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
   2032     size_t total;         /* 2 * SST + nbytes + 2 * SST */
   2033 
   2034     if (nbytes > (size_t)PY_SSIZE_T_MAX - 4 * SST) {
   2035         /* integer overflow: can't represent total as a Py_ssize_t */
   2036         return NULL;
   2037     }
   2038     total = nbytes + 4 * SST;
   2039 
   2040     /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
   2041      *          ^--- p    ^--- data   ^--- tail
   2042        S: nbytes stored as size_t
   2043        I: API identifier (1 byte)
   2044        F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
   2045        C: Clean bytes used later to store actual data
   2046        N: Serial number stored as size_t */
   2047 
   2048     if (use_calloc) {
   2049         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
   2050     }
   2051     else {
   2052         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
   2053     }
   2054     if (p == NULL) {
   2055         return NULL;
   2056     }
   2057     data = p + 2*SST;
   2058 
   2059     bumpserialno();
   2060 
   2061     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
   2062     write_size_t(p, nbytes);
   2063     p[SST] = (uint8_t)api->api_id;
   2064     memset(p + SST + 1, FORBIDDENBYTE, SST-1);
   2065 
   2066     if (nbytes > 0 && !use_calloc) {
   2067         memset(data, CLEANBYTE, nbytes);
   2068     }
   2069 
   2070     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
   2071     tail = data + nbytes;
   2072     memset(tail, FORBIDDENBYTE, SST);
   2073     write_size_t(tail + SST, serialno);
   2074 
   2075     return data;
   2076 }
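/*
 * Editor's sketch of the padding arithmetic implemented above (not part of
 * the allocator).  debug_total_size() and debug_user_pointer() are
 * hypothetical names; the example numbers assume a 64-bit build (SST == 8).
 */
static size_t
debug_total_size(size_t nbytes)
{
    return nbytes + 4 * SST;        /* 2*SST header plus 2*SST trailer */
}

static uint8_t *
debug_user_pointer(uint8_t *base)
{
    return base + 2 * SST;          /* user data follows size, id and pad */
}
/* Example: a 100-byte request allocates 132 bytes and returns base + 16. */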
   2077 
   2078 static void *
   2079 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
   2080 {
   2081     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
   2082 }
   2083 
   2084 static void *
   2085 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
   2086 {
   2087     size_t nbytes;
   2088     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
   2089     nbytes = nelem * elsize;
   2090     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
   2091 }
   2092 
   2093 
   2094 /* Heuristic check for whether the memory has been freed.  Relies on the debug
   2095    hooks of the Python memory allocators, which fill the memory with DEADBYTE
   2096    (0xDB) when memory is deallocated. */
   2097 int
   2098 _PyMem_IsFreed(void *ptr, size_t size)
   2099 {
   2100     unsigned char *bytes = ptr;
   2101     for (size_t i=0; i < size; i++) {
   2102         if (bytes[i] != DEADBYTE) {
   2103             return 0;
   2104         }
   2105     }
   2106     return 1;
   2107 }
   2108 
   2109 
   2110 /* The debug free first checks the 2*SST bytes on each end for sanity (in
   2111    particular, that the FORBIDDENBYTEs with the api ID are still intact).
   2112    Then fills the whole padded block (header, data and trailer) with DEADBYTE.
   2113    Then calls the underlying free.
   2114 */
   2115 static void
   2116 _PyMem_DebugRawFree(void *ctx, void *p)
   2117 {
   2118     /* PyMem_Free(NULL) has no effect */
   2119     if (p == NULL) {
   2120         return;
   2121     }
   2122 
   2123     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
   2124     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
   2125     size_t nbytes;
   2126 
   2127     _PyMem_DebugCheckAddress(api->api_id, p);
   2128     nbytes = read_size_t(q);
   2129     nbytes += 4 * SST;
   2130     memset(q, DEADBYTE, nbytes);
   2131     api->alloc.free(api->alloc.ctx, q);
   2132 }
   2133 
   2134 
   2135 static void *
   2136 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
   2137 {
   2138     if (p == NULL) {
   2139         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
   2140     }
   2141 
   2142     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
   2143     uint8_t *head;        /* base address of malloc'ed pad block */
   2144     uint8_t *data;        /* pointer to data bytes */
   2145     uint8_t *r;
   2146     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
   2147     size_t total;         /* 2 * SST + nbytes + 2 * SST */
   2148     size_t original_nbytes;
   2149     size_t block_serialno;
   2150 #define ERASED_SIZE 64
   2151     uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
   2152 
   2153     _PyMem_DebugCheckAddress(api->api_id, p);
   2154 
   2155     data = (uint8_t *)p;
   2156     head = data - 2*SST;
   2157     original_nbytes = read_size_t(head);
   2158     if (nbytes > (size_t)PY_SSIZE_T_MAX - 4*SST) {
   2159         /* integer overflow: can't represent total as a Py_ssize_t */
   2160         return NULL;
   2161     }
   2162     total = nbytes + 4*SST;
   2163 
   2164     tail = data + original_nbytes;
   2165     block_serialno = read_size_t(tail + SST);
   2166     /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
   2167        ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
   2168      */
   2169     if (original_nbytes <= sizeof(save)) {
   2170         memcpy(save, data, original_nbytes);
   2171         memset(data - 2*SST, DEADBYTE, original_nbytes + 4*SST);
   2172     }
   2173     else {
   2174         memcpy(save, data, ERASED_SIZE);
   2175         memset(head, DEADBYTE, ERASED_SIZE + 2*SST);
   2176         memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
   2177         memset(tail - ERASED_SIZE, DEADBYTE, ERASED_SIZE + 2*SST);
   2178     }
   2179 
   2180     /* Resize and add decorations. */
   2181     r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
   2182     if (r == NULL) {
   2183         nbytes = original_nbytes;
   2184     }
   2185     else {
   2186         head = r;
   2187         bumpserialno();
   2188         block_serialno = serialno;
   2189     }
   2190 
   2191     write_size_t(head, nbytes);
   2192     head[SST] = (uint8_t)api->api_id;
   2193     memset(head + SST + 1, FORBIDDENBYTE, SST-1);
   2194     data = head + 2*SST;
   2195 
   2196     tail = data + nbytes;
   2197     memset(tail, FORBIDDENBYTE, SST);
   2198     write_size_t(tail + SST, block_serialno);
   2199 
   2200     /* Restore saved bytes. */
   2201     if (original_nbytes <= sizeof(save)) {
   2202         memcpy(data, save, Py_MIN(nbytes, original_nbytes));
   2203     }
   2204     else {
   2205         size_t i = original_nbytes - ERASED_SIZE;
   2206         memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
   2207         if (nbytes > i) {
   2208             memcpy(data + i, &save[ERASED_SIZE],
   2209                    Py_MIN(nbytes - i, ERASED_SIZE));
   2210         }
   2211     }
   2212 
   2213     if (r == NULL) {
   2214         return NULL;
   2215     }
   2216 
   2217     if (nbytes > original_nbytes) {
   2218         /* growing:  mark new extra memory clean */
   2219         memset(data + original_nbytes, CLEANBYTE, nbytes - original_nbytes);
   2220     }
   2221 
   2222     return data;
   2223 }
   2224 
   2225 static void
   2226 _PyMem_DebugCheckGIL(void)
   2227 {
   2228     if (!PyGILState_Check())
   2229         Py_FatalError("Python memory allocator called "
   2230                       "without holding the GIL");
   2231 }
   2232 
   2233 static void *
   2234 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
   2235 {
   2236     _PyMem_DebugCheckGIL();
   2237     return _PyMem_DebugRawMalloc(ctx, nbytes);
   2238 }
   2239 
   2240 static void *
   2241 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
   2242 {
   2243     _PyMem_DebugCheckGIL();
   2244     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
   2245 }
   2246 
   2247 
   2248 static void
   2249 _PyMem_DebugFree(void *ctx, void *ptr)
   2250 {
   2251     _PyMem_DebugCheckGIL();
   2252     _PyMem_DebugRawFree(ctx, ptr);
   2253 }
   2254 
   2255 
   2256 static void *
   2257 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
   2258 {
   2259     _PyMem_DebugCheckGIL();
   2260     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
   2261 }
   2262 
   2263 /* Check the forbidden bytes on both ends of the memory allocated for p.
   2264  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
   2265  * and call Py_FatalError to kill the program.
   2266  * The API id is also checked.
   2267  */
   2268 static void
   2269 _PyMem_DebugCheckAddress(char api, const void *p)
   2270 {
   2271     const uint8_t *q = (const uint8_t *)p;
   2272     char msgbuf[64];
   2273     const char *msg;
   2274     size_t nbytes;
   2275     const uint8_t *tail;
   2276     int i;
   2277     char id;
   2278 
   2279     if (p == NULL) {
   2280         msg = "didn't expect a NULL pointer";
   2281         goto error;
   2282     }
   2283 
   2284     /* Check the API id */
   2285     id = (char)q[-SST];
   2286     if (id != api) {
   2287         msg = msgbuf;
   2288         snprintf(msgbuf, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
   2289         msgbuf[sizeof(msgbuf)-1] = 0;
   2290         goto error;
   2291     }
   2292 
   2293     /* Check the stuff at the start of p first:  if there's underwrite
   2294      * corruption, the number-of-bytes field may be nuts, and checking
   2295      * the tail could lead to a segfault then.
   2296      */
   2297     for (i = SST-1; i >= 1; --i) {
   2298         if (*(q-i) != FORBIDDENBYTE) {
   2299             msg = "bad leading pad byte";
   2300             goto error;
   2301         }
   2302     }
   2303 
   2304     nbytes = read_size_t(q - 2*SST);
   2305     tail = q + nbytes;
   2306     for (i = 0; i < SST; ++i) {
   2307         if (tail[i] != FORBIDDENBYTE) {
   2308             msg = "bad trailing pad byte";
   2309             goto error;
   2310         }
   2311     }
   2312 
   2313     return;
   2314 
   2315 error:
   2316     _PyObject_DebugDumpAddress(p);
   2317     Py_FatalError(msg);
   2318 }
   2319 
   2320 /* Display info to stderr about the memory block at p. */
   2321 static void
   2322 _PyObject_DebugDumpAddress(const void *p)
   2323 {
   2324     const uint8_t *q = (const uint8_t *)p;
   2325     const uint8_t *tail;
   2326     size_t nbytes, serial;
   2327     int i;
   2328     int ok;
   2329     char id;
   2330 
   2331     fprintf(stderr, "Debug memory block at address p=%p:", p);
   2332     if (p == NULL) {
   2333         fprintf(stderr, "\n");
   2334         return;
   2335     }
   2336     id = (char)q[-SST];
   2337     fprintf(stderr, " API '%c'\n", id);
   2338 
   2339     nbytes = read_size_t(q - 2*SST);
   2340     fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
   2341                     "requested\n", nbytes);
   2342 
   2343     /* In case this is nuts, check the leading pad bytes first. */
   2344     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
   2345     ok = 1;
   2346     for (i = 1; i <= SST-1; ++i) {
   2347         if (*(q-i) != FORBIDDENBYTE) {
   2348             ok = 0;
   2349             break;
   2350         }
   2351     }
   2352     if (ok)
   2353         fputs("FORBIDDENBYTE, as expected.\n", stderr);
   2354     else {
   2355         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
   2356             FORBIDDENBYTE);
   2357         for (i = SST-1; i >= 1; --i) {
   2358             const uint8_t byte = *(q-i);
   2359             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
   2360             if (byte != FORBIDDENBYTE)
   2361                 fputs(" *** OUCH", stderr);
   2362             fputc('\n', stderr);
   2363         }
   2364 
   2365         fputs("    Because memory is corrupted at the start, the "
   2366               "count of bytes requested\n"
   2367               "       may be bogus, and checking the trailing pad "
   2368               "bytes may segfault.\n", stderr);
   2369     }
   2370 
   2371     tail = q + nbytes;
   2372     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, tail);
   2373     ok = 1;
   2374     for (i = 0; i < SST; ++i) {
   2375         if (tail[i] != FORBIDDENBYTE) {
   2376             ok = 0;
   2377             break;
   2378         }
   2379     }
   2380     if (ok)
   2381         fputs("FORBIDDENBYTE, as expected.\n", stderr);
   2382     else {
   2383         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
   2384                 FORBIDDENBYTE);
   2385         for (i = 0; i < SST; ++i) {
   2386             const uint8_t byte = tail[i];
   2387             fprintf(stderr, "        at tail+%d: 0x%02x",
   2388                     i, byte);
   2389             if (byte != FORBIDDENBYTE)
   2390                 fputs(" *** OUCH", stderr);
   2391             fputc('\n', stderr);
   2392         }
   2393     }
   2394 
   2395     serial = read_size_t(tail + SST);
   2396     fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
   2397                     "u to debug malloc/realloc.\n", serial);
   2398 
   2399     if (nbytes > 0) {
   2400         i = 0;
   2401         fputs("    Data at p:", stderr);
   2402         /* print up to 8 bytes at the start */
   2403         while (q < tail && i < 8) {
   2404             fprintf(stderr, " %02x", *q);
   2405             ++i;
   2406             ++q;
   2407         }
   2408         /* and up to 8 at the end */
   2409         if (q < tail) {
   2410             if (tail - q > 8) {
   2411                 fputs(" ...", stderr);
   2412                 q = tail - 8;
   2413             }
   2414             while (q < tail) {
   2415                 fprintf(stderr, " %02x", *q);
   2416                 ++q;
   2417             }
   2418         }
   2419         fputc('\n', stderr);
   2420     }
   2421     fputc('\n', stderr);
   2422 
   2423     fflush(stderr);
   2424     _PyMem_DumpTraceback(fileno(stderr), p);
   2425 }
   2426 
   2427 
   2428 static size_t
   2429 printone(FILE *out, const char* msg, size_t value)
   2430 {
   2431     int i, k;
   2432     char buf[100];
   2433     size_t origvalue = value;
   2434 
   2435     fputs(msg, out);
   2436     for (i = (int)strlen(msg); i < 35; ++i)
   2437         fputc(' ', out);
   2438     fputc('=', out);
   2439 
   2440     /* Write the value with commas. */
   2441     i = 22;
   2442     buf[i--] = '\0';
   2443     buf[i--] = '\n';
   2444     k = 3;
   2445     do {
   2446         size_t nextvalue = value / 10;
   2447         unsigned int digit = (unsigned int)(value - nextvalue * 10);
   2448         value = nextvalue;
   2449         buf[i--] = (char)(digit + '0');
   2450         --k;
   2451         if (k == 0 && value && i >= 0) {
   2452             k = 3;
   2453             buf[i--] = ',';
   2454         }
   2455     } while (value && i >= 0);
   2456 
   2457     while (i >= 0)
   2458         buf[i--] = ' ';
   2459     fputs(buf, out);
   2460 
   2461     return origvalue;
   2462 }
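/* Editor's usage note: printone(out, "# arenas allocated total", 1234567)
   pads the label to column 35, prints '=' and then the value with thousands
   separators, i.e. "1,234,567". */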
   2463 
   2464 void
   2465 _PyDebugAllocatorStats(FILE *out,
   2466                        const char *block_name, int num_blocks, size_t sizeof_block)
   2467 {
   2468     char buf1[128];
   2469     char buf2[128];
   2470     PyOS_snprintf(buf1, sizeof(buf1),
   2471                   "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
   2472                   num_blocks, block_name, sizeof_block);
   2473     PyOS_snprintf(buf2, sizeof(buf2),
   2474                   "%48s ", buf1);
   2475     (void)printone(out, buf2, num_blocks * sizeof_block);
   2476 }
   2477 
   2478 
   2479 #ifdef WITH_PYMALLOC
   2480 
   2481 #ifdef Py_DEBUG
   2482 /* Is target in the list?  The list is traversed via the nextpool pointers.
   2483  * The list may be NULL-terminated, or circular.  Return 1 if target is in
   2484  * list, else 0.
   2485  */
   2486 static int
   2487 pool_is_in_list(const poolp target, poolp list)
   2488 {
   2489     poolp origlist = list;
   2490     assert(target != NULL);
   2491     if (list == NULL)
   2492         return 0;
   2493     do {
   2494         if (target == list)
   2495             return 1;
   2496         list = list->nextpool;
   2497     } while (list != NULL && list != origlist);
   2498     return 0;
   2499 }
   2500 #endif
   2501 
   2502 /* Print summary info to "out" about the state of pymalloc's structures.
   2503  * In Py_DEBUG mode, also perform some expensive internal consistency
   2504  * checks.
   2505  *
   2506  * Return 0 if the memory debug hooks are not installed or no statistics were
   2507  * written into out; return 1 otherwise.
   2508  */
   2509 int
   2510 _PyObject_DebugMallocStats(FILE *out)
   2511 {
   2512     if (!_PyMem_PymallocEnabled()) {
   2513         return 0;
   2514     }
   2515 
   2516     uint i;
   2517     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
   2518     /* # of pools, allocated blocks, and free blocks per class index */
   2519     size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
   2520     size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
   2521     size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
   2522     /* total # of allocated bytes in used and full pools */
   2523     size_t allocated_bytes = 0;
   2524     /* total # of available bytes in used pools */
   2525     size_t available_bytes = 0;
   2526     /* # of free pools + pools not yet carved out of current arena */
   2527     uint numfreepools = 0;
   2528     /* # of bytes for arena alignment padding */
   2529     size_t arena_alignment = 0;
   2530     /* # of bytes in used and full pools used for pool_headers */
   2531     size_t pool_header_bytes = 0;
   2532     /* # of bytes in used and full pools wasted due to quantization,
   2533      * i.e. the necessarily leftover space at the ends of used and
   2534      * full pools.
   2535      */
   2536     size_t quantization = 0;
   2537     /* # of arenas actually allocated. */
   2538     size_t narenas = 0;
   2539     /* running total -- should equal narenas * ARENA_SIZE */
   2540     size_t total;
   2541     char buf[128];
   2542 
   2543     fprintf(out, "Small block threshold = %d, in %u size classes.\n",
   2544             SMALL_REQUEST_THRESHOLD, numclasses);
   2545 
   2546     for (i = 0; i < numclasses; ++i)
   2547         numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
   2548 
   2549     /* Because full pools aren't linked to from anything, it's easiest
   2550      * to march over all the arenas.  If we're lucky, most of the memory
   2551      * will be living in full pools -- would be a shame to miss them.
   2552      */
   2553     for (i = 0; i < maxarenas; ++i) {
   2554         uint j;
   2555         uintptr_t base = arenas[i].address;
   2556 
   2557         /* Skip arenas which are not allocated. */
   2558         if (arenas[i].address == (uintptr_t)NULL)
   2559             continue;
   2560         narenas += 1;
   2561 
   2562         numfreepools += arenas[i].nfreepools;
   2563 
   2564         /* round up to pool alignment */
   2565         if (base & (uintptr_t)POOL_SIZE_MASK) {
   2566             arena_alignment += POOL_SIZE;
   2567             base &= ~(uintptr_t)POOL_SIZE_MASK;
   2568             base += POOL_SIZE;
   2569         }
   2570 
   2571         /* visit every pool in the arena */
   2572         assert(base <= (uintptr_t) arenas[i].pool_address);
   2573         for (j = 0; base < (uintptr_t) arenas[i].pool_address;
   2574              ++j, base += POOL_SIZE) {
   2575             poolp p = (poolp)base;
   2576             const uint sz = p->szidx;
   2577             uint freeblocks;
   2578 
   2579             if (p->ref.count == 0) {
   2580                 /* currently unused */
   2581 #ifdef Py_DEBUG
   2582                 assert(pool_is_in_list(p, arenas[i].freepools));
   2583 #endif
   2584                 continue;
   2585             }
   2586             ++numpools[sz];
   2587             numblocks[sz] += p->ref.count;
   2588             freeblocks = NUMBLOCKS(sz) - p->ref.count;
   2589             numfreeblocks[sz] += freeblocks;
   2590 #ifdef Py_DEBUG
   2591             if (freeblocks > 0)
   2592                 assert(pool_is_in_list(p, usedpools[sz + sz]));
   2593 #endif
   2594         }
   2595     }
   2596     assert(narenas == narenas_currently_allocated);
   2597 
   2598     fputc('\n', out);
   2599     fputs("class   size   num pools   blocks in use  avail blocks\n"
   2600           "-----   ----   ---------   -------------  ------------\n",
   2601           out);
   2602 
   2603     for (i = 0; i < numclasses; ++i) {
   2604         size_t p = numpools[i];
   2605         size_t b = numblocks[i];
   2606         size_t f = numfreeblocks[i];
   2607         uint size = INDEX2SIZE(i);
   2608         if (p == 0) {
   2609             assert(b == 0 && f == 0);
   2610             continue;
   2611         }
   2612         fprintf(out, "%5u %6u "
   2613                         "%11" PY_FORMAT_SIZE_T "u "
   2614                         "%15" PY_FORMAT_SIZE_T "u "
   2615                         "%13" PY_FORMAT_SIZE_T "u\n",
   2616                 i, size, p, b, f);
   2617         allocated_bytes += b * size;
   2618         available_bytes += f * size;
   2619         pool_header_bytes += p * POOL_OVERHEAD;
   2620         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
   2621     }
   2622     fputc('\n', out);
   2623     if (_PyMem_DebugEnabled())
   2624         (void)printone(out, "# times object malloc called", serialno);
   2625     (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
   2626     (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
   2627     (void)printone(out, "# arenas highwater mark", narenas_highwater);
   2628     (void)printone(out, "# arenas allocated current", narenas);
   2629 
   2630     PyOS_snprintf(buf, sizeof(buf),
   2631         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
   2632         narenas, ARENA_SIZE);
   2633     (void)printone(out, buf, narenas * ARENA_SIZE);
   2634 
   2635     fputc('\n', out);
   2636 
   2637     total = printone(out, "# bytes in allocated blocks", allocated_bytes);
   2638     total += printone(out, "# bytes in available blocks", available_bytes);
   2639 
   2640     PyOS_snprintf(buf, sizeof(buf),
   2641         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
   2642     total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
   2643 
   2644     total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
   2645     total += printone(out, "# bytes lost to quantization", quantization);
   2646     total += printone(out, "# bytes lost to arena alignment", arena_alignment);
   2647     (void)printone(out, "Total", total);
   2648     return 1;
   2649 }
   2650 
   2651 #endif /* #ifdef WITH_PYMALLOC */
   2652