#include "Python.h"

#include <stdbool.h>


/* Defined in tracemalloc.c */
extern void _PyMem_DumpTraceback(int fd, const void *ptr);


/* Python's malloc wrappers (see pymem.h) */

#undef uint
#define uint    unsigned int    /* assuming >= 16 bits */

/* Forward declaration.
   The "Raw" debug hooks wrap the GIL-free PYMEM_DOMAIN_RAW allocator; the
   non-Raw ones additionally assert that the GIL is held (see their
   definitions later in this file). */
static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
static void _PyMem_DebugRawFree(void *ctx, void *ptr);

static void* _PyMem_DebugMalloc(void *ctx, size_t size);
static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
static void _PyMem_DebugFree(void *ctx, void *p);

static void _PyObject_DebugDumpAddress(const void *p);
static void _PyMem_DebugCheckAddress(char api_id, const void *p);

static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);

/* Define no-op fallbacks for the sanitizer-suppression attributes so the
   rest of the file can use them unconditionally. */
#if defined(__has_feature)  /* Clang */
#  if __has_feature(address_sanitizer)  /* is ASAN enabled? */
#    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
        __attribute__((no_address_safety_analysis))
#  endif
#  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
#  endif
#  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
#    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
#  endif
#elif defined(__GNUC__)
#  if defined(__SANITIZE_ADDRESS__)     /* GCC 4.8+, is ASAN enabled? */
#    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
        __attribute__((no_address_safety_analysis))
#  endif
   // TSAN is supported since GCC 4.8, but __SANITIZE_THREAD__ macro
   // is provided only since GCC 7.
#  if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
#  endif
#endif

#ifndef _Py_NO_ADDRESS_SAFETY_ANALYSIS
#  define _Py_NO_ADDRESS_SAFETY_ANALYSIS
#endif
#ifndef _Py_NO_SANITIZE_THREAD
#  define _Py_NO_SANITIZE_THREAD
#endif
#ifndef _Py_NO_SANITIZE_MEMORY
#  define _Py_NO_SANITIZE_MEMORY
#endif

#ifdef WITH_PYMALLOC

/* Arenas are allocated with VirtualAlloc on Windows, with mmap() where
   anonymous mappings are available, and with malloc() otherwise. */
#ifdef MS_WINDOWS
#  include <windows.h>
#elif defined(HAVE_MMAP)
#  include <sys/mman.h>
#  ifdef MAP_ANONYMOUS
#    define ARENAS_USE_MMAP
#  endif
#endif

/* Forward declaration */
static void* _PyObject_Malloc(void *ctx, size_t size);
static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
static void _PyObject_Free(void *ctx, void *p);
static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
#endif


/* The default PYMEM_DOMAIN_RAW implementation: thin wrappers around the C
   library allocator.  The unused ctx parameter matches the
   PyMemAllocatorEx function-pointer signatures. */
static void *
_PyMem_RawMalloc(void *ctx, size_t size)
{
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
       for malloc(0), which would be treated as an error. Some platforms would
       return a pointer with no memory behind it, which would break pymalloc.
       To solve these problems, allocate an extra byte. */
    if (size == 0)
        size = 1;
    return malloc(size);
}

static void *
_PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
{
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
       for calloc(0, 0), which would be treated as an error. Some platforms
       would return a pointer with no memory behind it, which would break
       pymalloc. To solve these problems, allocate an extra byte. */
    if (nelem == 0 || elsize == 0) {
        nelem = 1;
        elsize = 1;
    }
    return calloc(nelem, elsize);
}

static void *
_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
{
    /* Same zero-size policy as _PyMem_RawMalloc(). */
    if (size == 0)
        size = 1;
    return realloc(ptr, size);
}

static void
_PyMem_RawFree(void *ctx, void *ptr)
{
    free(ptr);
}


/* Platform-specific arena allocators, selected at compile time (see the
   includes above).  Exactly one of the three pairs below is compiled in. */
#ifdef MS_WINDOWS
static void *
_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
{
    return VirtualAlloc(NULL, size,
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
}

static void
_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
{
    VirtualFree(ptr, 0, MEM_RELEASE);
}

#elif defined(ARENAS_USE_MMAP)
static void *
_PyObject_ArenaMmap(void *ctx, size_t size)
{
    void *ptr;
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    /* mmap() reports failure with MAP_FAILED, not NULL; normalize to the
       NULL-on-error convention used by callers. */
    if (ptr == MAP_FAILED)
        return NULL;
    assert(ptr != NULL);
    return ptr;
}

static void
_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
{
    munmap(ptr, size);
}

#else
static void *
_PyObject_ArenaMalloc(void *ctx, size_t size)
{
    return malloc(size);
}

static void
_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
{
    free(ptr);
}
#endif

/* Brace-initializer fragments for PyMemAllocatorEx: {ctx, malloc, calloc,
   realloc, free}.  These are #undef'ed once the tables below are built. */
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
#ifdef WITH_PYMALLOC
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
#endif

#define PYRAW_ALLOC MALLOC_ALLOC
#ifdef WITH_PYMALLOC
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
#else
#  define PYOBJ_ALLOC MALLOC_ALLOC
#endif
#define PYMEM_ALLOC PYOBJ_ALLOC

typedef struct {
    /* We tag each block with an API ID in order to tag API violations */
    char api_id;
    PyMemAllocatorEx alloc;
} debug_alloc_api_t;
static struct {
    debug_alloc_api_t raw;
    debug_alloc_api_t mem;
    debug_alloc_api_t obj;
} _PyMem_Debug = {
    /* One entry per allocator domain; api_id is the tag byte written into
       the debug padding ('r'aw / 'm'em / 'o'bject). */
    {'r', PYRAW_ALLOC},
    {'m', PYMEM_ALLOC},
    {'o', PYOBJ_ALLOC}
    };

/* Debug-hook allocator tables: ctx points at the matching _PyMem_Debug
   slot, which holds the wrapped (underlying) allocator. */
#define PYDBGRAW_ALLOC \
    {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
#define PYDBGMEM_ALLOC \
    {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
#define PYDBGOBJ_ALLOC \
    {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}

/* The three live allocator domains.  Debug builds install the debug hooks
   from the start; release builds use the plain allocators. */
#ifdef Py_DEBUG
static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
#else
static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
#endif


/* Reset `domain` to its compile-time default allocator, optionally saving
   the previous one into *old_alloc and re-installing the debug hooks.
   Returns 0 on success, -1 on an unknown domain. */
static int
pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
                            PyMemAllocatorEx *old_alloc)
{
    if (old_alloc != NULL) {
        PyMem_GetAllocator(domain, old_alloc);
    }


    PyMemAllocatorEx new_alloc;
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW:
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
        break;
    case PYMEM_DOMAIN_MEM:
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
        break;
    case PYMEM_DOMAIN_OBJ:
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
        break;
    default:
        /* unknown domain */
        return -1;
    }
    PyMem_SetAllocator(domain, &new_alloc);
    if (debug) {
        _PyMem_SetupDebugHooksDomain(domain);
    }
    return 0;
}


/* Like pymem_set_default_allocator(), with `debug` fixed to the build's
   default (hooks on for Py_DEBUG builds, off otherwise). */
int
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
                           PyMemAllocatorEx *old_alloc)
{
#ifdef Py_DEBUG
    const int debug = 1;
#else
    const int debug = 0;
#endif
    return pymem_set_default_allocator(domain, debug, old_alloc);
}


int
_PyMem_SetupAllocators(const char *opt)
{
    /* Install the allocators named by `opt` (the PYTHONMALLOC value):
       "default", "debug", "pymalloc[_debug]" (if compiled in) or
       "malloc[_debug]".  Returns 0 on success, -1 on an unknown name. */
    if (opt == NULL || *opt == '\0') {
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
           options): use default memory allocators */
        opt = "default";
    }

    if (strcmp(opt, "default") == 0) {
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
    }
    else if (strcmp(opt, "debug") == 0) {
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
    }
#ifdef WITH_PYMALLOC
    else if (strcmp(opt, "pymalloc") == 0 || strcmp(opt, "pymalloc_debug") == 0) {
        /* RAW stays on malloc (it must be GIL-free); MEM and OBJ use
           pymalloc. */
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);

        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);

        if (strcmp(opt, "pymalloc_debug") == 0) {
            PyMem_SetupDebugHooks();
        }
    }
#endif
    else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0) {
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);

        if (strcmp(opt, "malloc_debug") == 0) {
            PyMem_SetupDebugHooks();
        }
    }
    else {
        /* unknown allocator */
        return -1;
    }
    return 0;
}


/* Compare two allocators by raw memory contents (ctx + the four function
   pointers). */
static int
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
{
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
}


/* Inverse of _PyMem_SetupAllocators(): name the currently installed
   configuration, or NULL if it matches none of the known presets. */
const char*
_PyMem_GetAllocatorsName(void)
{
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
#ifdef WITH_PYMALLOC
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
#endif

    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
        pymemallocator_eq(&_PyObject, &malloc_alloc))
    {
        return "malloc";
    }
#ifdef WITH_PYMALLOC
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &pymalloc) &&
        pymemallocator_eq(&_PyObject, &pymalloc))
    {
        return "pymalloc";
    }
#endif

    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;

    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
        pymemallocator_eq(&_PyObject, &dbg_obj))
    {
        /* Debug hooks installed: classify by the allocators they wrap. */
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
        {
            return "malloc_debug";
        }
#ifdef WITH_PYMALLOC
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
        {
            return "pymalloc_debug";
        }
#endif
    }
    return NULL;
}


#undef MALLOC_ALLOC
#undef PYMALLOC_ALLOC
#undef PYRAW_ALLOC
#undef PYMEM_ALLOC
#undef PYOBJ_ALLOC
#undef PYDBGRAW_ALLOC
#undef PYDBGMEM_ALLOC
#undef PYDBGOBJ_ALLOC


/* Arena allocator: one of the three platform-specific pairs defined
   above, replaceable via PyObject_SetArenaAllocator(). */
static PyObjectArenaAllocator _PyObject_Arena = {NULL,
#ifdef MS_WINDOWS
    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
#elif defined(ARENAS_USE_MMAP)
    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
#else
    _PyObject_ArenaMalloc, _PyObject_ArenaFree
#endif
    };

#ifdef WITH_PYMALLOC
/* Are the debug hooks installed on the object domain?  Detected by
   function-pointer identity. */
static int
_PyMem_DebugEnabled(void)
{
    return (_PyObject.malloc == _PyMem_DebugMalloc);
}

static int
_PyMem_PymallocEnabled(void)
{
    /* Is pymalloc serving the object domain, either directly or underneath
       the debug hooks? */
    if (_PyMem_DebugEnabled()) {
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
    }
    else {
        return (_PyObject.malloc == _PyObject_Malloc);
    }
}
#endif


/* Wrap one domain's current allocator with the debug hooks.  Idempotent:
   if the hooks are already installed (checked by function-pointer
   identity), nothing happens.  The wrapped allocator is stashed in the
   corresponding _PyMem_Debug slot, which becomes the hooks' ctx. */
static void
_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
{
    PyMemAllocatorEx alloc;

    if (domain == PYMEM_DOMAIN_RAW) {
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
        alloc.ctx = &_PyMem_Debug.raw;
        alloc.malloc = _PyMem_DebugRawMalloc;
        alloc.calloc = _PyMem_DebugRawCalloc;
        alloc.realloc = _PyMem_DebugRawRealloc;
        alloc.free = _PyMem_DebugRawFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_MEM) {
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
        alloc.ctx = &_PyMem_Debug.mem;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_OBJ) {
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
            return;
        }

        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
        alloc.ctx = &_PyMem_Debug.obj;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    }
}


/* Public API: install the debug hooks on all three domains. */
void
PyMem_SetupDebugHooks(void)
{
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
}

void
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
    default:
        /* unknown domain: set all attributes to NULL */
        allocator->ctx = NULL;
        allocator->malloc = NULL;
        allocator->calloc = NULL;
        allocator->realloc = NULL;
        allocator->free = NULL;
    }
}

void
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
    /* ignore unknown domain */
    }
}

void
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    *allocator = _PyObject_Arena;
}

void
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    _PyObject_Arena = *allocator;
}

void *
PyMem_RawMalloc(size_t size)
{
    /*
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
     * Most python internals blindly use a signed Py_ssize_t to track
     * things without checking for overflows or negatives.
     * As size_t is unsigned, checking for size < 0 is not required.
     */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
}

void *
PyMem_RawCalloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc(); the division guards nelem*elsize against
       overflowing past PY_SSIZE_T_MAX */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
}

void*
PyMem_RawRealloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
}

void PyMem_RawFree(void *ptr)
{
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}


/* PYMEM_DOMAIN_MEM public entry points: same size capping as the RAW
   domain, dispatched through the _PyMem table. */
void *
PyMem_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.malloc(_PyMem.ctx, size);
}

void *
PyMem_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
}

void *
PyMem_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
}

void
PyMem_Free(void *ptr)
{
    _PyMem.free(_PyMem.ctx, ptr);
}


/* Duplicate a wide string with PyMem_RawMalloc().  Returns NULL on
   overflow or allocation failure; caller owns the result. */
wchar_t*
_PyMem_RawWcsdup(const wchar_t *str)
{
    assert(str != NULL);

    size_t len = wcslen(str);
    /* Guard (len + 1) * sizeof(wchar_t) against overflow. */
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
        return NULL;
    }

    size_t size = (len + 1) * sizeof(wchar_t);
    wchar_t *str2 = PyMem_RawMalloc(size);
    if (str2 == NULL) {
        return NULL;
    }

    memcpy(str2, str, size);
    return str2;
}

/* strdup() built on PyMem_RawMalloc(); caller frees with PyMem_RawFree(). */
char *
_PyMem_RawStrdup(const char *str)
{
    assert(str != NULL);
    size_t size = strlen(str) + 1;
    char *copy = PyMem_RawMalloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, str, size);
    return copy;
}

/* strdup() built on PyMem_Malloc(); caller frees with PyMem_Free(). */
char *
_PyMem_Strdup(const char *str)
{
    assert(str != NULL);
    size_t size = strlen(str) + 1;
    char *copy = PyMem_Malloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, str, size);
    return copy;
}

/* PYMEM_DOMAIN_OBJ public entry points, dispatched through _PyObject. */
void *
PyObject_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.malloc(_PyObject.ctx, size);
}

void *
PyObject_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
}

void *
PyObject_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
}

void
PyObject_Free(void *ptr)
{
    _PyObject.free(_PyObject.ctx, ptr);
}


#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>

/* If we're using GCC, use __builtin_expect() to reduce overhead of
   the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#  define UNLIKELY(value) __builtin_expect((value), 0)
#else
#  define UNLIKELY(value) (value)
#endif

/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif


/* An object allocator for Python.
680 681 Here is an introduction to the layers of the Python memory architecture, 682 showing where the object allocator is actually used (layer +2), It is 683 called for every object allocation and deallocation (PyObject_New/Del), 684 unless the object-specific allocators implement a proprietary allocation 685 scheme (ex.: ints use a simple free list). This is also the place where 686 the cyclic garbage collector operates selectively on container objects. 687 688 689 Object-specific allocators 690 _____ ______ ______ ________ 691 [ int ] [ dict ] [ list ] ... [ string ] Python core | 692 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> | 693 _______________________________ | | 694 [ Python's object allocator ] | | 695 +2 | ####### Object memory ####### | <------ Internal buffers ------> | 696 ______________________________________________________________ | 697 [ Python's raw memory allocator (PyMem_ API) ] | 698 +1 | <----- Python memory (under PyMem manager's control) ------> | | 699 __________________________________________________________________ 700 [ Underlying general-purpose allocator (ex: C library malloc) ] 701 0 | <------ Virtual memory allocated for the python process -------> | 702 703 ========================================================================= 704 _______________________________________________________________________ 705 [ OS-specific Virtual Memory Manager (VMM) ] 706 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> | 707 __________________________________ __________________________________ 708 [ ] [ ] 709 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> | 710 711 */ 712 /*==========================================================================*/ 713 714 /* A fast, special-purpose memory allocator for small blocks, to be used 715 on top of a general-purpose malloc -- heavily based on previous art. 
 */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *    "Dynamic Storage Allocation: A Survey and Critical Review",
 *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */

/*==========================================================================*/

/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
 * system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with lot of reserved
 * memory for the different free lists, which degenerate in time. To avoid
 * this, we partition each free list in pools and we share dynamically the
 * reserved space between all free lists. This technique is quite efficient
 * for memory intensive programs which allocate mainly small-sized blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *       33-40                   40                       4
 *       41-48                   48                       5
 *       49-56                   56                       6
 *       57-64                   64                       7
 *       65-72                   72                       8
 *        ...                   ...                     ...
 *      497-504                 504                      62
 *      505-512                 512                      63
 *
 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
 *      allocator.
 */

/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-bytes alignment works
 * on most current architectures (with 32-bit or 64-bit address busses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */
#define ALIGNMENT               8               /* must be 2^N */
#define ALIGNMENT_SHIFT         3

/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)

/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * Note: a size threshold of 512 guarantees that newly created dictionaries
 * will be allocated from preallocated memory pools on 64-bit.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD 512
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)

/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
 * violation fault. 4K is apparently OK for all the platforms that python
 * currently targets.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc()/mmap() call). In no way this
 * means that the memory arenas will be used entirely. A malloc(<Big>) is
 * usually an address range reservation for <Big> bytes, unless all pages within
 * this space are referenced subsequently. So malloc'ing big blocks and not
 * using them does not mean "wasting memory". It's an addressable range
 * wastage...
 *
 * Arenas are allocated with mmap() on systems supporting anonymous memory
 * mappings to reduce heap fragmentation.
 */
#define ARENA_SIZE              (256 << 10)     /* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK

/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/*
 * Locking
 *
 * To reduce lock contention, it would probably be better to refine the
 * crude function locking with per size class locking. I'm not positive
 * however, whether it's worth switching to such locking policy because
 * of the performance penalty it might introduce.
 *
 * The following macros describe the simplest (should also be the fastest)
 * lock object on a particular platform and the init/fini/lock/unlock
 * operations on it. The locks defined here are not expected to be recursive
 * because it is assumed that they will always be called in the order:
 * INIT, [LOCK, UNLOCK]*, FINI.
 */

/*
 * Python's threads are serialized, so object malloc locking is disabled.
 */
#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
#define SIMPLELOCK_LOCK(lock)   /* acquire released lock                */
#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock                */

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uint8_t block;

/* Pool for small blocks.
 */
struct pool_header {
    union { block *_padding;
            uint count; } ref;          /* number of allocated blocks    */
    block *freeblock;                   /* pool's free list head         */
    struct pool_header *nextpool;       /* next pool of this size class  */
    struct pool_header *prevpool;       /* previous pool       ""        */
    uint arenaindex;                    /* index into arenas of base adr */
    uint szidx;                         /* block size class index        */
    uint nextoffset;                    /* bytes to virgin block         */
    uint maxnextoffset;                 /* largest valid nextoffset      */
};
/* NOTE(review): the usedpools[] trick below relies on nextpool/prevpool
   immediately following the first two pointer-sized members -- do not
   reorder or pad this struct. */

typedef struct pool_header *poolp;

/* Record keeping for arenas. */
struct arena_object {
    /* The address of the arena, as returned by malloc.  Note that 0
     * will never be returned by a successful malloc, and is used
     * here to mark an arena_object that doesn't correspond to an
     * allocated arena.
     */
    uintptr_t address;

    /* Pool-aligned pointer to the next pool to be carved off. */
    block* pool_address;

    /* The number of available pools in the arena:  free pools + never-
     * allocated pools.
     */
    uint nfreepools;

    /* The total number of pools in the arena, whether or not available. */
    uint ntotalpools;

    /* Singly-linked list of available pools. */
    struct pool_header* freepools;

    /* Whenever this arena_object is not associated with an allocated
     * arena, the nextarena member is used to link all unassociated
     * arena_objects in the singly-linked `unused_arena_objects` list.
     * The prevarena member is unused in this case.
     *
     * When this arena_object is associated with an allocated arena
     * with at least one available pool, both members are used in the
     * doubly-linked `usable_arenas` list, which is maintained in
     * increasing order of `nfreepools` values.
     *
     * Else this arena_object is associated with an allocated arena
     * all of whose pools are in use.  `nextarena` and `prevarena`
     * are both meaningless in this case.
     */
    struct arena_object* nextarena;
    struct arena_object* prevarena;
};

/* Size of the pool_header, rounded up to a multiple of ALIGNMENT. */
#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)

#define DUMMY_SIZE_IDX  0xffff  /* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))

/* Return total number of blocks in pool of size index I, as a uint. */
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))

/*==========================================================================*/

/*
 * This malloc lock
 */
SIMPLELOCK_DECL(_malloc_lock)
#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved.  For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i. So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off an arena's highwater mark (an arena_object's pool_address
member) as needed.  Once carved off, a pool is in one of three states forever
after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i] (see above).
It's linked to the other used pools of the 993 same size class via the pool_header's nextpool and prevpool members. 994 If all but one block is currently allocated, a malloc can cause a 995 transition to the full state. If all but one block is not currently 996 allocated, a free can cause a transition to the empty state. 997 998 full == all the pool's blocks are currently allocated 999 On transition to full, a pool is unlinked from its usedpools[] list. 1000 It's not linked to from anything then anymore, and its nextpool and 1001 prevpool members are meaningless until it transitions back to used. 1002 A free of a block in a full pool puts the pool back in the used state. 1003 Then it's linked in at the front of the appropriate usedpools[] list, so 1004 that the next allocation for its size class will reuse the freed block. 1005 1006 empty == all the pool's blocks are currently available for allocation 1007 On transition to empty, a pool is unlinked from its usedpools[] list, 1008 and linked to the front of its arena_object's singly-linked freepools list, 1009 via its nextpool member. The prevpool member has no meaning in this case. 1010 Empty pools have no inherent size class: the next time a malloc finds 1011 an empty list in usedpools[], it takes the first pool off of freepools. 1012 If the size class needed happens to be the same as the size class the pool 1013 last had, some pool initialization can be skipped. 1014 1015 1016 Block Management 1017 1018 Blocks within pools are again carved out as needed. pool->freeblock points to 1019 the start of a singly-linked list of free blocks within the pool. When a 1020 block is freed, it's inserted at the front of its pool's freeblock list. Note 1021 that the available blocks in a pool are *not* linked all together when a pool 1022 is initialized. 
Instead only "the first two" (lowest addresses) blocks are 1023 set up, returning the first such block, and setting pool->freeblock to a 1024 one-block list holding the second such block. This is consistent with that 1025 pymalloc strives at all levels (arena, pool, and block) never to touch a piece 1026 of memory until it's actually needed. 1027 1028 So long as a pool is in the used state, we're certain there *is* a block 1029 available for allocating, and pool->freeblock is not NULL. If pool->freeblock 1030 points to the end of the free list before we've carved the entire pool into 1031 blocks, that means we simply haven't yet gotten to one of the higher-address 1032 blocks. The offset from the pool_header to the start of "the next" virgin 1033 block is stored in the pool_header nextoffset member, and the largest value 1034 of nextoffset that makes sense is stored in the maxnextoffset member when a 1035 pool is initialized. All the blocks in a pool have been passed out at least 1036 once when and only when nextoffset > maxnextoffset. 1037 1038 1039 Major obscurity: While the usedpools vector is declared to have poolp 1040 entries, it doesn't really. It really contains two pointers per (conceptual) 1041 poolp entry, the nextpool and prevpool members of a pool_header. The 1042 excruciating initialization code below fools C so that 1043 1044 usedpool[i+i] 1045 1046 "acts like" a genuine poolp, but only so long as you only reference its 1047 nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is 1048 compensating for that a pool_header's nextpool and prevpool members 1049 immediately follow a pool_header's first two members: 1050 1051 union { block *_padding; 1052 uint count; } ref; 1053 block *freeblock; 1054 1055 each of which consume sizeof(block *) bytes. 
So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).

It's unclear why the usedpools setup is so convoluted.  It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on that C doesn't insert any padding anywhere in a pool_header at or before
the prevpool member.
**************************************************************************** */

/* PTA(x) fakes up a pool_header pointer whose nextpool/prevpool members
 * land on usedpools[2*x] and usedpools[2*x+1]; see the long comment above. */
#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)   PTA(x), PTA(x)

/* Headers for the circular lists of partially-used pools: two slots per
 * size class, initialized so each list starts out empty (head points to
 * itself). */
static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#if NB_SMALL_SIZE_CLASSES > 64
#error "NB_SMALL_SIZE_CLASSES should be less than 64"
#endif /* NB_SMALL_SIZE_CLASSES > 64 */
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES > 8 */
};

/*==========================================================================
Arena management.

`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
which may not be currently used (== they're arena_objects that aren't
currently associated with an allocated arena).  Note that arenas proper are
separately malloc'ed.

Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
we do try to free() arenas, and use some mild heuristic strategies to increase
the likelihood that arenas eventually can be freed.

unused_arena_objects

    This is a singly-linked list of the arena_objects that are currently not
    being used (no arena is associated with them).  Objects are taken off the
    head of the list in new_arena(), and are pushed on the head of the list in
    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
    is on this list if and only if its .address member is 0.

usable_arenas

    This is a doubly-linked list of the arena_objects associated with arenas
    that have pools available.  These pools are either waiting to be reused,
    or have not been used before.  The list is sorted to have the most-
    allocated arenas first (ascending order based on the nfreepools member).
    This means that the next allocation will come from a heavily used arena,
    which gives the nearly empty arenas a chance to be returned to the system.
    In my unscientific tests this dramatically improved the number of arenas
    that could be freed.

Note that an arena_object associated with an arena all of whose pools are
currently in use isn't on either list.
*/

/* Array of objects used to track chunks of memory (arenas). */
static struct arena_object* arenas = NULL;
/* Number of slots currently allocated in the `arenas` vector. */
static uint maxarenas = 0;

/* The head of the singly-linked, NULL-terminated list of available
 * arena_objects.
 */
static struct arena_object* unused_arena_objects = NULL;

/* The head of the doubly-linked, NULL-terminated at each end, list of
 * arena_objects associated with arenas that have pools available.
 */
static struct arena_object* usable_arenas = NULL;

/* How many arena_objects do we initially allocate?
 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
 * `arenas` vector.
 */
#define INITIAL_ARENA_OBJECTS 16

/* Number of arenas allocated that haven't been free()'d. */
static size_t narenas_currently_allocated = 0;

/* Total number of times malloc() called to allocate an arena. */
static size_t ntimes_arena_allocated = 0;
/* High water mark (max value ever seen) for narenas_currently_allocated. */
static size_t narenas_highwater = 0;

/* Net count of blocks handed out by the object allocator (incremented on
 * successful alloc, decremented on free); see _PyObject_Malloc/_PyObject_Free. */
static Py_ssize_t _Py_AllocatedBlocks = 0;

/* Return the current net number of allocated blocks tracked above. */
Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    return _Py_AllocatedBlocks;
}


/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena.  It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(void)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;
    static int debug_stats = -1;    /* -1 == not yet read from the environment */

    if (debug_stats == -1) {
        /* Check PYTHONMALLOCSTATS only once and cache the answer. */
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
        debug_stats = (opt != NULL && *opt != '\0');
    }
    if (debug_stats)
        _PyObject_DebugMallocStats(stderr);

    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > SIZE_MAX / sizeof(*arenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*arenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        arenas = arenaobj;

        /* We might need to fix pointers that were copied.  However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full.  Thus, there are *no* pointers
         * into the old array.  Thus, we don't have to worry about
         * invalid pointers.  Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            arenas[i].address = 0;              /* mark as unassociated */
            arenas[i].nextarena = i < numarenas - 1 ?
                                   &arenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &arenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uintptr_t)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (block*)arenaobj->address;
    arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
    assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        /* The arena start isn't pool-aligned: skip ahead to the first
         * pool boundary and give up the partial pool at the front. */
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    /* Remember the total so pymalloc_free can detect when the arena
     * becomes entirely free again (nfreepools == ntotalpools). */
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}


/*
address_in_range(P, POOL)

Return true if and only if P is an address that was allocated by pymalloc.
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
(the caller is asked to compute this because the macro expands POOL more than
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
variable and pass the latter to the macro; because address_in_range is
called on every alloc/realloc/free, micro-efficiency is important here).

Tricky:  Let B be the arena base address associated with the pool, B =
arenas[(POOL)->arenaindex].address.
Then P belongs to the arena if and only if 1284 1285 B <= P < B + ARENA_SIZE 1286 1287 Subtracting B throughout, this is true iff 1288 1289 0 <= P-B < ARENA_SIZE 1290 1291 By using unsigned arithmetic, the "0 <=" half of the test can be skipped. 1292 1293 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc 1294 before the first arena has been allocated. `arenas` is still NULL in that 1295 case. We're relying on that maxarenas is also 0 in that case, so that 1296 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index 1297 into a NULL arenas. 1298 1299 Details: given P and POOL, the arena_object corresponding to P is AO = 1300 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild 1301 stores, etc), POOL is the correct address of P's pool, AO.address is the 1302 correct base address of the pool's arena, and P must be within ARENA_SIZE of 1303 AO.address. In addition, AO.address is not 0 (no arena can start at address 0 1304 (NULL)). Therefore address_in_range correctly reports that obmalloc 1305 controls P. 1306 1307 Now suppose obmalloc does not control P (e.g., P was obtained via a direct 1308 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything 1309 in this case -- it may even be uninitialized trash. If the trash arenaindex 1310 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't 1311 control P. 1312 1313 Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an 1314 allocated arena, obmalloc controls all the memory in slice AO.address : 1315 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc, 1316 so P doesn't lie in that slice, so the macro correctly reports that P is not 1317 controlled by obmalloc. 

Finally, if P is not controlled by obmalloc and AO corresponds to an unused
arena_object (one not currently associated with an allocated arena),
AO.address is 0, and the second test in the macro reduces to:

    P < ARENA_SIZE

If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
of the test still passes, and the third clause (AO.address != 0) is necessary
to get the correct result:  AO.address is 0 in this case, so the macro
correctly reports that P is not controlled by obmalloc (despite that P lies in
slice AO.address : AO.address + ARENA_SIZE).

Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
corresponded to a currently-allocated arena, so the "P is not controlled by
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
was impossible.

Note that the logic is excruciating, and reading up possibly uninitialized
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
creates problems for some memory debuggers.  The overwhelming advantage is
that this test determines whether an arbitrary address is controlled by
obmalloc in a small constant time, independent of the number of arenas
obmalloc controls.  Since this test is needed at every entry point, it's
extremely desirable that it be this fast.
*/

static bool _Py_NO_ADDRESS_SAFETY_ANALYSIS
            _Py_NO_SANITIZE_THREAD
            _Py_NO_SANITIZE_MEMORY
address_in_range(void *p, poolp pool)
{
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL.  The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    // Unsigned subtraction folds the "0 <= P-B" and "P-B < ARENA_SIZE"
    // tests into a single comparison (see the big comment above).
    return arenaindex < maxarenas &&
        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
        arenas[arenaindex].address != 0;
}


/*==========================================================================*/

/* pymalloc allocator

   The basic blocks are ordered by decreasing execution frequency,
   which minimizes the number of jumps in the most common cases,
   improves branching prediction and instruction scheduling (small
   block allocations typically result in a couple of instructions).
   Unless the optimizer reorders everything, being too smart...

   Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.

   Return 0 if pymalloc failed to allocate the memory block: on bigger
   requests, on error in the code below (as a last chance to serve the request)
   or when the max memory limit has been reached. */
static int
pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
{
    block *bp;
    poolp pool;
    poolp next;
    uint size;

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind == -1)) {
        running_on_valgrind = RUNNING_ON_VALGRIND;
    }
    if (UNLIKELY(running_on_valgrind)) {
        return 0;
    }
#endif

    if (nbytes == 0) {
        return 0;
    }
    if (nbytes > SMALL_REQUEST_THRESHOLD) {
        return 0;
    }

    LOCK();
    /*
     * Most frequent paths first
     */
    /* nbytes > 0 here, so nbytes - 1 cannot underflow. */
    size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
    pool = usedpools[size + size];
    if (pool != pool->nextpool) {
        /*
         * There is a used pool for this size class.
         * Pick up the head block of its free list.
         */
        ++pool->ref.count;
        bp = pool->freeblock;
        assert(bp != NULL);
        if ((pool->freeblock = *(block **)bp) != NULL) {
            goto success;
        }

        /*
         * Reached the end of the free list, try to extend it.
         */
        if (pool->nextoffset <= pool->maxnextoffset) {
            /* There is room for another block. */
            pool->freeblock = (block*)pool +
                              pool->nextoffset;
            pool->nextoffset += INDEX2SIZE(size);
            *(block **)(pool->freeblock) = NULL;
            goto success;
        }

        /* Pool is full, unlink from used pools. */
        next = pool->nextpool;
        pool = pool->prevpool;
        next->prevpool = pool;
        pool->nextpool = next;
        goto success;
    }

    /* There isn't a pool of the right size class immediately
     * available:  use a free pool.
     */
    if (usable_arenas == NULL) {
        /* No arena has a free pool:  allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
        if (narenas_currently_allocated >= MAX_ARENAS) {
            goto failed;
        }
#endif
        usable_arenas = new_arena();
        if (usable_arenas == NULL) {
            goto failed;
        }
        usable_arenas->nextarena =
            usable_arenas->prevarena = NULL;
    }
    assert(usable_arenas->address != 0);

    /* Try to get a cached free pool. */
    pool = usable_arenas->freepools;
    if (pool != NULL) {
        /* Unlink from cached pools. */
        usable_arenas->freepools = pool->nextpool;

        /* This arena already had the smallest nfreepools
         * value, so decreasing nfreepools doesn't change
         * that, and we don't need to rearrange the
         * usable_arenas list.  However, if the arena has
         * become wholly allocated, we need to remove its
         * arena_object from usable_arenas.
         */
        --usable_arenas->nfreepools;
        if (usable_arenas->nfreepools == 0) {
            /* Wholly allocated:  remove. */
            assert(usable_arenas->freepools == NULL);
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);

            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
        else {
            /* nfreepools > 0:  it must be that freepools
             * isn't NULL, or that we haven't yet carved
             * off all the arena's pools for the first
             * time.
             */
            assert(usable_arenas->freepools != NULL ||
                   usable_arenas->pool_address <=
                   (block*)usable_arenas->address +
                       ARENA_SIZE - POOL_SIZE);
        }

    init_pool:
        /* Frontlink to used pools. */
        next = usedpools[size + size]; /* == prev */
        pool->nextpool = next;
        pool->prevpool = next;
        next->nextpool = pool;
        next->prevpool = pool;
        pool->ref.count = 1;
        if (pool->szidx == size) {
            /* Luckily, this pool last contained blocks
             * of the same size class, so its header
             * and free list are already initialized.
             */
            bp = pool->freeblock;
            assert(bp != NULL);
            pool->freeblock = *(block **)bp;
            goto success;
        }
        /*
         * Initialize the pool header, set up the free list to
         * contain just the second block, and return the first
         * block.
         */
        pool->szidx = size;
        size = INDEX2SIZE(size);    /* NOTE: size is now bytes, not an index */
        bp = (block *)pool + POOL_OVERHEAD;
        pool->nextoffset = POOL_OVERHEAD + (size << 1);
        pool->maxnextoffset = POOL_SIZE - size;
        pool->freeblock = bp + size;
        *(block **)(pool->freeblock) = NULL;
        goto success;
    }

    /* Carve off a new pool. */
    assert(usable_arenas->nfreepools > 0);
    assert(usable_arenas->freepools == NULL);
    pool = (poolp)usable_arenas->pool_address;
    assert((block*)pool <= (block*)usable_arenas->address +
                           ARENA_SIZE - POOL_SIZE);
    pool->arenaindex = (uint)(usable_arenas - arenas);
    assert(&arenas[pool->arenaindex] == usable_arenas);
    pool->szidx = DUMMY_SIZE_IDX;
    usable_arenas->pool_address += POOL_SIZE;
    --usable_arenas->nfreepools;

    if (usable_arenas->nfreepools == 0) {
        assert(usable_arenas->nextarena == NULL ||
               usable_arenas->nextarena->prevarena ==
               usable_arenas);
        /* Unlink the arena:  it is completely allocated. */
        usable_arenas = usable_arenas->nextarena;
        if (usable_arenas != NULL) {
            usable_arenas->prevarena = NULL;
            assert(usable_arenas->address != 0);
        }
    }

    goto init_pool;

success:
    UNLOCK();
    assert(bp != NULL);
    *ptr_p = (void *)bp;
    return 1;

failed:
    UNLOCK();
    return 0;
}


/* Object-allocator malloc entry point: try pymalloc first, and fall back
 * to the raw allocator for large requests or when pymalloc fails.
 * Each successful allocation bumps _Py_AllocatedBlocks. */
static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
    void* ptr;
    if (pymalloc_alloc(ctx, &ptr, nbytes)) {
        _Py_AllocatedBlocks++;
        return ptr;
    }

    ptr = PyMem_RawMalloc(nbytes);
    if (ptr != NULL) {
        _Py_AllocatedBlocks++;
    }
    return ptr;
}


/* Object-allocator calloc entry point.  The caller guarantees nelem*elsize
 * cannot overflow (see the assert); the pymalloc path must zero the block
 * explicitly since pymalloc_alloc doesn't. */
static void *
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
{
    void* ptr;

    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
    size_t nbytes = nelem * elsize;

    if (pymalloc_alloc(ctx, &ptr, nbytes)) {
        memset(ptr, 0, nbytes);
        _Py_AllocatedBlocks++;
        return ptr;
    }

    ptr = PyMem_RawCalloc(nelem, elsize);
    if (ptr != NULL) {
        _Py_AllocatedBlocks++;
    }
    return ptr;
}


/* Free a memory block allocated by pymalloc_alloc().
   Return 1 if it was freed.
   Return 0 if the block was not allocated by pymalloc_alloc(). */
static int
pymalloc_free(void *ctx, void *p)
{
    poolp pool;
    block *lastfree;
    poolp next, prev;
    uint size;

    assert(p != NULL);

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    pool = POOL_ADDR(p);
    if (!address_in_range(p, pool)) {
        return 0;
    }
    /* We allocated this address. */

    LOCK();

    /* Link p to the start of the pool's freeblock list.  Since
     * the pool had at least the p block outstanding, the pool
     * wasn't empty (so it's already in a usedpools[] list, or
     * was full and is in no list -- it's not in the freeblocks
     * list in any case).
     */
    assert(pool->ref.count > 0);            /* else it was empty */
    *(block **)p = lastfree = pool->freeblock;
    pool->freeblock = (block *)p;
    if (!lastfree) {
        /* Pool was full, so doesn't currently live in any list:
         * link it to the front of the appropriate usedpools[] list.
         * This mimics LRU pool usage for new allocations and
         * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
        --pool->ref.count;
        assert(pool->ref.count > 0);        /* else the pool is empty */
        size = pool->szidx;
        next = usedpools[size + size];
        prev = next->prevpool;

        /* insert pool before next:   prev <-> pool <-> next */
        pool->nextpool = next;
        pool->prevpool = prev;
        next->prevpool = pool;
        prev->nextpool = pool;
        goto success;
    }

    struct arena_object* ao;
    uint nf;  /* ao->nfreepools */

    /* freeblock wasn't NULL, so the pool wasn't full,
     * and the pool is in a usedpools[] list.
     */
    if (--pool->ref.count != 0) {
        /* pool isn't empty:  leave it in usedpools */
        goto success;
    }
    /* Pool is now empty:  unlink from usedpools, and
     * link to the front of freepools.  This ensures that
     * previously freed pools will be allocated later
     * (being not referenced, they are perhaps paged out).
     */
    next = pool->nextpool;
    prev = pool->prevpool;
    next->prevpool = prev;
    prev->nextpool = next;

    /* Link the pool to freepools.  This is a singly-linked
     * list, and pool->prevpool isn't used there.
     */
    ao = &arenas[pool->arenaindex];
    pool->nextpool = ao->freepools;
    ao->freepools = pool;
    nf = ++ao->nfreepools;

    /* All the rest is arena management.  We just freed
     * a pool, and there are 4 cases for arena mgmt:
     * 1. If all the pools are free, return the arena to
     *    the system free().
     * 2. If this is the only free pool in the arena,
     *    add the arena back to the `usable_arenas` list.
     * 3. If the "next" arena has a smaller count of free
     *    pools, we have to "slide this arena right" to
     *    restore that usable_arenas is sorted in order of
     *    nfreepools.
     * 4. Else there's nothing more to do.
     */
    if (nf == ao->ntotalpools) {
        /* Case 1.  Every pool in the arena is now free: give the
         * whole arena back.  First unlink ao from usable_arenas.
         */
        assert(ao->prevarena == NULL ||
               ao->prevarena->address != 0);
        assert(ao->nextarena == NULL ||
               ao->nextarena->address != 0);

        /* Fix the pointer in the prevarena, or the
         * usable_arenas pointer.
         */
        if (ao->prevarena == NULL) {
            usable_arenas = ao->nextarena;
            assert(usable_arenas == NULL ||
                   usable_arenas->address != 0);
        }
        else {
            assert(ao->prevarena->nextarena == ao);
            ao->prevarena->nextarena =
                ao->nextarena;
        }
        /* Fix the pointer in the nextarena. */
        if (ao->nextarena != NULL) {
            assert(ao->nextarena->prevarena == ao);
            ao->nextarena->prevarena =
                ao->prevarena;
        }
        /* Record that this arena_object slot is
         * available to be reused.
         */
        ao->nextarena = unused_arena_objects;
        unused_arena_objects = ao;

        /* Free the entire arena. */
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
        ao->address = 0;                        /* mark unassociated */
        --narenas_currently_allocated;

        goto success;
    }

    if (nf == 1) {
        /* Case 2.  Put ao at the head of
         * usable_arenas.  Note that because
         * ao->nfreepools was 0 before, ao isn't
         * currently on the usable_arenas list.
         */
        ao->nextarena = usable_arenas;
        ao->prevarena = NULL;
        if (usable_arenas)
            usable_arenas->prevarena = ao;
        usable_arenas = ao;
        assert(usable_arenas->address != 0);

        goto success;
    }

    /* If this arena is now out of order, we need to keep
     * the list sorted.  The list is kept sorted so that
     * the "most full" arenas are used first, which allows
     * the nearly empty arenas to be completely freed.  In
     * a few un-scientific tests, it seems like this
     * approach allowed a lot more memory to be freed.
     */
    if (ao->nextarena == NULL ||
        nf <= ao->nextarena->nfreepools) {
        /* Case 4.  Nothing to do. */
        goto success;
    }
    /* Case 3:  We have to move the arena towards the end
     * of the list, because it has more free pools than
     * the arena to its right.
     * First unlink ao from usable_arenas.
     */
    if (ao->prevarena != NULL) {
        /* ao isn't at the head of the list */
        assert(ao->prevarena->nextarena == ao);
        ao->prevarena->nextarena = ao->nextarena;
    }
    else {
        /* ao is at the head of the list */
        assert(usable_arenas == ao);
        usable_arenas = ao->nextarena;
    }
    ao->nextarena->prevarena = ao->prevarena;

    /* Locate the new insertion point by iterating over
     * the list, using our nextarena pointer.
     */
    while (ao->nextarena != NULL && nf > ao->nextarena->nfreepools) {
        ao->prevarena = ao->nextarena;
        ao->nextarena = ao->nextarena->nextarena;
    }

    /* Insert ao at this point. */
    assert(ao->nextarena == NULL || ao->prevarena == ao->nextarena->prevarena);
    assert(ao->prevarena->nextarena == ao->nextarena);

    ao->prevarena->nextarena = ao;
    if (ao->nextarena != NULL) {
        ao->nextarena->prevarena = ao;
    }

    /* Verify that the swaps worked. */
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
    assert((usable_arenas == ao && ao->prevarena == NULL)
           || ao->prevarena->nextarena == ao);

    goto success;

success:
    UNLOCK();
    return 1;
}


/* Object-allocator free entry point: decrement the block counter and
 * delegate to the raw allocator when pymalloc doesn't own the address. */
static void
_PyObject_Free(void *ctx, void *p)
{
    /* PyObject_Free(NULL) has no effect */
    if (p == NULL) {
        return;
    }

    _Py_AllocatedBlocks--;
    if (!pymalloc_free(ctx, p)) {
        /* pymalloc didn't allocate this address */
        PyMem_RawFree(p);
    }
}


/* pymalloc realloc.

   If nbytes==0, then as the Python docs promise, we do not treat this like
   free(p), and return a non-NULL result.

   Return 1 if pymalloc reallocated memory and wrote the new pointer into
   newptr_p.

   Return 0 if pymalloc didn't allocate p.
 */
static int
pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
{
    void *bp;
    poolp pool;
    size_t size;

    assert(p != NULL);

#ifdef WITH_VALGRIND
    /* Treat running_on_valgrind == -1 the same as 0 */
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    pool = POOL_ADDR(p);
    if (!address_in_range(p, pool)) {
        /* pymalloc is not managing this block.

           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
           over this block.  However, if we do, we need to copy the valid data
           from the C-managed block to one of our blocks, and there's no
           portable way to know how much of the memory space starting at p is
           valid.

           As bug 1185883 pointed out the hard way, it's possible that the
           C-managed block is "at the end" of allocated VM space, so that a
           memory fault can occur if we try to copy nbytes bytes starting at p.
           Instead we punt:  let C continue to manage this block. */
        return 0;
    }

    /* pymalloc is in charge of this block */
    size = INDEX2SIZE(pool->szidx);
    if (nbytes <= size) {
        /* The block is staying the same or shrinking.

           If it's shrinking, there's a tradeoff: it costs cycles to copy the
           block to a smaller size class, but it wastes memory not to copy it.

           The compromise here is to copy on shrink only if at least 25% of
           size can be shaved off. */
        if (4 * nbytes > 3 * size) {
            /* It's the same, or shrinking and new/old > 3/4. */
            *newptr_p = p;
            return 1;
        }
        /* Shrinking: only copy `nbytes` valid bytes into the new block. */
        size = nbytes;
    }

    bp = _PyObject_Malloc(ctx, nbytes);
    if (bp != NULL) {
        memcpy(bp, p, size);
        _PyObject_Free(ctx, p);
    }
    *newptr_p = bp;
    return 1;
}


/* Object-allocator realloc entry point: delegate to pymalloc when it owns
 * ptr, otherwise fall through to the raw allocator. */
static void *
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
{
    void *ptr2;

    if (ptr == NULL) {
        return _PyObject_Malloc(ctx, nbytes);
    }

    if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
        return ptr2;
    }

    return PyMem_RawRealloc(ptr, nbytes);
}

#else   /* ! WITH_PYMALLOC */

/*==========================================================================*/
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
 * only be used by extensions that are compiled with pymalloc enabled. */

/* Without pymalloc there is no block accounting; report zero. */
Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    return 0;
}

#endif /* WITH_PYMALLOC */


/*==========================================================================*/
/* A x-platform debugging allocator.  This doesn't manage memory directly,
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
 */

/* Special bytes broadcast into debug memory blocks at appropriate times.
 * Strings of these are unlikely to be valid addresses, floats, ints or
 * 7-bit ASCII.
 */
#undef CLEANBYTE
#undef DEADBYTE
#undef FORBIDDENBYTE
#define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
#define DEADBYTE       0xDB    /* dead (newly freed) memory */
#define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */

static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */

/* serialno is always incremented via calling this routine.  The point is
 * to supply a single place to set a breakpoint.
1962 */ 1963 static void 1964 bumpserialno(void) 1965 { 1966 ++serialno; 1967 } 1968 1969 #define SST SIZEOF_SIZE_T 1970 1971 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */ 1972 static size_t 1973 read_size_t(const void *p) 1974 { 1975 const uint8_t *q = (const uint8_t *)p; 1976 size_t result = *q++; 1977 int i; 1978 1979 for (i = SST; --i > 0; ++q) 1980 result = (result << 8) | *q; 1981 return result; 1982 } 1983 1984 /* Write n as a big-endian size_t, MSB at address p, LSB at 1985 * p + sizeof(size_t) - 1. 1986 */ 1987 static void 1988 write_size_t(void *p, size_t n) 1989 { 1990 uint8_t *q = (uint8_t *)p + SST - 1; 1991 int i; 1992 1993 for (i = SST; --i >= 0; --q) { 1994 *q = (uint8_t)(n & 0xff); 1995 n >>= 8; 1996 } 1997 } 1998 1999 /* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and 2000 fills them with useful stuff, here calling the underlying malloc's result p: 2001 2002 p[0: S] 2003 Number of bytes originally asked for. This is a size_t, big-endian (easier 2004 to read in a memory dump). 2005 p[S] 2006 API ID. See PEP 445. This is a character, but seems undocumented. 2007 p[S+1: 2*S] 2008 Copies of FORBIDDENBYTE. Used to catch under- writes and reads. 2009 p[2*S: 2*S+n] 2010 The requested memory, filled with copies of CLEANBYTE. 2011 Used to catch reference to uninitialized memory. 2012 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc 2013 handled the request itself. 2014 p[2*S+n: 2*S+n+S] 2015 Copies of FORBIDDENBYTE. Used to catch over- writes and reads. 2016 p[2*S+n+S: 2*S+n+2*S] 2017 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc 2018 and _PyMem_DebugRealloc. 2019 This is a big-endian size_t. 2020 If "bad memory" is detected later, the serial number gives an 2021 excellent way to set a breakpoint on the next run, to capture the 2022 instant at which this block was passed out. 
2023 */ 2024 2025 static void * 2026 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) 2027 { 2028 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 2029 uint8_t *p; /* base address of malloc'ed pad block */ 2030 uint8_t *data; /* p + 2*SST == pointer to data bytes */ 2031 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */ 2032 size_t total; /* 2 * SST + nbytes + 2 * SST */ 2033 2034 if (nbytes > (size_t)PY_SSIZE_T_MAX - 4 * SST) { 2035 /* integer overflow: can't represent total as a Py_ssize_t */ 2036 return NULL; 2037 } 2038 total = nbytes + 4 * SST; 2039 2040 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN] 2041 * ^--- p ^--- data ^--- tail 2042 S: nbytes stored as size_t 2043 I: API identifier (1 byte) 2044 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after) 2045 C: Clean bytes used later to store actual data 2046 N: Serial number stored as size_t */ 2047 2048 if (use_calloc) { 2049 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total); 2050 } 2051 else { 2052 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total); 2053 } 2054 if (p == NULL) { 2055 return NULL; 2056 } 2057 data = p + 2*SST; 2058 2059 bumpserialno(); 2060 2061 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ 2062 write_size_t(p, nbytes); 2063 p[SST] = (uint8_t)api->api_id; 2064 memset(p + SST + 1, FORBIDDENBYTE, SST-1); 2065 2066 if (nbytes > 0 && !use_calloc) { 2067 memset(data, CLEANBYTE, nbytes); 2068 } 2069 2070 /* at tail, write pad (SST bytes) and serialno (SST bytes) */ 2071 tail = data + nbytes; 2072 memset(tail, FORBIDDENBYTE, SST); 2073 write_size_t(tail + SST, serialno); 2074 2075 return data; 2076 } 2077 2078 static void * 2079 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes) 2080 { 2081 return _PyMem_DebugRawAlloc(0, ctx, nbytes); 2082 } 2083 2084 static void * 2085 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize) 2086 { 2087 size_t nbytes; 2088 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize); 
    nbytes = nelem * elsize;
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
}


/* Heuristic checking if the memory has been freed.  Rely on the debug hooks
   on Python memory allocators which fills the memory with DEADBYTE (0xDB)
   when memory is deallocated.  Returns 1 only if every one of the size
   bytes at ptr equals DEADBYTE; a single differing byte returns 0. */
int
_PyMem_IsFreed(void *ptr, size_t size)
{
    unsigned char *bytes = ptr;
    for (size_t i=0; i < size; i++) {
        if (bytes[i] != DEADBYTE) {
            return 0;
        }
    }
    return 1;
}


/* The debug free first checks the 2*SST bytes on each end for sanity (in
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
   Then fills the original bytes with DEADBYTE.
   Then calls the underlying free.
*/
static void
_PyMem_DebugRawFree(void *ctx, void *p)
{
    /* PyMem_Free(NULL) has no effect */
    if (p == NULL) {
        return;
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
    size_t nbytes;

    _PyMem_DebugCheckAddress(api->api_id, p);
    nbytes = read_size_t(q);
    /* scribble DEADBYTE over the data AND all 4*SST decoration bytes */
    nbytes += 4 * SST;
    memset(q, DEADBYTE, nbytes);
    api->alloc.free(api->alloc.ctx, q);
}


static void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
    /* realloc(NULL, n) behaves like malloc(n) */
    if (p == NULL) {
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *head;        /* base address of malloc'ed pad block */
    uint8_t *data;        /* pointer to data bytes */
    uint8_t *r;
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
    size_t original_nbytes;
    size_t block_serialno;
#define ERASED_SIZE 64
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */

    _PyMem_DebugCheckAddress(api->api_id, p);

    data = (uint8_t *)p;
    head = data - 2*SST;
    original_nbytes = read_size_t(head);
    if (nbytes > (size_t)PY_SSIZE_T_MAX - 4*SST) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + 4*SST;

    tail = data + original_nbytes;
    block_serialno = read_size_t(tail + SST);
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
       The saved bytes are written back below after the underlying realloc,
       so valid data survives even if the realloc fails or moves the block.
     */
    if (original_nbytes <= sizeof(save)) {
        /* small block: save everything and kill the whole decorated span */
        memcpy(save, data, original_nbytes);
        memset(data - 2*SST, DEADBYTE, original_nbytes + 4*SST);
    }
    else {
        /* large block: save/kill only ERASED_SIZE bytes at each end */
        memcpy(save, data, ERASED_SIZE);
        memset(head, DEADBYTE, ERASED_SIZE + 2*SST);
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
        memset(tail - ERASED_SIZE, DEADBYTE, ERASED_SIZE + 2*SST);
    }

    /* Resize and add decorations. */
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
    if (r == NULL) {
        /* Failed realloc leaves the original block in place, but we already
           scribbled DEADBYTE over parts of it; fall through to rewrite the
           decorations and restore the saved bytes at the old size, then
           return NULL below. */
        nbytes = original_nbytes;
    }
    else {
        head = r;
        bumpserialno();
        block_serialno = serialno;
    }

    write_size_t(head, nbytes);
    head[SST] = (uint8_t)api->api_id;
    memset(head + SST + 1, FORBIDDENBYTE, SST-1);
    data = head + 2*SST;

    tail = data + nbytes;
    memset(tail, FORBIDDENBYTE, SST);
    write_size_t(tail + SST, block_serialno);

    /* Restore saved bytes. */
    if (original_nbytes <= sizeof(save)) {
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
    }
    else {
        size_t i = original_nbytes - ERASED_SIZE;
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
        if (nbytes > i) {
            memcpy(data + i, &save[ERASED_SIZE],
                   Py_MIN(nbytes - i, ERASED_SIZE));
        }
    }

    if (r == NULL) {
        return NULL;
    }

    if (nbytes > original_nbytes) {
        /* growing: mark new extra memory clean */
        memset(data + original_nbytes, CLEANBYTE, nbytes - original_nbytes);
    }

    return data;
}

/* Abort with a fatal error if the caller does not hold the GIL.  The
   non-Raw debug hooks below layer this check on top of the Raw hooks. */
static void
_PyMem_DebugCheckGIL(void)
{
    if (!PyGILState_Check())
        Py_FatalError("Python memory allocator called "
                      "without holding the GIL");
}

static void *
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{
    _PyMem_DebugCheckGIL();
    return _PyMem_DebugRawMalloc(ctx, nbytes);
}

static void *
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
{
    _PyMem_DebugCheckGIL();
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
}


static void
_PyMem_DebugFree(void *ctx, void *ptr)
{
    _PyMem_DebugCheckGIL();
    _PyMem_DebugRawFree(ctx, ptr);
}


static void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{
    _PyMem_DebugCheckGIL();
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
}

/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 * The API id is also checked.
*/
static void
_PyMem_DebugCheckAddress(char api, const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    char msgbuf[64];
    const char *msg;
    size_t nbytes;
    const uint8_t *tail;
    int i;
    char id;

    if (p == NULL) {
        msg = "didn't expect a NULL pointer";
        goto error;
    }

    /* Check the API id */
    id = (char)q[-SST];
    if (id != api) {
        msg = msgbuf;
        snprintf(msgbuf, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
        msgbuf[sizeof(msgbuf)-1] = 0;
        goto error;
    }

    /* Check the stuff at the start of p first: if there's underwrite
     * corruption, the number-of-bytes field may be nuts, and checking
     * the tail could lead to a segfault then.
     */
    for (i = SST-1; i >= 1; --i) {
        if (*(q-i) != FORBIDDENBYTE) {
            msg = "bad leading pad byte";
            goto error;
        }
    }

    /* Only trust the stored size once the leading pad is known good. */
    nbytes = read_size_t(q - 2*SST);
    tail = q + nbytes;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != FORBIDDENBYTE) {
            msg = "bad trailing pad byte";
            goto error;
        }
    }

    return;

error:
    _PyObject_DebugDumpAddress(p);
    Py_FatalError(msg);
}

/* Display info to stderr about the memory block at p. */
static void
_PyObject_DebugDumpAddress(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    const uint8_t *tail;
    size_t nbytes, serial;
    int i;
    int ok;
    char id;

    fprintf(stderr, "Debug memory block at address p=%p:", p);
    if (p == NULL) {
        fprintf(stderr, "\n");
        return;
    }
    id = (char)q[-SST];
    fprintf(stderr, " API '%c'\n", id);

    /* NOTE: if the header is corrupted this size may be bogus; the
       leading-pad report below warns the reader about exactly that. */
    nbytes = read_size_t(q - 2*SST);
    fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally "
                    "requested\n", nbytes);

    /* In case this is nuts, check the leading pad bytes first. */
    fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
    ok = 1;
    for (i = 1; i <= SST-1; ++i) {
        if (*(q-i) != FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                FORBIDDENBYTE);
        for (i = SST-1; i >= 1; --i) {
            const uint8_t byte = *(q-i);
            fprintf(stderr, " at p-%d: 0x%02x", i, byte);
            if (byte != FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }

        fputs(" Because memory is corrupted at the start, the "
              "count of bytes requested\n"
              " may be bogus, and checking the trailing pad "
              "bytes may segfault.\n", stderr);
    }

    tail = q + nbytes;
    fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, tail);
    ok = 1;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                FORBIDDENBYTE);
        for (i = 0; i < SST; ++i) {
            const uint8_t byte = tail[i];
            fprintf(stderr, " at tail+%d: 0x%02x",
                    i, byte);
            if (byte != FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }

    serial = read_size_t(tail + SST);
    fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T
                    "u to debug malloc/realloc.\n", serial);

    if (nbytes > 0) {
        i = 0;
        fputs(" Data at p:", stderr);
        /* print up to 8 bytes at the start */
        while (q < tail && i < 8) {
            fprintf(stderr, " %02x", *q);
            ++i;
            ++q;
        }
        /* and up to 8 at the end */
        if (q < tail) {
            if (tail - q > 8) {
                fputs(" ...", stderr);
                q = tail - 8;
            }
            while (q < tail) {
                fprintf(stderr, " %02x", *q);
                ++q;
            }
        }
        fputc('\n', stderr);
    }
    fputc('\n', stderr);

    fflush(stderr);
    _PyMem_DumpTraceback(fileno(stderr), p);
}


/* Print "msg = value" to out, padding msg to column 35 and rendering
   value right-aligned with thousands separators (commas).  Returns the
   value unchanged so callers can accumulate running totals. */
static size_t
printone(FILE *out, const char* msg, size_t value)
{
    int i, k;
    char buf[100];
    size_t origvalue = value;

    fputs(msg, out);
    for (i = (int)strlen(msg); i < 35; ++i)
        fputc(' ', out);
    fputc('=', out);

    /* Write the value with commas.  The digits are generated least
       significant first, filling buf backward from index 22. */
    i = 22;
    buf[i--] = '\0';
    buf[i--] = '\n';
    k = 3;
    do {
        size_t nextvalue = value / 10;
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
        value = nextvalue;
        buf[i--] = (char)(digit + '0');
        --k;
        if (k == 0 && value && i >= 0) {
            k = 3;
            buf[i--] = ',';
        }
    } while (value && i >= 0);

    /* left-pad with spaces so the number is right-aligned */
    while (i >= 0)
        buf[i--] = ' ';
    fputs(buf, out);

    return origvalue;
}

/* Helper for allocator debug dumps: print one "N <name>s * S bytes each"
   line via printone(), with the description right-aligned in 48 columns. */
void
_PyDebugAllocatorStats(FILE *out,
                       const char *block_name, int num_blocks, size_t sizeof_block)
{
    char buf1[128];
    char buf2[128];
    PyOS_snprintf(buf1, sizeof(buf1),
                  "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
                  num_blocks, block_name, sizeof_block);
    PyOS_snprintf(buf2, sizeof(buf2),
                  "%48s ", buf1);
    (void)printone(out, buf2, num_blocks * sizeof_block);
}


#ifdef WITH_PYMALLOC

#ifdef Py_DEBUG
/* Is target in the list?  The list is traversed via the nextpool pointers.
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
 * list, else 0.
 */
static int
pool_is_in_list(const poolp target, poolp list)
{
    poolp origlist = list;
    assert(target != NULL);
    if (list == NULL)
        return 0;
    do {
        if (target == list)
            return 1;
        list = list->nextpool;
    } while (list != NULL && list != origlist);  /* stop on NULL or wrap */
    return 0;
}
#endif

/* Print summary info to "out" about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 *
 * Return 0 if the memory debug hooks are not installed or no statistics was
 * written into out, return 1 otherwise.
 */
int
_PyObject_DebugMallocStats(FILE *out)
{
    if (!_PyMem_PymallocEnabled()) {
        return 0;
    }

    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uint j;
        uintptr_t base = arenas[i].address;

        /* Skip arenas which are not allocated. */
        if (arenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += arenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) arenas[i].pool_address);
        for (j = 0; base < (uintptr_t) arenas[i].pool_address;
             ++j, base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, arenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    /* Per-size-class table. */
    fputc('\n', out);
    fputs("class size num pools blocks in use avail blocks\n"
          "----- ---- --------- ------------- ------------\n",
          out);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            /* a class with no pools must have no blocks either way */
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u "
                     "%11" PY_FORMAT_SIZE_T "u "
                     "%15" PY_FORMAT_SIZE_T "u "
                     "%13" PY_FORMAT_SIZE_T "u\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }

    /* Arena-level summary lines. */
    fputc('\n', out);
    if (_PyMem_DebugEnabled())
        (void)printone(out, "# times object malloc called", serialno);
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    /* Accumulate a byte total; it should account for every arena byte. */
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
                  "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    return 1;
}

#endif /* #ifdef WITH_PYMALLOC */