1 /******************************************************************************/ 2 #ifdef JEMALLOC_H_TYPES 3 4 typedef struct prof_bt_s prof_bt_t; 5 typedef struct prof_cnt_s prof_cnt_t; 6 typedef struct prof_tctx_s prof_tctx_t; 7 typedef struct prof_gctx_s prof_gctx_t; 8 typedef struct prof_tdata_s prof_tdata_t; 9 10 /* Option defaults. */ 11 #ifdef JEMALLOC_PROF 12 # define PROF_PREFIX_DEFAULT "jeprof" 13 #else 14 # define PROF_PREFIX_DEFAULT "" 15 #endif 16 #define LG_PROF_SAMPLE_DEFAULT 19 17 #define LG_PROF_INTERVAL_DEFAULT -1 18 19 /* 20 * Hard limit on stack backtrace depth. The version of prof_backtrace() that 21 * is based on __builtin_return_address() necessarily has a hard-coded number 22 * of backtrace frame handlers, and should be kept in sync with this setting. 23 */ 24 #define PROF_BT_MAX 128 25 26 /* Initial hash table size. */ 27 #define PROF_CKH_MINITEMS 64 28 29 /* Size of memory buffer to use when writing dump files. */ 30 #define PROF_DUMP_BUFSIZE 65536 31 32 /* Size of stack-allocated buffer used by prof_printf(). */ 33 #define PROF_PRINTF_BUFSIZE 128 34 35 /* 36 * Number of mutexes shared among all gctx's. No space is allocated for these 37 * unless profiling is enabled, so it's okay to over-provision. 38 */ 39 #define PROF_NCTX_LOCKS 1024 40 41 /* 42 * Number of mutexes shared among all tdata's. No space is allocated for these 43 * unless profiling is enabled, so it's okay to over-provision. 44 */ 45 #define PROF_NTDATA_LOCKS 256 46 47 /* 48 * prof_tdata pointers close to NULL are used to encode state information that 49 * is used for cleaning up during thread shutdown. 
50 */ 51 #define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) 52 #define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) 53 #define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY 54 55 #endif /* JEMALLOC_H_TYPES */ 56 /******************************************************************************/ 57 #ifdef JEMALLOC_H_STRUCTS 58 59 struct prof_bt_s { 60 /* Backtrace, stored as len program counters. */ 61 void **vec; 62 unsigned len; 63 }; 64 65 #ifdef JEMALLOC_PROF_LIBGCC 66 /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ 67 typedef struct { 68 prof_bt_t *bt; 69 unsigned max; 70 } prof_unwind_data_t; 71 #endif 72 73 struct prof_cnt_s { 74 /* Profiling counters. */ 75 uint64_t curobjs; 76 uint64_t curbytes; 77 uint64_t accumobjs; 78 uint64_t accumbytes; 79 }; 80 81 typedef enum { 82 prof_tctx_state_initializing, 83 prof_tctx_state_nominal, 84 prof_tctx_state_dumping, 85 prof_tctx_state_purgatory /* Dumper must finish destroying. */ 86 } prof_tctx_state_t; 87 88 struct prof_tctx_s { 89 /* Thread data for thread that performed the allocation. */ 90 prof_tdata_t *tdata; 91 92 /* 93 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be 94 * defunct during teardown. 95 */ 96 uint64_t thr_uid; 97 uint64_t thr_discrim; 98 99 /* Profiling counters, protected by tdata->lock. */ 100 prof_cnt_t cnts; 101 102 /* Associated global context. */ 103 prof_gctx_t *gctx; 104 105 /* 106 * UID that distinguishes multiple tctx's created by the same thread, 107 * but coexisting in gctx->tctxs. There are two ways that such 108 * coexistence can occur: 109 * - A dumper thread can cause a tctx to be retained in the purgatory 110 * state. 111 * - Although a single "producer" thread must create all tctx's which 112 * share the same thr_uid, multiple "consumers" can each concurrently 113 * execute portions of prof_tctx_destroy(). 
prof_tctx_destroy() only 114 * gets called once each time cnts.cur{objs,bytes} drop to 0, but this 115 * threshold can be hit again before the first consumer finishes 116 * executing prof_tctx_destroy(). 117 */ 118 uint64_t tctx_uid; 119 120 /* Linkage into gctx's tctxs. */ 121 rb_node(prof_tctx_t) tctx_link; 122 123 /* 124 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents 125 * sample vs destroy race. 126 */ 127 bool prepared; 128 129 /* Current dump-related state, protected by gctx->lock. */ 130 prof_tctx_state_t state; 131 132 /* 133 * Copy of cnts snapshotted during early dump phase, protected by 134 * dump_mtx. 135 */ 136 prof_cnt_t dump_cnts; 137 }; 138 typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; 139 140 struct prof_gctx_s { 141 /* Protects nlimbo, cnt_summed, and tctxs. */ 142 malloc_mutex_t *lock; 143 144 /* 145 * Number of threads that currently cause this gctx to be in a state of 146 * limbo due to one of: 147 * - Initializing this gctx. 148 * - Initializing per thread counters associated with this gctx. 149 * - Preparing to destroy this gctx. 150 * - Dumping a heap profile that includes this gctx. 151 * nlimbo must be 1 (single destroyer) in order to safely destroy the 152 * gctx. 153 */ 154 unsigned nlimbo; 155 156 /* 157 * Tree of profile counters, one for each thread that has allocated in 158 * this context. 159 */ 160 prof_tctx_tree_t tctxs; 161 162 /* Linkage for tree of contexts to be dumped. */ 163 rb_node(prof_gctx_t) dump_link; 164 165 /* Temporary storage for summation during dump. */ 166 prof_cnt_t cnt_summed; 167 168 /* Associated backtrace. */ 169 prof_bt_t bt; 170 171 /* Backtrace vector, variable size, referred to by bt. */ 172 void *vec[1]; 173 }; 174 typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; 175 176 struct prof_tdata_s { 177 malloc_mutex_t *lock; 178 179 /* Monotonically increasing unique thread identifier. 
*/ 180 uint64_t thr_uid; 181 182 /* 183 * Monotonically increasing discriminator among tdata structures 184 * associated with the same thr_uid. 185 */ 186 uint64_t thr_discrim; 187 188 /* Included in heap profile dumps if non-NULL. */ 189 char *thread_name; 190 191 bool attached; 192 bool expired; 193 194 rb_node(prof_tdata_t) tdata_link; 195 196 /* 197 * Counter used to initialize prof_tctx_t's tctx_uid. No locking is 198 * necessary when incrementing this field, because only one thread ever 199 * does so. 200 */ 201 uint64_t tctx_uid_next; 202 203 /* 204 * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks 205 * backtraces for which it has non-zero allocation/deallocation counters 206 * associated with thread-specific prof_tctx_t objects. Other threads 207 * may write to prof_tctx_t contents when freeing associated objects. 208 */ 209 ckh_t bt2tctx; 210 211 /* Sampling state. */ 212 uint64_t prng_state; 213 uint64_t bytes_until_sample; 214 215 /* State used to avoid dumping while operating on prof internals. */ 216 bool enq; 217 bool enq_idump; 218 bool enq_gdump; 219 220 /* 221 * Set to true during an early dump phase for tdata's which are 222 * currently being dumped. New threads' tdata's have this initialized 223 * to false so that they aren't accidentally included in later dump 224 * phases. 225 */ 226 bool dumping; 227 228 /* 229 * True if profiling is active for this tdata's thread 230 * (thread.prof.active mallctl). 231 */ 232 bool active; 233 234 /* Temporary storage for summation during dump. */ 235 prof_cnt_t cnt_summed; 236 237 /* Backtrace vector, used for calls to prof_backtrace(). 
*/ 238 void *vec[PROF_BT_MAX]; 239 }; 240 typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; 241 242 #endif /* JEMALLOC_H_STRUCTS */ 243 /******************************************************************************/ 244 #ifdef JEMALLOC_H_EXTERNS 245 246 extern bool opt_prof; 247 extern bool opt_prof_active; 248 extern bool opt_prof_thread_active_init; 249 extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ 250 extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ 251 extern bool opt_prof_gdump; /* High-water memory dumping. */ 252 extern bool opt_prof_final; /* Final profile dumping. */ 253 extern bool opt_prof_leak; /* Dump leak summary at exit. */ 254 extern bool opt_prof_accum; /* Report cumulative bytes. */ 255 extern char opt_prof_prefix[ 256 /* Minimize memory bloat for non-prof builds. */ 257 #ifdef JEMALLOC_PROF 258 PATH_MAX + 259 #endif 260 1]; 261 262 /* Accessed via prof_active_[gs]et{_unlocked,}(). */ 263 extern bool prof_active; 264 265 /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ 266 extern bool prof_gdump_val; 267 268 /* 269 * Profile dump interval, measured in bytes allocated. Each arena triggers a 270 * profile dump when it reaches this threshold. The effect is that the 271 * interval between profile dumps averages prof_interval, though the actual 272 * interval between dumps will tend to be sporadic, and the interval will be a 273 * maximum of approximately (prof_interval * narenas). 274 */ 275 extern uint64_t prof_interval; 276 277 /* 278 * Initialized as opt_lg_prof_sample, and potentially modified during profiling 279 * resets. 
280 */ 281 extern size_t lg_prof_sample; 282 283 void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); 284 void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, 285 prof_tctx_t *tctx); 286 void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); 287 void bt_init(prof_bt_t *bt, void **vec); 288 void prof_backtrace(prof_bt_t *bt); 289 prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); 290 #ifdef JEMALLOC_JET 291 size_t prof_tdata_count(void); 292 size_t prof_bt_count(void); 293 const prof_cnt_t *prof_cnt_all(void); 294 typedef int (prof_dump_open_t)(bool, const char *); 295 extern prof_dump_open_t *prof_dump_open; 296 typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); 297 extern prof_dump_header_t *prof_dump_header; 298 #endif 299 void prof_idump(tsdn_t *tsdn); 300 bool prof_mdump(tsd_t *tsd, const char *filename); 301 void prof_gdump(tsdn_t *tsdn); 302 prof_tdata_t *prof_tdata_init(tsd_t *tsd); 303 prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); 304 void prof_reset(tsd_t *tsd, size_t lg_sample); 305 void prof_tdata_cleanup(tsd_t *tsd); 306 bool prof_active_get(tsdn_t *tsdn); 307 bool prof_active_set(tsdn_t *tsdn, bool active); 308 const char *prof_thread_name_get(tsd_t *tsd); 309 int prof_thread_name_set(tsd_t *tsd, const char *thread_name); 310 bool prof_thread_active_get(tsd_t *tsd); 311 bool prof_thread_active_set(tsd_t *tsd, bool active); 312 bool prof_thread_active_init_get(tsdn_t *tsdn); 313 bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); 314 bool prof_gdump_get(tsdn_t *tsdn); 315 bool prof_gdump_set(tsdn_t *tsdn, bool active); 316 void prof_boot0(void); 317 void prof_boot1(void); 318 bool prof_boot2(tsd_t *tsd); 319 void prof_prefork0(tsdn_t *tsdn); 320 void prof_prefork1(tsdn_t *tsdn); 321 void prof_postfork_parent(tsdn_t *tsdn); 322 void prof_postfork_child(tsdn_t *tsdn); 323 void prof_sample_threshold_update(prof_tdata_t *tdata); 324 325 
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

/*
 * Out-of-line prototypes, used when inlining is disabled.  Parameter names
 * match the definitions below.
 */
#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
prof_tctx_t	*prof_tctx_get(tsdn_t *tsdn, const void *ptr);
void	prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
    const void *old_ptr, prof_tctx_t *old_tctx);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
    bool update);
void	prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
    size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Read the global prof_active flag without taking any lock. */
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}

/* Read the global prof_gdump_val flag without taking any lock. */
JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

/*
 * Return the calling thread's tdata as stored in tsd.
 *
 * If create is true, a missing tdata is initialized via prof_tdata_init()
 * (only when tsd is nominal) and an expired tdata is replaced via
 * prof_tdata_reinit(); in both cases the new value is stored back into tsd.
 * If create is false the stored value is returned untouched.
 *
 * The result may be NULL, so callers must check before dereferencing.
 */
JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

/* Return the tctx recorded for ptr (thin wrapper; ptr must be non-NULL). */
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(tsdn_t *tsdn, const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(tsdn, ptr));
}

/* Record tctx for ptr (thin wrapper; ptr must be non-NULL). */
JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(tsdn, ptr, usize, tctx);
}

/*
 * Reset the tctx recorded for ptr after a reallocation that is not sampled,
 * given the previous pointer and its tctx (thin wrapper; ptr must be
 * non-NULL).
 */
JEMALLOC_ALWAYS_INLINE void
prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
    prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
}

/*
 * Account usize bytes against the calling thread's sampling budget.
 *
 * Returns true if the allocation should NOT be sampled, false if it should.
 * If update is true the thread's sampling state is modified (the budget is
 * decremented, or a new threshold is computed once it is exhausted);
 * otherwise the state is only inspected.
 *
 * If tdata_out is non-NULL it receives the thread's tdata, or NULL when no
 * usable tdata exists (including the PROF_TDATA_STATE_* marker values).  When
 * this function returns false, *tdata_out is guaranteed to be non-NULL.
 */
JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	/* Pointers at or below the state markers are not real tdata's. */
	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	/* Without a tdata there is nothing to sample against. */
	if (unlikely(tdata == NULL))
		return (true);

	if (likely(tdata->bytes_until_sample >= usize)) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		/* Sample only if profiling is active for this thread. */
		return (!tdata->active);
	}
}

/*
 * Decide, before allocating usize bytes, whether the allocation is sampled.
 *
 * Returns the sentinel (prof_tctx_t *)1U when the allocation is not sampled
 * (prof_active is false, or prof_sample_accum_update() says not to sample).
 * Otherwise captures a backtrace into the thread's tdata->vec and returns the
 * result of prof_lookup() for it.  usize must already be a usable size
 * (usize == s2u(usize)).
 */
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
	    &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		/*
		 * tdata is non-NULL here: prof_sample_accum_update() returns
		 * true whenever it has no usable tdata.
		 */
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}

/*
 * Record the profiling outcome for a freshly allocated object.
 *
 * A tctx value greater than 1U means the allocation is sampled and is passed
 * to prof_malloc_sample_object(); any other value causes the sentinel
 * (prof_tctx_t *)1U to be recorded for ptr.
 */
JEMALLOC_ALWAYS_INLINE void
prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(tsdn, ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
	else
		prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}

/*
 * Record the profiling outcome of a reallocation.
 *
 * tctx is the value prof_alloc_prep() returned for the new object; old_ptr,
 * old_usize and old_tctx describe the object being replaced.  If prof_active
 * is true and the sampling state was not updated at prep time (!updated), the
 * sampling decision is re-evaluated against the actual usize and rolled back
 * when the allocation turns out not to cross the sample threshold.
 *
 * ptr may be NULL only when tctx is not a sampled tctx (<= 1U).
 */
JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
    prof_tctx_t *old_tctx)
{
	bool sampled, old_sampled;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (prof_active && !updated && ptr != NULL) {
		assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to prof_alloc_prep()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			prof_alloc_rollback(tsd, tctx, true);
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);

	/* Account for the new object before releasing the old one. */
	if (unlikely(sampled))
		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
	else
		prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);

	if (unlikely(old_sampled))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
}

/*
 * Update profiling state for an object that is about to be freed.  Only
 * objects whose recorded tctx is greater than 1U (i.e. sampled) require any
 * work.
 */
JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);

	cassert(config_prof);
	assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/