/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
# define PROF_PREFIX_DEFAULT		"jeprof"
#else
# define PROF_PREFIX_DEFAULT		""
#endif
#define LG_PROF_SAMPLE_DEFAULT		19
#define LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define PROF_BT_MAX			128

/* Initial hash table size. */
#define PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
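
/*
 * Illustrative note: these near-NULL sentinels are only ever compared
 * against, never dereferenced.  For example, prof_sample_accum_update()
 * below filters them out before use:
 *
 *	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *		tdata = NULL;
 */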

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory	/* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
	 * defunct during teardown.
	 */
	uint64_t		thr_uid;
	uint64_t		thr_discrim;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
	 *   threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;

struct prof_gctx_s {
	/* Protects nlimbo, cnt_summed, and tctxs. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing this gctx.
	 *   - Initializing per thread counters associated with this gctx.
	 *   - Preparing to destroy this gctx.
	 *   - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/* Backtrace vector, variable size, referred to by bt. */
	void			*vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;

struct prof_tdata_s {
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	bool			attached;
	bool			expired;

	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation counters
	 * associated with thread-specific prof_tctx_t objects.  Other threads
	 * may write to prof_tctx_t contents when freeing associated objects.
	 */
	ckh_t			bt2tctx;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
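
/*
 * Informal summary of how these structures relate (a reading aid): each
 * thread owns one prof_tdata_t; each unique backtrace is represented by one
 * global prof_gctx_t; and each (thread, backtrace) pair with live counters is
 * a prof_tctx_t, reachable both via the owning tdata's bt2tctx hash and via
 * the gctx's tctxs tree.
 */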

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;	/* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval;	/* lg(prof_interval). */
extern bool	opt_prof_gdump;		/* High-water memory dumping. */
extern bool	opt_prof_final;		/* Final profile dumping. */
extern bool	opt_prof_leak;		/* Dump leak summary at exit. */
extern bool	opt_prof_accum;		/* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;

void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsd_t *tsd, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
const char	*prof_thread_name_get(void);
bool	prof_active_get(void);
bool	prof_active_set(bool active);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(void);
bool	prof_thread_active_set(bool active);
bool	prof_thread_active_init_get(void);
bool	prof_thread_active_init_set(bool active_init);
bool	prof_gdump_get(void);
bool	prof_gdump_set(bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);
void	prof_sample_threshold_update(prof_tdata_t *tdata);
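
/*
 * Illustrative sketch of how the allocation path is expected to drive these
 * entry points (the real call sites live in the allocator's fast/slow paths;
 * variable names here are hypothetical):
 *
 *	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
 *	ptr = <allocate usize bytes>;
 *	if (ptr == NULL)
 *		prof_alloc_rollback(tsd, tctx, true);
 *	else
 *		prof_malloc(ptr, usize, tctx);
 */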

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
    bool update);
prof_tctx_t	*prof_tctx_get(const void *ptr);
void	prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
void	prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
    prof_tctx_t *old_tctx);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
    size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(ptr, usize, tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
    prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
}

JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	if (unlikely(tdata == NULL))
		return (true);

	if (likely(tdata->bytes_until_sample >= usize)) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		return (!tdata->active);
	}
}
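
/*
 * Informal note: (prof_tctx_t *)(uintptr_t)1U serves as a sentinel meaning
 * "allocation not sampled"; only tctx pointers greater than 1 refer to real
 * prof_tctx_t objects.  Correspondingly, prof_sample_accum_update() above
 * returns true when the current allocation should not be sampled, and false
 * when a backtrace should be captured.
 */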

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
	    &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}

JEMALLOC_ALWAYS_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
    prof_tctx_t *old_tctx)
{
	bool sampled, old_sampled;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (prof_active && !updated && ptr != NULL) {
		assert(usize == isalloc(ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to prof_alloc_prep()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);

	if (unlikely(sampled))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_reset(ptr, usize, old_ptr, old_tctx);

	if (unlikely(old_sampled))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(ptr);

	cassert(config_prof);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/