/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT "jeprof"
#else
#  define PROF_PREFIX_DEFAULT ""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
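
/*
 * For example, code that may be handed one of these sentinels treats any
 * value at or below PROF_TDATA_STATE_MAX as "no usable tdata", as
 * prof_sample_accum_update() does in the inlines below:
 *
 *	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *		tdata = NULL;
 */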

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_uid, necessary because tdata may be defunct
	 * during teardown.
	 */
	uint64_t		thr_uid;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but
	 *   this threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;

struct prof_gctx_s {
	/* Protects nlimbo, cnt_summed, and tctxs. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 * - Initializing this gctx.
	 * - Initializing per thread counters associated with this gctx.
	 * - Preparing to destroy this gctx.
	 * - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/* Backtrace vector, variable size, referred to by bt. */
	void			*vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
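
/*
 * The trailing vec[1] member is the usual C89 "struct hack": each gctx is
 * over-allocated so that vec can hold the program counters of its backtrace,
 * and bt.vec points at it.  A sketch of the size computation for a backtrace
 * bt (illustrative; the actual allocation call lives in prof.c):
 *
 *	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
 */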

struct prof_tdata_s {
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	bool			attached;
	bool			expired;

	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation counters
	 * associated with thread-specific prof_tctx_t objects.  Other threads
	 * may write to prof_tctx_t contents when freeing associated objects.
	 */
	ckh_t			bt2tctx;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
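
/*
 * For example, with the illustrative setting opt_lg_prof_interval = 30,
 * prof_interval = 2^30 bytes: dumps then occur on average once per GiB of
 * allocation, but with 4 arenas the gap between dumps can approach 4 GiB if
 * the allocation load is spread evenly across the arenas.
 */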

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;

void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsd_t *tsd, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
const char	*prof_thread_name_get(void);
bool	prof_active_get(void);
bool	prof_active_set(bool active);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(void);
bool	prof_thread_active_set(bool active);
bool	prof_thread_active_init_get(void);
bool	prof_thread_active_init_set(bool active_init);
bool	prof_gdump_get(void);
bool	prof_gdump_set(bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);
void	prof_sample_threshold_update(prof_tdata_t *tdata);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
prof_tctx_t	*prof_tctx_get(const void *ptr);
void	prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(ptr, tctx);
}

JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	if (tdata == NULL)
		return (true);

	if (tdata->bytes_until_sample >= usize) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		return (!tdata->active);
	}
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd,
	    usize, update, &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}
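
/*
 * A sketch of the convention used below: prof_alloc_prep() returns
 * (prof_tctx_t *)(uintptr_t)1U when an allocation is not sampled (sampling
 * occurs on average once per 2^lg_prof_sample bytes allocated, 512 KiB with
 * the default of 19) and a real tctx pointer when it is, so callers can
 * distinguish the two cases with a single comparison, e.g.:
 *
 *	tctx = prof_alloc_prep(tsd, usize, true);
 *	if ((uintptr_t)tctx > (uintptr_t)1U)
 *		... the allocation was sampled; record it ...
 */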

JEMALLOC_ALWAYS_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool updated, size_t old_usize, prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (!updated && ptr != NULL) {
		assert(usize == isalloc(ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to PROF_ALLOC_PREP()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(ptr);

	cassert(config_prof);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/