      1 #define	JEMALLOC_PROF_C_
      2 #include "jemalloc/internal/jemalloc_internal.h"
      3 /******************************************************************************/
      4 
      5 #ifdef JEMALLOC_PROF_LIBUNWIND
      6 #define	UNW_LOCAL_ONLY
      7 #include <libunwind.h>
      8 #endif
      9 
     10 #ifdef JEMALLOC_PROF_LIBGCC
     11 #include <unwind.h>
     12 #endif
     13 
     14 /******************************************************************************/
     15 /* Data. */
     16 
     17 malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
     18 
     19 bool		opt_prof = false;
     20 bool		opt_prof_active = true;
     21 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
     22 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
     23 bool		opt_prof_gdump = false;
     24 bool		opt_prof_final = true;
     25 bool		opt_prof_leak = false;
     26 bool		opt_prof_accum = false;
     27 char		opt_prof_prefix[
     28     /* Minimize memory bloat for non-prof builds. */
     29 #ifdef JEMALLOC_PROF
     30     PATH_MAX +
     31 #endif
     32     1];
     33 
     34 uint64_t	prof_interval = 0;
     35 
     36 /*
     37  * Table of mutexes that are shared among ctx's.  These are leaf locks, so
     38  * there is no problem with using them for more than one ctx at the same time.
      39  * The primary motivation for this sharing, though, is that ctx's are
      40  * ephemeral, and destroying mutexes causes complications for systems that
      41  * allocate when creating/destroying mutexes.
     42  */
     43 static malloc_mutex_t	*ctx_locks;
     44 static unsigned		cum_ctxs; /* Atomic counter. */
     45 
     46 /*
     47  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
     48  * structure that knows about all backtraces currently captured.
     49  */
     50 static ckh_t		bt2ctx;
     51 static malloc_mutex_t	bt2ctx_mtx;
     52 
     53 static malloc_mutex_t	prof_dump_seq_mtx;
     54 static uint64_t		prof_dump_seq;
     55 static uint64_t		prof_dump_iseq;
     56 static uint64_t		prof_dump_mseq;
     57 static uint64_t		prof_dump_useq;
     58 
     59 /*
     60  * This buffer is rather large for stack allocation, so use a single buffer for
     61  * all profile dumps.
     62  */
     63 static malloc_mutex_t	prof_dump_mtx;
     64 static char		prof_dump_buf[
     65     /* Minimize memory bloat for non-prof builds. */
     66 #ifdef JEMALLOC_PROF
     67     PROF_DUMP_BUFSIZE
     68 #else
     69     1
     70 #endif
     71 ];
     72 static unsigned		prof_dump_buf_end;
     73 static int		prof_dump_fd;
     74 
     75 /* Do not dump any profiles until bootstrapping is complete. */
     76 static bool		prof_booted = false;
     77 
     78 /******************************************************************************/
     79 
     80 void
     81 bt_init(prof_bt_t *bt, void **vec)
     82 {
     83 
     84 	cassert(config_prof);
     85 
     86 	bt->vec = vec;
     87 	bt->len = 0;
     88 }
     89 
     90 static void
     91 bt_destroy(prof_bt_t *bt)
     92 {
     93 
     94 	cassert(config_prof);
     95 
     96 	idalloc(bt);
     97 }
     98 
     99 static prof_bt_t *
    100 bt_dup(prof_bt_t *bt)
    101 {
    102 	prof_bt_t *ret;
    103 
    104 	cassert(config_prof);
    105 
    106 	/*
    107 	 * Create a single allocation that has space for vec immediately
    108 	 * following the prof_bt_t structure.  The backtraces that get
    109 	 * stored in the backtrace caches are copied from stack-allocated
    110 	 * temporary variables, so size is known at creation time.  Making this
    111 	 * a contiguous object improves cache locality.
    112 	 */
    113 	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
    114 	    (bt->len * sizeof(void *)));
    115 	if (ret == NULL)
    116 		return (NULL);
    117 	ret->vec = (void **)((uintptr_t)ret +
    118 	    QUANTUM_CEILING(sizeof(prof_bt_t)));
    119 	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
    120 	ret->len = bt->len;
    121 
    122 	return (ret);
    123 }
    124 
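         /*
          * prof_enter() and prof_leave() bracket all accesses to the global
          * bt2ctx table.  While bt2ctx_mtx is held, enq is set so that
          * prof_idump() and prof_gdump() record a pending request instead of
          * dumping immediately; prof_leave() performs any dumps that were
          * deferred while the lock was held.
          */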
    125 static inline void
    126 prof_enter(prof_tdata_t *prof_tdata)
    127 {
    128 
    129 	cassert(config_prof);
    130 
    131 	assert(prof_tdata->enq == false);
    132 	prof_tdata->enq = true;
    133 
    134 	malloc_mutex_lock(&bt2ctx_mtx);
    135 }
    136 
    137 static inline void
    138 prof_leave(prof_tdata_t *prof_tdata)
    139 {
    140 	bool idump, gdump;
    141 
    142 	cassert(config_prof);
    143 
    144 	malloc_mutex_unlock(&bt2ctx_mtx);
    145 
    146 	assert(prof_tdata->enq);
    147 	prof_tdata->enq = false;
    148 	idump = prof_tdata->enq_idump;
    149 	prof_tdata->enq_idump = false;
    150 	gdump = prof_tdata->enq_gdump;
    151 	prof_tdata->enq_gdump = false;
    152 
    153 	if (idump)
    154 		prof_idump();
    155 	if (gdump)
    156 		prof_gdump();
    157 }
    158 
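         /*
          * prof_backtrace() has three alternative implementations, selected at
          * configure time: libunwind, libgcc's _Unwind_Backtrace(), or gcc's
          * __builtin_return_address() intrinsics.  The final #else branch is an
          * unreachable stub for builds without heap profiling.
          */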
    159 #ifdef JEMALLOC_PROF_LIBUNWIND
    160 void
    161 prof_backtrace(prof_bt_t *bt)
    162 {
    163 	int nframes;
    164 
    165 	cassert(config_prof);
    166 	assert(bt->len == 0);
    167 	assert(bt->vec != NULL);
    168 
    169 	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
    170 	if (nframes <= 0)
    171 		return;
    172 	bt->len = nframes;
    173 }
    174 #elif (defined(JEMALLOC_PROF_LIBGCC))
    175 static _Unwind_Reason_Code
    176 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
    177 {
    178 
    179 	cassert(config_prof);
    180 
    181 	return (_URC_NO_REASON);
    182 }
    183 
    184 static _Unwind_Reason_Code
    185 prof_unwind_callback(struct _Unwind_Context *context, void *arg)
    186 {
    187 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
    188 	void *ip;
    189 
    190 	cassert(config_prof);
    191 
    192 	ip = (void *)_Unwind_GetIP(context);
    193 	if (ip == NULL)
    194 		return (_URC_END_OF_STACK);
    195 	data->bt->vec[data->bt->len] = ip;
    196 	data->bt->len++;
    197 	if (data->bt->len == data->max)
    198 		return (_URC_END_OF_STACK);
    199 
    200 	return (_URC_NO_REASON);
    201 }
    202 
    203 void
    204 prof_backtrace(prof_bt_t *bt)
    205 {
    206 	prof_unwind_data_t data = {bt, PROF_BT_MAX};
    207 
    208 	cassert(config_prof);
    209 
    210 	_Unwind_Backtrace(prof_unwind_callback, &data);
    211 }
    212 #elif (defined(JEMALLOC_PROF_GCC))
    213 void
    214 prof_backtrace(prof_bt_t *bt)
    215 {
    216 #define	BT_FRAME(i)							\
    217 	if ((i) < PROF_BT_MAX) {					\
    218 		void *p;						\
    219 		if (__builtin_frame_address(i) == 0)			\
    220 			return;						\
    221 		p = __builtin_return_address(i);			\
    222 		if (p == NULL)						\
    223 			return;						\
    224 		bt->vec[(i)] = p;					\
    225 		bt->len = (i) + 1;					\
    226 	} else								\
    227 		return;
    228 
    229 	cassert(config_prof);
    230 
    231 	BT_FRAME(0)
    232 	BT_FRAME(1)
    233 	BT_FRAME(2)
    234 	BT_FRAME(3)
    235 	BT_FRAME(4)
    236 	BT_FRAME(5)
    237 	BT_FRAME(6)
    238 	BT_FRAME(7)
    239 	BT_FRAME(8)
    240 	BT_FRAME(9)
    241 
    242 	BT_FRAME(10)
    243 	BT_FRAME(11)
    244 	BT_FRAME(12)
    245 	BT_FRAME(13)
    246 	BT_FRAME(14)
    247 	BT_FRAME(15)
    248 	BT_FRAME(16)
    249 	BT_FRAME(17)
    250 	BT_FRAME(18)
    251 	BT_FRAME(19)
    252 
    253 	BT_FRAME(20)
    254 	BT_FRAME(21)
    255 	BT_FRAME(22)
    256 	BT_FRAME(23)
    257 	BT_FRAME(24)
    258 	BT_FRAME(25)
    259 	BT_FRAME(26)
    260 	BT_FRAME(27)
    261 	BT_FRAME(28)
    262 	BT_FRAME(29)
    263 
    264 	BT_FRAME(30)
    265 	BT_FRAME(31)
    266 	BT_FRAME(32)
    267 	BT_FRAME(33)
    268 	BT_FRAME(34)
    269 	BT_FRAME(35)
    270 	BT_FRAME(36)
    271 	BT_FRAME(37)
    272 	BT_FRAME(38)
    273 	BT_FRAME(39)
    274 
    275 	BT_FRAME(40)
    276 	BT_FRAME(41)
    277 	BT_FRAME(42)
    278 	BT_FRAME(43)
    279 	BT_FRAME(44)
    280 	BT_FRAME(45)
    281 	BT_FRAME(46)
    282 	BT_FRAME(47)
    283 	BT_FRAME(48)
    284 	BT_FRAME(49)
    285 
    286 	BT_FRAME(50)
    287 	BT_FRAME(51)
    288 	BT_FRAME(52)
    289 	BT_FRAME(53)
    290 	BT_FRAME(54)
    291 	BT_FRAME(55)
    292 	BT_FRAME(56)
    293 	BT_FRAME(57)
    294 	BT_FRAME(58)
    295 	BT_FRAME(59)
    296 
    297 	BT_FRAME(60)
    298 	BT_FRAME(61)
    299 	BT_FRAME(62)
    300 	BT_FRAME(63)
    301 	BT_FRAME(64)
    302 	BT_FRAME(65)
    303 	BT_FRAME(66)
    304 	BT_FRAME(67)
    305 	BT_FRAME(68)
    306 	BT_FRAME(69)
    307 
    308 	BT_FRAME(70)
    309 	BT_FRAME(71)
    310 	BT_FRAME(72)
    311 	BT_FRAME(73)
    312 	BT_FRAME(74)
    313 	BT_FRAME(75)
    314 	BT_FRAME(76)
    315 	BT_FRAME(77)
    316 	BT_FRAME(78)
    317 	BT_FRAME(79)
    318 
    319 	BT_FRAME(80)
    320 	BT_FRAME(81)
    321 	BT_FRAME(82)
    322 	BT_FRAME(83)
    323 	BT_FRAME(84)
    324 	BT_FRAME(85)
    325 	BT_FRAME(86)
    326 	BT_FRAME(87)
    327 	BT_FRAME(88)
    328 	BT_FRAME(89)
    329 
    330 	BT_FRAME(90)
    331 	BT_FRAME(91)
    332 	BT_FRAME(92)
    333 	BT_FRAME(93)
    334 	BT_FRAME(94)
    335 	BT_FRAME(95)
    336 	BT_FRAME(96)
    337 	BT_FRAME(97)
    338 	BT_FRAME(98)
    339 	BT_FRAME(99)
    340 
    341 	BT_FRAME(100)
    342 	BT_FRAME(101)
    343 	BT_FRAME(102)
    344 	BT_FRAME(103)
    345 	BT_FRAME(104)
    346 	BT_FRAME(105)
    347 	BT_FRAME(106)
    348 	BT_FRAME(107)
    349 	BT_FRAME(108)
    350 	BT_FRAME(109)
    351 
    352 	BT_FRAME(110)
    353 	BT_FRAME(111)
    354 	BT_FRAME(112)
    355 	BT_FRAME(113)
    356 	BT_FRAME(114)
    357 	BT_FRAME(115)
    358 	BT_FRAME(116)
    359 	BT_FRAME(117)
    360 	BT_FRAME(118)
    361 	BT_FRAME(119)
    362 
    363 	BT_FRAME(120)
    364 	BT_FRAME(121)
    365 	BT_FRAME(122)
    366 	BT_FRAME(123)
    367 	BT_FRAME(124)
    368 	BT_FRAME(125)
    369 	BT_FRAME(126)
    370 	BT_FRAME(127)
    371 #undef BT_FRAME
    372 }
    373 #else
    374 void
    375 prof_backtrace(prof_bt_t *bt)
    376 {
    377 
    378 	cassert(config_prof);
    379 	not_reached();
    380 }
    381 #endif
    382 
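         /*
          * Assign a lock to a new ctx by cycling through the shared ctx_locks
          * table.  The atomic counter makes the assignment round-robin without
          * requiring any additional synchronization.
          */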
    383 static malloc_mutex_t *
    384 prof_ctx_mutex_choose(void)
    385 {
    386 	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
    387 
    388 	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
    389 }
    390 
    391 static void
    392 prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
    393 {
    394 
    395 	ctx->bt = bt;
    396 	ctx->lock = prof_ctx_mutex_choose();
    397 	/*
    398 	 * Set nlimbo to 1, in order to avoid a race condition with
    399 	 * prof_ctx_merge()/prof_ctx_destroy().
    400 	 */
    401 	ctx->nlimbo = 1;
    402 	ql_elm_new(ctx, dump_link);
    403 	memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
    404 	ql_new(&ctx->cnts_ql);
    405 }
    406 
    407 static void
    408 prof_ctx_destroy(prof_ctx_t *ctx)
    409 {
    410 	prof_tdata_t *prof_tdata;
    411 
    412 	cassert(config_prof);
    413 
    414 	/*
    415 	 * Check that ctx is still unused by any thread cache before destroying
    416 	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
    417 	 * condition with this function, as does prof_ctx_merge() in order to
    418 	 * avoid a race between the main body of prof_ctx_merge() and entry
    419 	 * into this function.
    420 	 */
    421 	prof_tdata = prof_tdata_get(false);
    422 	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
    423 	prof_enter(prof_tdata);
    424 	malloc_mutex_lock(ctx->lock);
    425 	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
    426 	    ctx->nlimbo == 1) {
    427 		assert(ctx->cnt_merged.curbytes == 0);
    428 		assert(ctx->cnt_merged.accumobjs == 0);
    429 		assert(ctx->cnt_merged.accumbytes == 0);
    430 		/* Remove ctx from bt2ctx. */
    431 		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
    432 			not_reached();
    433 		prof_leave(prof_tdata);
    434 		/* Destroy ctx. */
    435 		malloc_mutex_unlock(ctx->lock);
    436 		bt_destroy(ctx->bt);
    437 		idalloc(ctx);
    438 	} else {
    439 		/*
    440 		 * Compensate for increment in prof_ctx_merge() or
    441 		 * prof_lookup().
    442 		 */
    443 		ctx->nlimbo--;
    444 		malloc_mutex_unlock(ctx->lock);
    445 		prof_leave(prof_tdata);
    446 	}
    447 }
    448 
    449 static void
    450 prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
    451 {
    452 	bool destroy;
    453 
    454 	cassert(config_prof);
    455 
    456 	/* Merge cnt stats and detach from ctx. */
    457 	malloc_mutex_lock(ctx->lock);
    458 	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
    459 	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
    460 	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
    461 	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
    462 	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
    463 	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
    464 	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
    465 		/*
    466 		 * Increment ctx->nlimbo in order to keep another thread from
    467 		 * winning the race to destroy ctx while this one has ctx->lock
    468 		 * dropped.  Without this, it would be possible for another
    469 		 * thread to:
    470 		 *
    471 		 * 1) Sample an allocation associated with ctx.
    472 		 * 2) Deallocate the sampled object.
    473 		 * 3) Successfully prof_ctx_destroy(ctx).
    474 		 *
    475 		 * The result would be that ctx no longer exists by the time
    476 		 * this thread accesses it in prof_ctx_destroy().
    477 		 */
    478 		ctx->nlimbo++;
    479 		destroy = true;
    480 	} else
    481 		destroy = false;
    482 	malloc_mutex_unlock(ctx->lock);
    483 	if (destroy)
    484 		prof_ctx_destroy(ctx);
    485 }
    486 
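         /*
          * Look up bt in the global bt2ctx table, creating and inserting a new
          * ctx if the backtrace has not been seen before.  On success,
          * *p_btkey, *p_ctx, and *p_new_ctx describe the resulting ctx, whose
          * nlimbo count guards it against concurrent destruction until the
          * caller links a counter into it.  Returns true on allocation failure.
          */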
    487 static bool
    488 prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
    489     prof_ctx_t **p_ctx, bool *p_new_ctx)
    490 {
    491 	union {
    492 		prof_ctx_t	*p;
    493 		void		*v;
    494 	} ctx;
    495 	union {
    496 		prof_bt_t	*p;
    497 		void		*v;
    498 	} btkey;
    499 	bool new_ctx;
    500 
    501 	prof_enter(prof_tdata);
    502 	if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
    503 		/* bt has never been seen before.  Insert it. */
    504 		ctx.v = imalloc(sizeof(prof_ctx_t));
    505 		if (ctx.v == NULL) {
    506 			prof_leave(prof_tdata);
    507 			return (true);
    508 		}
    509 		btkey.p = bt_dup(bt);
    510 		if (btkey.v == NULL) {
    511 			prof_leave(prof_tdata);
    512 			idalloc(ctx.v);
    513 			return (true);
    514 		}
    515 		prof_ctx_init(ctx.p, btkey.p);
    516 		if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
    517 			/* OOM. */
    518 			prof_leave(prof_tdata);
    519 			idalloc(btkey.v);
    520 			idalloc(ctx.v);
    521 			return (true);
    522 		}
    523 		new_ctx = true;
    524 	} else {
    525 		/*
    526 		 * Increment nlimbo, in order to avoid a race condition with
    527 		 * prof_ctx_merge()/prof_ctx_destroy().
    528 		 */
    529 		malloc_mutex_lock(ctx.p->lock);
    530 		ctx.p->nlimbo++;
    531 		malloc_mutex_unlock(ctx.p->lock);
    532 		new_ctx = false;
    533 	}
    534 	prof_leave(prof_tdata);
    535 
    536 	*p_btkey = btkey.v;
    537 	*p_ctx = ctx.p;
    538 	*p_new_ctx = new_ctx;
    539 	return (false);
    540 }
    541 
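         /*
          * Map a backtrace to this thread's counter object, creating one if
          * needed.  The per-thread bt2cnt cache is consulted first and needs
          * no locking; misses fall through to the global table via
          * prof_lookup_global().  Once the per-thread cache holds PROF_TCMAX
          * entries, the least recently used counter is merged into its ctx and
          * recycled.
          */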
    542 prof_thr_cnt_t *
    543 prof_lookup(prof_bt_t *bt)
    544 {
    545 	union {
    546 		prof_thr_cnt_t	*p;
    547 		void		*v;
    548 	} ret;
    549 	prof_tdata_t *prof_tdata;
    550 
    551 	cassert(config_prof);
    552 
    553 	prof_tdata = prof_tdata_get(false);
    554 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
    555 		return (NULL);
    556 
    557 	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
    558 		void *btkey;
    559 		prof_ctx_t *ctx;
    560 		bool new_ctx;
    561 
    562 		/*
    563 		 * This thread's cache lacks bt.  Look for it in the global
    564 		 * cache.
    565 		 */
    566 		if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
    567 			return (NULL);
    568 
     569 		/* Link a prof_thr_cnt_t into ctx for this thread. */
    570 		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
    571 			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
    572 			/*
    573 			 * Flush the least recently used cnt in order to keep
    574 			 * bt2cnt from becoming too large.
    575 			 */
    576 			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
    577 			assert(ret.v != NULL);
    578 			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
    579 			    NULL, NULL))
    580 				not_reached();
    581 			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
    582 			prof_ctx_merge(ret.p->ctx, ret.p);
    583 			/* ret can now be re-used. */
    584 		} else {
    585 			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
    586 			/* Allocate and partially initialize a new cnt. */
    587 			ret.v = imalloc(sizeof(prof_thr_cnt_t));
    588 			if (ret.p == NULL) {
    589 				if (new_ctx)
    590 					prof_ctx_destroy(ctx);
    591 				return (NULL);
    592 			}
    593 			ql_elm_new(ret.p, cnts_link);
    594 			ql_elm_new(ret.p, lru_link);
    595 		}
    596 		/* Finish initializing ret. */
    597 		ret.p->ctx = ctx;
    598 		ret.p->epoch = 0;
    599 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
    600 		if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
    601 			if (new_ctx)
    602 				prof_ctx_destroy(ctx);
    603 			idalloc(ret.v);
    604 			return (NULL);
    605 		}
    606 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
    607 		malloc_mutex_lock(ctx->lock);
    608 		ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
    609 		ctx->nlimbo--;
    610 		malloc_mutex_unlock(ctx->lock);
    611 	} else {
    612 		/* Move ret to the front of the LRU. */
    613 		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
    614 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
    615 	}
    616 
    617 	return (ret.p);
    618 }
    619 
    620 
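         /*
          * Draw a new sampling interval for prof_tdata (or for the calling
          * thread if prof_tdata is NULL).  The interval is a geometrically
          * distributed random variable with mean 2^opt_lg_prof_sample bytes;
          * for example, opt_lg_prof_sample == 19 samples on average once per
          * 512 KiB of allocation.  A value of 0 causes every allocation to be
          * sampled.
          */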
    621 void
    622 prof_sample_threshold_update(prof_tdata_t *prof_tdata)
    623 {
    624 	/*
    625 	 * The body of this function is compiled out unless heap profiling is
    626 	 * enabled, so that it is possible to compile jemalloc with floating
    627 	 * point support completely disabled.  Avoiding floating point code is
    628 	 * important on memory-constrained systems, but it also enables a
    629 	 * workaround for versions of glibc that don't properly save/restore
    630 	 * floating point registers during dynamic lazy symbol loading (which
    631 	 * internally calls into whatever malloc implementation happens to be
    632 	 * integrated into the application).  Note that some compilers (e.g.
    633 	 * gcc 4.8) may use floating point registers for fast memory moves, so
    634 	 * jemalloc must be compiled with such optimizations disabled (e.g.
    635 	 * -mno-sse) in order for the workaround to be complete.
    636 	 */
    637 #ifdef JEMALLOC_PROF
    638 	uint64_t r;
    639 	double u;
    640 
    641 	if (!config_prof)
    642 		return;
    643 
    644 	if (prof_tdata == NULL)
    645 		prof_tdata = prof_tdata_get(false);
    646 
    647 	if (opt_lg_prof_sample == 0) {
    648 		prof_tdata->bytes_until_sample = 0;
    649 		return;
    650 	}
    651 
    652 	/*
    653 	 * Compute sample threshold as a geometrically distributed random
    654 	 * variable with mean (2^opt_lg_prof_sample).
    655 	 *
    656 	 *                         __        __
    657 	 *                         |  log(u)  |                     1
    658 	 * prof_tdata->threshold = | -------- |, where p = -------------------
    659 	 *                         | log(1-p) |             opt_lg_prof_sample
    660 	 *                                                 2
    661 	 *
    662 	 * For more information on the math, see:
    663 	 *
    664 	 *   Non-Uniform Random Variate Generation
    665 	 *   Luc Devroye
    666 	 *   Springer-Verlag, New York, 1986
    667 	 *   pp 500
    668 	 *   (http://luc.devroye.org/rnbookindex.html)
    669 	 */
    670 	prng64(r, 53, prof_tdata->prng_state,
    671 	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
    672 	u = (double)r * (1.0/9007199254740992.0L);
    673 	prof_tdata->bytes_until_sample = (uint64_t)(log(u) /
    674 	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
    675 	    + (uint64_t)1U;
    676 #endif
    677 }
    678 
    679 
    680 #ifdef JEMALLOC_JET
    681 size_t
    682 prof_bt_count(void)
    683 {
    684 	size_t bt_count;
    685 	prof_tdata_t *prof_tdata;
    686 
    687 	prof_tdata = prof_tdata_get(false);
    688 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
    689 		return (0);
    690 
    691 	prof_enter(prof_tdata);
    692 	bt_count = ckh_count(&bt2ctx);
    693 	prof_leave(prof_tdata);
    694 
    695 	return (bt_count);
    696 }
    697 #endif
    698 
    699 #ifdef JEMALLOC_JET
    700 #undef prof_dump_open
    701 #define	prof_dump_open JEMALLOC_N(prof_dump_open_impl)
    702 #endif
    703 static int
    704 prof_dump_open(bool propagate_err, const char *filename)
    705 {
    706 	int fd;
    707 
    708 	fd = creat(filename, 0644);
    709 	if (fd == -1 && propagate_err == false) {
     710 		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
    711 		    filename);
    712 		if (opt_abort)
    713 			abort();
    714 	}
    715 
    716 	return (fd);
    717 }
    718 #ifdef JEMALLOC_JET
    719 #undef prof_dump_open
    720 #define	prof_dump_open JEMALLOC_N(prof_dump_open)
    721 prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
    722 #endif
    723 
    724 static bool
    725 prof_dump_flush(bool propagate_err)
    726 {
    727 	bool ret = false;
    728 	ssize_t err;
    729 
    730 	cassert(config_prof);
    731 
    732 	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
    733 	if (err == -1) {
    734 		if (propagate_err == false) {
    735 			malloc_write("<jemalloc>: write() failed during heap "
    736 			    "profile flush\n");
    737 			if (opt_abort)
    738 				abort();
    739 		}
    740 		ret = true;
    741 	}
    742 	prof_dump_buf_end = 0;
    743 
    744 	return (ret);
    745 }
    746 
    747 static bool
    748 prof_dump_close(bool propagate_err)
    749 {
    750 	bool ret;
    751 
    752 	assert(prof_dump_fd != -1);
    753 	ret = prof_dump_flush(propagate_err);
    754 	close(prof_dump_fd);
    755 	prof_dump_fd = -1;
    756 
    757 	return (ret);
    758 }
    759 
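         /*
          * Append s to the dump buffer, flushing to prof_dump_fd whenever the
          * buffer fills.  Returns true only if a flush fails and propagate_err
          * is set; otherwise prof_dump_flush() itself reports the error.
          */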
    760 static bool
    761 prof_dump_write(bool propagate_err, const char *s)
    762 {
    763 	unsigned i, slen, n;
    764 
    765 	cassert(config_prof);
    766 
    767 	i = 0;
    768 	slen = strlen(s);
    769 	while (i < slen) {
    770 		/* Flush the buffer if it is full. */
    771 		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
    772 			if (prof_dump_flush(propagate_err) && propagate_err)
    773 				return (true);
    774 
    775 		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
    776 			/* Finish writing. */
    777 			n = slen - i;
    778 		} else {
    779 			/* Write as much of s as will fit. */
    780 			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
    781 		}
    782 		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
    783 		prof_dump_buf_end += n;
    784 		i += n;
    785 	}
    786 
    787 	return (false);
    788 }
    789 
    790 JEMALLOC_ATTR(format(printf, 2, 3))
    791 static bool
    792 prof_dump_printf(bool propagate_err, const char *format, ...)
    793 {
    794 	bool ret;
    795 	va_list ap;
    796 	char buf[PROF_PRINTF_BUFSIZE];
    797 
    798 	va_start(ap, format);
    799 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
    800 	va_end(ap);
    801 	ret = prof_dump_write(propagate_err, buf);
    802 
    803 	return (ret);
    804 }
    805 
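         /*
          * First dump pass: pin ctx by bumping nlimbo, link it into the dump
          * list, and fold its per-thread counters into cnt_summed and the
          * global cnt_all totals.  The epoch retry loop below obtains a
          * consistent snapshot of each thread's counters without locking them.
          */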
    806 static void
    807 prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
    808     prof_ctx_list_t *ctx_ql)
    809 {
    810 	prof_thr_cnt_t *thr_cnt;
    811 	prof_cnt_t tcnt;
    812 
    813 	cassert(config_prof);
    814 
    815 	malloc_mutex_lock(ctx->lock);
    816 
    817 	/*
    818 	 * Increment nlimbo so that ctx won't go away before dump.
    819 	 * Additionally, link ctx into the dump list so that it is included in
    820 	 * prof_dump()'s second pass.
    821 	 */
    822 	ctx->nlimbo++;
    823 	ql_tail_insert(ctx_ql, ctx, dump_link);
    824 
    825 	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
    826 	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
    827 		volatile unsigned *epoch = &thr_cnt->epoch;
    828 
    829 		while (true) {
    830 			unsigned epoch0 = *epoch;
    831 
    832 			/* Make sure epoch is even. */
    833 			if (epoch0 & 1U)
    834 				continue;
    835 
    836 			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
    837 
    838 			/* Terminate if epoch didn't change while reading. */
    839 			if (*epoch == epoch0)
    840 				break;
    841 		}
    842 
    843 		ctx->cnt_summed.curobjs += tcnt.curobjs;
    844 		ctx->cnt_summed.curbytes += tcnt.curbytes;
    845 		if (opt_prof_accum) {
    846 			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
    847 			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
    848 		}
    849 	}
    850 
    851 	if (ctx->cnt_summed.curobjs != 0)
    852 		(*leak_nctx)++;
    853 
    854 	/* Add to cnt_all. */
    855 	cnt_all->curobjs += ctx->cnt_summed.curobjs;
    856 	cnt_all->curbytes += ctx->cnt_summed.curbytes;
    857 	if (opt_prof_accum) {
    858 		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
    859 		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
    860 	}
    861 
    862 	malloc_mutex_unlock(ctx->lock);
    863 }
    864 
    865 static bool
    866 prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
    867 {
    868 
    869 	if (opt_lg_prof_sample == 0) {
    870 		if (prof_dump_printf(propagate_err,
    871 		    "heap profile: %"PRId64": %"PRId64
    872 		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
    873 		    cnt_all->curobjs, cnt_all->curbytes,
    874 		    cnt_all->accumobjs, cnt_all->accumbytes))
    875 			return (true);
    876 	} else {
    877 		if (prof_dump_printf(propagate_err,
    878 		    "heap profile: %"PRId64": %"PRId64
    879 		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
    880 		    cnt_all->curobjs, cnt_all->curbytes,
    881 		    cnt_all->accumobjs, cnt_all->accumbytes,
    882 		    ((uint64_t)1U << opt_lg_prof_sample)))
    883 			return (true);
    884 	}
    885 
    886 	return (false);
    887 }
    888 
    889 static void
    890 prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
    891 {
    892 
    893 	ctx->nlimbo--;
    894 	ql_remove(ctx_ql, ctx, dump_link);
    895 }
    896 
    897 static void
    898 prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
    899 {
    900 
    901 	malloc_mutex_lock(ctx->lock);
    902 	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
    903 	malloc_mutex_unlock(ctx->lock);
    904 }
    905 
    906 static bool
    907 prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
    908     prof_ctx_list_t *ctx_ql)
    909 {
    910 	bool ret;
    911 	unsigned i;
    912 
    913 	cassert(config_prof);
    914 
    915 	/*
    916 	 * Current statistics can sum to 0 as a result of unmerged per thread
    917 	 * statistics.  Additionally, interval- and growth-triggered dumps can
    918 	 * occur between the time a ctx is created and when its statistics are
    919 	 * filled in.  Avoid dumping any ctx that is an artifact of either
    920 	 * implementation detail.
    921 	 */
    922 	malloc_mutex_lock(ctx->lock);
    923 	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
    924 	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
    925 		assert(ctx->cnt_summed.curobjs == 0);
    926 		assert(ctx->cnt_summed.curbytes == 0);
    927 		assert(ctx->cnt_summed.accumobjs == 0);
    928 		assert(ctx->cnt_summed.accumbytes == 0);
    929 		ret = false;
    930 		goto label_return;
    931 	}
    932 
    933 	if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
    934 	    " [%"PRIu64": %"PRIu64"] @",
    935 	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
    936 	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
    937 		ret = true;
    938 		goto label_return;
    939 	}
    940 
    941 	for (i = 0; i < bt->len; i++) {
    942 		if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
    943 		    (uintptr_t)bt->vec[i])) {
    944 			ret = true;
    945 			goto label_return;
    946 		}
    947 	}
    948 
    949 	if (prof_dump_write(propagate_err, "\n")) {
    950 		ret = true;
    951 		goto label_return;
    952 	}
    953 
    954 	ret = false;
    955 label_return:
    956 	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
    957 	malloc_mutex_unlock(ctx->lock);
    958 	return (ret);
    959 }
    960 
    961 static bool
    962 prof_dump_maps(bool propagate_err)
    963 {
    964 	bool ret;
    965 	int mfd;
    966 	char filename[PATH_MAX + 1];
    967 
    968 	cassert(config_prof);
    969 #ifdef __FreeBSD__
    970 	malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
    971 #else
    972 	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
    973 	    (int)getpid());
    974 #endif
    975 	mfd = open(filename, O_RDONLY);
    976 	if (mfd != -1) {
    977 		ssize_t nread;
    978 
    979 		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
    980 		    propagate_err) {
    981 			ret = true;
    982 			goto label_return;
    983 		}
    984 		nread = 0;
    985 		do {
    986 			prof_dump_buf_end += nread;
    987 			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
    988 				/* Make space in prof_dump_buf before read(). */
    989 				if (prof_dump_flush(propagate_err) &&
    990 				    propagate_err) {
    991 					ret = true;
    992 					goto label_return;
    993 				}
    994 			}
    995 			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
    996 			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
    997 		} while (nread > 0);
    998 	} else {
    999 		ret = true;
   1000 		goto label_return;
   1001 	}
   1002 
   1003 	ret = false;
   1004 label_return:
   1005 	if (mfd != -1)
   1006 		close(mfd);
   1007 	return (ret);
   1008 }
   1009 
   1010 static void
   1011 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
   1012     const char *filename)
   1013 {
   1014 
   1015 	if (cnt_all->curbytes != 0) {
   1016 		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
   1017 		    PRId64" object%s, %zu context%s\n",
   1018 		    cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
   1019 		    cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
   1020 		    leak_nctx, (leak_nctx != 1) ? "s" : "");
   1021 		malloc_printf(
   1022 		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
   1023 		    filename);
   1024 	}
   1025 }
   1026 
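         /*
          * Write a complete heap profile to filename.  With prof_dump_mtx
          * held, the first pass snapshots every ctx via prof_dump_ctx_prep()
          * while bt2ctx_mtx is held; the second pass writes the header, one
          * record per ctx, and the process's memory map to the newly created
          * file.  Returns true on failure.
          */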
   1027 static bool
   1028 prof_dump(bool propagate_err, const char *filename, bool leakcheck)
   1029 {
   1030 	prof_tdata_t *prof_tdata;
   1031 	prof_cnt_t cnt_all;
   1032 	size_t tabind;
   1033 	union {
   1034 		prof_ctx_t	*p;
   1035 		void		*v;
   1036 	} ctx;
   1037 	size_t leak_nctx;
   1038 	prof_ctx_list_t ctx_ql;
   1039 
   1040 	cassert(config_prof);
   1041 
   1042 	prof_tdata = prof_tdata_get(false);
   1043 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
   1044 		return (true);
   1045 
   1046 	malloc_mutex_lock(&prof_dump_mtx);
   1047 
   1048 	/* Merge per thread profile stats, and sum them in cnt_all. */
   1049 	memset(&cnt_all, 0, sizeof(prof_cnt_t));
   1050 	leak_nctx = 0;
   1051 	ql_new(&ctx_ql);
   1052 	prof_enter(prof_tdata);
   1053 	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
   1054 		prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
   1055 	prof_leave(prof_tdata);
   1056 
   1057 	/* Create dump file. */
   1058 	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
   1059 		goto label_open_close_error;
   1060 
   1061 	/* Dump profile header. */
   1062 	if (prof_dump_header(propagate_err, &cnt_all))
   1063 		goto label_write_error;
   1064 
   1065 	/* Dump per ctx profile stats. */
   1066 	while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
   1067 		if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
   1068 			goto label_write_error;
   1069 	}
   1070 
   1071 	/* Dump /proc/<pid>/maps if possible. */
   1072 	if (prof_dump_maps(propagate_err))
   1073 		goto label_write_error;
   1074 
   1075 	if (prof_dump_close(propagate_err))
   1076 		goto label_open_close_error;
   1077 
   1078 	malloc_mutex_unlock(&prof_dump_mtx);
   1079 
   1080 	if (leakcheck)
   1081 		prof_leakcheck(&cnt_all, leak_nctx, filename);
   1082 
   1083 	return (false);
   1084 label_write_error:
   1085 	prof_dump_close(propagate_err);
   1086 label_open_close_error:
   1087 	while ((ctx.p = ql_first(&ctx_ql)) != NULL)
   1088 		prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
   1089 	malloc_mutex_unlock(&prof_dump_mtx);
   1090 	return (true);
   1091 }
   1092 
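         /*
          * Dump file names have the form
          * "<prefix>.<pid>.<seq>.<v><vseq>.heap", where <v> identifies the
          * trigger: 'f' (final, prof_fdump()), 'i' (interval, prof_idump()),
          * 'm' (manual, prof_mdump()), or 'u' (gdump, prof_gdump()).  Passing
          * VSEQ_INVALID omits the <vseq> component.
          */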
   1093 #define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
   1094 #define	VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
   1095 static void
   1096 prof_dump_filename(char *filename, char v, uint64_t vseq)
   1097 {
   1098 
   1099 	cassert(config_prof);
   1100 
   1101 	if (vseq != VSEQ_INVALID) {
    1102 		/* "<prefix>.<pid>.<seq>.<v><vseq>.heap" */
   1103 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
   1104 		    "%s.%d.%"PRIu64".%c%"PRIu64".heap",
   1105 		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
   1106 	} else {
    1107 		/* "<prefix>.<pid>.<seq>.<v>.heap" */
   1108 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
   1109 		    "%s.%d.%"PRIu64".%c.heap",
   1110 		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
   1111 	}
   1112 	prof_dump_seq++;
   1113 }
   1114 
   1115 static void
   1116 prof_fdump(void)
   1117 {
   1118 	char filename[DUMP_FILENAME_BUFSIZE];
   1119 
   1120 	cassert(config_prof);
   1121 
   1122 	if (prof_booted == false)
   1123 		return;
   1124 
   1125 	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
   1126 		malloc_mutex_lock(&prof_dump_seq_mtx);
   1127 		prof_dump_filename(filename, 'f', VSEQ_INVALID);
   1128 		malloc_mutex_unlock(&prof_dump_seq_mtx);
   1129 		prof_dump(false, filename, opt_prof_leak);
   1130 	}
   1131 }
   1132 
   1133 void
   1134 prof_idump(void)
   1135 {
   1136 	prof_tdata_t *prof_tdata;
   1137 	char filename[PATH_MAX + 1];
   1138 
   1139 	cassert(config_prof);
   1140 
   1141 	if (prof_booted == false)
   1142 		return;
   1143 	prof_tdata = prof_tdata_get(false);
   1144 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
   1145 		return;
   1146 	if (prof_tdata->enq) {
   1147 		prof_tdata->enq_idump = true;
   1148 		return;
   1149 	}
   1150 
   1151 	if (opt_prof_prefix[0] != '\0') {
   1152 		malloc_mutex_lock(&prof_dump_seq_mtx);
   1153 		prof_dump_filename(filename, 'i', prof_dump_iseq);
   1154 		prof_dump_iseq++;
   1155 		malloc_mutex_unlock(&prof_dump_seq_mtx);
   1156 		prof_dump(false, filename, false);
   1157 	}
   1158 }
   1159 
   1160 bool
   1161 prof_mdump(const char *filename)
   1162 {
   1163 	char filename_buf[DUMP_FILENAME_BUFSIZE];
   1164 
   1165 	cassert(config_prof);
   1166 
   1167 	if (opt_prof == false || prof_booted == false)
   1168 		return (true);
   1169 
   1170 	if (filename == NULL) {
   1171 		/* No filename specified, so automatically generate one. */
   1172 		if (opt_prof_prefix[0] == '\0')
   1173 			return (true);
   1174 		malloc_mutex_lock(&prof_dump_seq_mtx);
   1175 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
   1176 		prof_dump_mseq++;
   1177 		malloc_mutex_unlock(&prof_dump_seq_mtx);
   1178 		filename = filename_buf;
   1179 	}
   1180 	return (prof_dump(true, filename, false));
   1181 }
   1182 
   1183 void
   1184 prof_gdump(void)
   1185 {
   1186 	prof_tdata_t *prof_tdata;
   1187 	char filename[DUMP_FILENAME_BUFSIZE];
   1188 
   1189 	cassert(config_prof);
   1190 
   1191 	if (prof_booted == false)
   1192 		return;
   1193 	prof_tdata = prof_tdata_get(false);
   1194 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
   1195 		return;
   1196 	if (prof_tdata->enq) {
   1197 		prof_tdata->enq_gdump = true;
   1198 		return;
   1199 	}
   1200 
   1201 	if (opt_prof_prefix[0] != '\0') {
   1202 		malloc_mutex_lock(&prof_dump_seq_mtx);
   1203 		prof_dump_filename(filename, 'u', prof_dump_useq);
   1204 		prof_dump_useq++;
   1205 		malloc_mutex_unlock(&prof_dump_seq_mtx);
   1206 		prof_dump(false, filename, false);
   1207 	}
   1208 }
   1209 
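         /*
          * Hash and comparison operators for backtrace keys, shared by the
          * global bt2ctx table and the per-thread bt2cnt caches.  Two
          * backtraces are equal iff their frame vectors are identical.
          */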
   1210 static void
   1211 prof_bt_hash(const void *key, size_t r_hash[2])
   1212 {
   1213 	prof_bt_t *bt = (prof_bt_t *)key;
   1214 
   1215 	cassert(config_prof);
   1216 
   1217 	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
   1218 }
   1219 
   1220 static bool
   1221 prof_bt_keycomp(const void *k1, const void *k2)
   1222 {
   1223 	const prof_bt_t *bt1 = (prof_bt_t *)k1;
   1224 	const prof_bt_t *bt2 = (prof_bt_t *)k2;
   1225 
   1226 	cassert(config_prof);
   1227 
   1228 	if (bt1->len != bt2->len)
   1229 		return (false);
   1230 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
   1231 }
   1232 
   1233 prof_tdata_t *
   1234 prof_tdata_init(void)
   1235 {
   1236 	prof_tdata_t *prof_tdata;
   1237 
   1238 	cassert(config_prof);
   1239 
   1240 	/* Initialize an empty cache for this thread. */
   1241 	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
   1242 	if (prof_tdata == NULL)
   1243 		return (NULL);
   1244 
   1245 	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
   1246 	    prof_bt_hash, prof_bt_keycomp)) {
   1247 		idalloc(prof_tdata);
   1248 		return (NULL);
   1249 	}
   1250 	ql_new(&prof_tdata->lru_ql);
   1251 
   1252 	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
   1253 	if (prof_tdata->vec == NULL) {
   1254 		ckh_delete(&prof_tdata->bt2cnt);
   1255 		idalloc(prof_tdata);
   1256 		return (NULL);
   1257 	}
   1258 
   1259 	prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata;
   1260 	prof_sample_threshold_update(prof_tdata);
   1261 
   1262 	prof_tdata->enq = false;
   1263 	prof_tdata->enq_idump = false;
   1264 	prof_tdata->enq_gdump = false;
   1265 
   1266 	prof_tdata_tsd_set(&prof_tdata);
   1267 
   1268 	return (prof_tdata);
   1269 }
   1270 
   1271 void
   1272 prof_tdata_cleanup(void *arg)
   1273 {
   1274 	prof_thr_cnt_t *cnt;
   1275 	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
   1276 
   1277 	cassert(config_prof);
   1278 
   1279 	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
   1280 		/*
   1281 		 * Another destructor deallocated memory after this destructor
   1282 		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
   1283 		 * in order to receive another callback.
   1284 		 */
   1285 		prof_tdata = PROF_TDATA_STATE_PURGATORY;
   1286 		prof_tdata_tsd_set(&prof_tdata);
   1287 	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
   1288 		/*
   1289 		 * The previous time this destructor was called, we set the key
   1290 		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
   1291 		 * wouldn't cause re-creation of the prof_tdata.  This time, do
   1292 		 * nothing, so that the destructor will not be called again.
   1293 		 */
   1294 	} else if (prof_tdata != NULL) {
   1295 		/*
   1296 		 * Delete the hash table.  All of its contents can still be
   1297 		 * iterated over via the LRU.
   1298 		 */
   1299 		ckh_delete(&prof_tdata->bt2cnt);
   1300 		/*
   1301 		 * Iteratively merge cnt's into the global stats and delete
   1302 		 * them.
   1303 		 */
   1304 		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
   1305 			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
   1306 			prof_ctx_merge(cnt->ctx, cnt);
   1307 			idalloc(cnt);
   1308 		}
   1309 		idalloc(prof_tdata->vec);
   1310 		idalloc(prof_tdata);
   1311 		prof_tdata = PROF_TDATA_STATE_PURGATORY;
   1312 		prof_tdata_tsd_set(&prof_tdata);
   1313 	}
   1314 }
   1315 
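         /*
          * Bootstrapping proceeds in three phases: prof_boot0() installs the
          * default opt_prof_prefix, prof_boot1() finalizes opt_prof and
          * derives prof_interval, and prof_boot2() creates the global bt2ctx
          * table, the mutexes, the thread-specific data key, the atexit()
          * hook for final dumps, and the shared ctx lock table.
          */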
   1316 void
   1317 prof_boot0(void)
   1318 {
   1319 
   1320 	cassert(config_prof);
   1321 
   1322 	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
   1323 	    sizeof(PROF_PREFIX_DEFAULT));
   1324 }
   1325 
   1326 void
   1327 prof_boot1(void)
   1328 {
   1329 
   1330 	cassert(config_prof);
   1331 
   1332 	/*
   1333 	 * opt_prof must be in its final state before any arenas are
   1334 	 * initialized, so this function must be executed early.
   1335 	 */
   1336 
   1337 	if (opt_prof_leak && opt_prof == false) {
   1338 		/*
   1339 		 * Enable opt_prof, but in such a way that profiles are never
   1340 		 * automatically dumped.
   1341 		 */
   1342 		opt_prof = true;
   1343 		opt_prof_gdump = false;
   1344 	} else if (opt_prof) {
   1345 		if (opt_lg_prof_interval >= 0) {
   1346 			prof_interval = (((uint64_t)1U) <<
   1347 			    opt_lg_prof_interval);
   1348 		}
   1349 	}
   1350 }
   1351 
   1352 bool
   1353 prof_boot2(void)
   1354 {
   1355 
   1356 	cassert(config_prof);
   1357 
   1358 	if (opt_prof) {
   1359 		unsigned i;
   1360 
   1361 		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
   1362 		    prof_bt_keycomp))
   1363 			return (true);
   1364 		if (malloc_mutex_init(&bt2ctx_mtx))
   1365 			return (true);
   1366 		if (prof_tdata_tsd_boot()) {
   1367 			malloc_write(
   1368 			    "<jemalloc>: Error in pthread_key_create()\n");
   1369 			abort();
   1370 		}
   1371 
   1372 		if (malloc_mutex_init(&prof_dump_seq_mtx))
   1373 			return (true);
   1374 		if (malloc_mutex_init(&prof_dump_mtx))
   1375 			return (true);
   1376 
   1377 		if (atexit(prof_fdump) != 0) {
   1378 			malloc_write("<jemalloc>: Error in atexit()\n");
   1379 			if (opt_abort)
   1380 				abort();
   1381 		}
   1382 
   1383 		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
   1384 		    sizeof(malloc_mutex_t));
   1385 		if (ctx_locks == NULL)
   1386 			return (true);
   1387 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
   1388 			if (malloc_mutex_init(&ctx_locks[i]))
   1389 				return (true);
   1390 		}
   1391 	}
   1392 
   1393 #ifdef JEMALLOC_PROF_LIBGCC
   1394 	/*
   1395 	 * Cause the backtracing machinery to allocate its internal state
   1396 	 * before enabling profiling.
   1397 	 */
   1398 	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
   1399 #endif
   1400 
   1401 	prof_booted = true;
   1402 
   1403 	return (false);
   1404 }
   1405 
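         /*
          * Fork handlers: bt2ctx_mtx, prof_dump_seq_mtx, and the shared ctx
          * locks are acquired before fork() and released afterward in both the
          * parent and the child, so the child does not inherit any of them in
          * a locked state.
          */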
   1406 void
   1407 prof_prefork(void)
   1408 {
   1409 
   1410 	if (opt_prof) {
   1411 		unsigned i;
   1412 
   1413 		malloc_mutex_prefork(&bt2ctx_mtx);
   1414 		malloc_mutex_prefork(&prof_dump_seq_mtx);
   1415 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
   1416 			malloc_mutex_prefork(&ctx_locks[i]);
   1417 	}
   1418 }
   1419 
   1420 void
   1421 prof_postfork_parent(void)
   1422 {
   1423 
   1424 	if (opt_prof) {
   1425 		unsigned i;
   1426 
   1427 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
   1428 			malloc_mutex_postfork_parent(&ctx_locks[i]);
   1429 		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
   1430 		malloc_mutex_postfork_parent(&bt2ctx_mtx);
   1431 	}
   1432 }
   1433 
   1434 void
   1435 prof_postfork_child(void)
   1436 {
   1437 
   1438 	if (opt_prof) {
   1439 		unsigned i;
   1440 
   1441 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
   1442 			malloc_mutex_postfork_child(&ctx_locks[i]);
   1443 		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
   1444 		malloc_mutex_postfork_child(&bt2ctx_mtx);
   1445 	}
   1446 }
   1447 
   1448 /******************************************************************************/
   1449