      1 #define JEMALLOC_PROF_C_
      2 #include "jemalloc/internal/jemalloc_preamble.h"
      3 #include "jemalloc/internal/jemalloc_internal_includes.h"
      4 
      5 #include "jemalloc/internal/assert.h"
      6 #include "jemalloc/internal/ckh.h"
      7 #include "jemalloc/internal/hash.h"
      8 #include "jemalloc/internal/malloc_io.h"
      9 #include "jemalloc/internal/mutex.h"
     10 
     11 /******************************************************************************/
     12 
     13 #ifdef JEMALLOC_PROF_LIBUNWIND
     14 #define UNW_LOCAL_ONLY
     15 #include <libunwind.h>
     16 #endif
     17 
     18 #ifdef JEMALLOC_PROF_LIBGCC
     19 /*
     20  * We have a circular dependency -- jemalloc_internal.h tells us if we should
     21  * use libgcc's unwinding functionality, but after we've included that, we've
     22  * already hooked _Unwind_Backtrace.  We'll temporarily disable hooking.
     23  */
     24 #undef _Unwind_Backtrace
     25 #include <unwind.h>
     26 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
     27 #endif
     28 
     29 /******************************************************************************/
     30 /* Data. */
     31 
     32 bool		opt_prof = false;
     33 bool		opt_prof_active = true;
     34 bool		opt_prof_thread_active_init = true;
     35 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
     36 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
     37 bool		opt_prof_gdump = false;
     38 bool		opt_prof_final = false;
     39 bool		opt_prof_leak = false;
     40 bool		opt_prof_accum = false;
     41 char		opt_prof_prefix[
     42     /* Minimize memory bloat for non-prof builds. */
     43 #ifdef JEMALLOC_PROF
     44     PATH_MAX +
     45 #endif
     46     1];
     47 
     48 /*
     49  * Initialized as opt_prof_active, and accessed via
     50  * prof_active_[gs]et{_unlocked,}().
     51  */
     52 bool			prof_active;
     53 static malloc_mutex_t	prof_active_mtx;
     54 
     55 /*
     56  * Initialized as opt_prof_thread_active_init, and accessed via
     57  * prof_thread_active_init_[gs]et().
     58  */
     59 static bool		prof_thread_active_init;
     60 static malloc_mutex_t	prof_thread_active_init_mtx;
     61 
     62 /*
     63  * Initialized as opt_prof_gdump, and accessed via
     64  * prof_gdump_[gs]et{_unlocked,}().
     65  */
     66 bool			prof_gdump_val;
     67 static malloc_mutex_t	prof_gdump_mtx;
     68 
     69 uint64_t	prof_interval = 0;
     70 
     71 size_t		lg_prof_sample;
     72 
     73 /*
     74  * Table of mutexes that are shared among gctx's.  These are leaf locks, so
     75  * there is no problem with using them for more than one gctx at the same time.
     76  * The primary motivation for this sharing though is that gctx's are ephemeral,
     77  * and destroying mutexes causes complications for systems that allocate when
     78  * creating/destroying mutexes.
     79  */
     80 static malloc_mutex_t	*gctx_locks;
     81 static atomic_u_t	cum_gctxs; /* Atomic counter. */
     82 
     83 /*
     84  * Table of mutexes that are shared among tdata's.  No operations require
     85  * holding multiple tdata locks, so there is no problem with using them for more
     86  * than one tdata at the same time, even though a gctx lock may be acquired
     87  * while holding a tdata lock.
     88  */
     89 static malloc_mutex_t	*tdata_locks;
     90 
     91 /*
     92  * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
     93  * structure that knows about all backtraces currently captured.
     94  */
     95 static ckh_t		bt2gctx;
     96 /* Non static to enable profiling. */
     97 malloc_mutex_t		bt2gctx_mtx;
     98 
     99 /*
    100  * Tree of all extant prof_tdata_t structures, regardless of state,
    101  * {attached,detached,expired}.
    102  */
    103 static prof_tdata_tree_t	tdatas;
    104 static malloc_mutex_t	tdatas_mtx;
    105 
    106 static uint64_t		next_thr_uid;
    107 static malloc_mutex_t	next_thr_uid_mtx;
    108 
    109 static malloc_mutex_t	prof_dump_seq_mtx;
    110 static uint64_t		prof_dump_seq;
    111 static uint64_t		prof_dump_iseq;
    112 static uint64_t		prof_dump_mseq;
    113 static uint64_t		prof_dump_useq;
    114 
    115 /*
    116  * This buffer is rather large for stack allocation, so use a single buffer for
    117  * all profile dumps.
    118  */
    119 static malloc_mutex_t	prof_dump_mtx;
    120 static char		prof_dump_buf[
    121     /* Minimize memory bloat for non-prof builds. */
    122 #ifdef JEMALLOC_PROF
    123     PROF_DUMP_BUFSIZE
    124 #else
    125     1
    126 #endif
    127 ];
    128 static size_t		prof_dump_buf_end;
    129 static int		prof_dump_fd;
    130 
    131 /* Do not dump any profiles until bootstrapping is complete. */
    132 static bool		prof_booted = false;
    133 
    134 /******************************************************************************/
    135 /*
    136  * Function prototypes for static functions that are referenced prior to
    137  * definition.
    138  */
    139 
    140 static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
    141 static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
    142 static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
    143     bool even_if_attached);
    144 static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
    145     bool even_if_attached);
    146 static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
    147 
    148 /******************************************************************************/
    149 /* Red-black trees. */
    150 
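/*
 * Comparators for the red-black trees below.  tctx's order by owning thread
 * (thr_uid), then thread reincarnation (thr_discrim), then per-thread creation
 * order (tctx_uid); gctx's order by backtrace contents, then length; tdata's
 * order by (thr_uid, thr_discrim).
 */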
    151 static int
    152 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
    153 	uint64_t a_thr_uid = a->thr_uid;
    154 	uint64_t b_thr_uid = b->thr_uid;
    155 	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
    156 	if (ret == 0) {
    157 		uint64_t a_thr_discrim = a->thr_discrim;
    158 		uint64_t b_thr_discrim = b->thr_discrim;
    159 		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
    160 		    b_thr_discrim);
    161 		if (ret == 0) {
    162 			uint64_t a_tctx_uid = a->tctx_uid;
    163 			uint64_t b_tctx_uid = b->tctx_uid;
    164 			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
    165 			    b_tctx_uid);
    166 		}
    167 	}
    168 	return ret;
    169 }
    170 
    171 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
    172     tctx_link, prof_tctx_comp)
    173 
    174 static int
    175 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
    176 	unsigned a_len = a->bt.len;
    177 	unsigned b_len = b->bt.len;
    178 	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
    179 	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
    180 	if (ret == 0) {
    181 		ret = (a_len > b_len) - (a_len < b_len);
    182 	}
    183 	return ret;
    184 }
    185 
    186 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
    187     prof_gctx_comp)
    188 
    189 static int
    190 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
    191 	int ret;
    192 	uint64_t a_uid = a->thr_uid;
    193 	uint64_t b_uid = b->thr_uid;
    194 
    195 	ret = ((a_uid > b_uid) - (a_uid < b_uid));
    196 	if (ret == 0) {
    197 		uint64_t a_discrim = a->thr_discrim;
    198 		uint64_t b_discrim = b->thr_discrim;
    199 
    200 		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
    201 	}
    202 	return ret;
    203 }
    204 
    205 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
    206     prof_tdata_comp)
    207 
    208 /******************************************************************************/
    209 
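/*
 * Back out the prof bookkeeping for a sampled allocation that did not complete
 * as planned.  If the sample threshold was already consumed (updated), pick a
 * fresh one so sampling stays unbiased; then clear tctx->prepared and drop the
 * tctx if it is no longer needed.  The 0/1 sentinel values used for unsampled
 * allocations are ignored.
 */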
    210 void
    211 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
    212 	prof_tdata_t *tdata;
    213 
    214 	cassert(config_prof);
    215 
    216 	if (updated) {
    217 		/*
    218 		 * Compute a new sample threshold.  This isn't very important in
    219 		 * practice, because this function is rarely executed, so the
    220 		 * potential for sample bias is minimal except in contrived
    221 		 * programs.
    222 		 */
    223 		tdata = prof_tdata_get(tsd, true);
    224 		if (tdata != NULL) {
    225 			prof_sample_threshold_update(tdata);
    226 		}
    227 	}
    228 
    229 	if ((uintptr_t)tctx > (uintptr_t)1U) {
    230 		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
    231 		tctx->prepared = false;
    232 		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
    233 			prof_tctx_destroy(tsd, tctx);
    234 		} else {
    235 			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
    236 		}
    237 	}
    238 }
    239 
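/*
 * Record a sampled allocation/deallocation against its tctx.  curobjs/curbytes
 * track live sampled objects; accumobjs/accumbytes are maintained only when
 * opt_prof_accum is enabled.
 */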
    240 void
    241 prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
    242     prof_tctx_t *tctx) {
    243 	prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
    244 
    245 	malloc_mutex_lock(tsdn, tctx->tdata->lock);
    246 	tctx->cnts.curobjs++;
    247 	tctx->cnts.curbytes += usize;
    248 	if (opt_prof_accum) {
    249 		tctx->cnts.accumobjs++;
    250 		tctx->cnts.accumbytes += usize;
    251 	}
    252 	tctx->prepared = false;
    253 	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
    254 }
    255 
    256 void
    257 prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) {
    258 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
    259 	assert(tctx->cnts.curobjs > 0);
    260 	assert(tctx->cnts.curbytes >= usize);
    261 	tctx->cnts.curobjs--;
    262 	tctx->cnts.curbytes -= usize;
    263 
    264 	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
    265 		prof_tctx_destroy(tsd, tctx);
    266 	} else {
    267 		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
    268 	}
    269 }
    270 
    271 void
    272 bt_init(prof_bt_t *bt, void **vec) {
    273 	cassert(config_prof);
    274 
    275 	bt->vec = vec;
    276 	bt->len = 0;
    277 }
    278 
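/*
 * prof_enter()/prof_leave() bracket operations on the global bt2gctx table.
 * While a thread is enqueued (tdata->enq), interval and gdump requests are
 * recorded rather than serviced, and prof_leave() replays them once
 * bt2gctx_mtx has been dropped, so dumps never start while the backtrace
 * table is locked.
 */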
    279 static void
    280 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
    281 	cassert(config_prof);
    282 	assert(tdata == prof_tdata_get(tsd, false));
    283 
    284 	if (tdata != NULL) {
    285 		assert(!tdata->enq);
    286 		tdata->enq = true;
    287 	}
    288 
    289 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
    290 }
    291 
    292 static void
    293 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
    294 	cassert(config_prof);
    295 	assert(tdata == prof_tdata_get(tsd, false));
    296 
    297 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
    298 
    299 	if (tdata != NULL) {
    300 		bool idump, gdump;
    301 
    302 		assert(tdata->enq);
    303 		tdata->enq = false;
    304 		idump = tdata->enq_idump;
    305 		tdata->enq_idump = false;
    306 		gdump = tdata->enq_gdump;
    307 		tdata->enq_gdump = false;
    308 
    309 		if (idump) {
    310 			prof_idump(tsd_tsdn(tsd));
    311 		}
    312 		if (gdump) {
    313 			prof_gdump(tsd_tsdn(tsd));
    314 		}
    315 	}
    316 }
    317 
    318 #ifdef JEMALLOC_PROF_LIBUNWIND
    319 void
    320 prof_backtrace(prof_bt_t *bt) {
    321 	int nframes;
    322 
    323 	cassert(config_prof);
    324 	assert(bt->len == 0);
    325 	assert(bt->vec != NULL);
    326 
    327 	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
    328 	if (nframes <= 0) {
    329 		return;
    330 	}
    331 	bt->len = nframes;
    332 }
    333 #elif (defined(JEMALLOC_PROF_LIBGCC))
    334 static _Unwind_Reason_Code
    335 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
    336 	cassert(config_prof);
    337 
    338 	return _URC_NO_REASON;
    339 }
    340 
    341 static _Unwind_Reason_Code
    342 prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
    343 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
    344 	void *ip;
    345 
    346 	cassert(config_prof);
    347 
    348 	ip = (void *)_Unwind_GetIP(context);
    349 	if (ip == NULL) {
    350 		return _URC_END_OF_STACK;
    351 	}
    352 	data->bt->vec[data->bt->len] = ip;
    353 	data->bt->len++;
    354 	if (data->bt->len == data->max) {
    355 		return _URC_END_OF_STACK;
    356 	}
    357 
    358 	return _URC_NO_REASON;
    359 }
    360 
    361 void
    362 prof_backtrace(prof_bt_t *bt) {
    363 	prof_unwind_data_t data = {bt, PROF_BT_MAX};
    364 
    365 	cassert(config_prof);
    366 
    367 	_Unwind_Backtrace(prof_unwind_callback, &data);
    368 }
    369 #elif (defined(JEMALLOC_PROF_GCC))
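/*
 * __builtin_frame_address()/__builtin_return_address() require constant
 * arguments, so the stack walk is unrolled one BT_FRAME() per level (128
 * levels here), each guarded by PROF_BT_MAX.
 */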
    370 void
    371 prof_backtrace(prof_bt_t *bt) {
    372 #define BT_FRAME(i)							\
    373 	if ((i) < PROF_BT_MAX) {					\
    374 		void *p;						\
    375 		if (__builtin_frame_address(i) == 0) {			\
    376 			return;						\
    377 		}							\
    378 		p = __builtin_return_address(i);			\
    379 		if (p == NULL) {					\
    380 			return;						\
    381 		}							\
    382 		bt->vec[(i)] = p;					\
    383 		bt->len = (i) + 1;					\
    384 	} else {							\
    385 		return;							\
    386 	}
    387 
    388 	cassert(config_prof);
    389 
    390 	BT_FRAME(0)
    391 	BT_FRAME(1)
    392 	BT_FRAME(2)
    393 	BT_FRAME(3)
    394 	BT_FRAME(4)
    395 	BT_FRAME(5)
    396 	BT_FRAME(6)
    397 	BT_FRAME(7)
    398 	BT_FRAME(8)
    399 	BT_FRAME(9)
    400 
    401 	BT_FRAME(10)
    402 	BT_FRAME(11)
    403 	BT_FRAME(12)
    404 	BT_FRAME(13)
    405 	BT_FRAME(14)
    406 	BT_FRAME(15)
    407 	BT_FRAME(16)
    408 	BT_FRAME(17)
    409 	BT_FRAME(18)
    410 	BT_FRAME(19)
    411 
    412 	BT_FRAME(20)
    413 	BT_FRAME(21)
    414 	BT_FRAME(22)
    415 	BT_FRAME(23)
    416 	BT_FRAME(24)
    417 	BT_FRAME(25)
    418 	BT_FRAME(26)
    419 	BT_FRAME(27)
    420 	BT_FRAME(28)
    421 	BT_FRAME(29)
    422 
    423 	BT_FRAME(30)
    424 	BT_FRAME(31)
    425 	BT_FRAME(32)
    426 	BT_FRAME(33)
    427 	BT_FRAME(34)
    428 	BT_FRAME(35)
    429 	BT_FRAME(36)
    430 	BT_FRAME(37)
    431 	BT_FRAME(38)
    432 	BT_FRAME(39)
    433 
    434 	BT_FRAME(40)
    435 	BT_FRAME(41)
    436 	BT_FRAME(42)
    437 	BT_FRAME(43)
    438 	BT_FRAME(44)
    439 	BT_FRAME(45)
    440 	BT_FRAME(46)
    441 	BT_FRAME(47)
    442 	BT_FRAME(48)
    443 	BT_FRAME(49)
    444 
    445 	BT_FRAME(50)
    446 	BT_FRAME(51)
    447 	BT_FRAME(52)
    448 	BT_FRAME(53)
    449 	BT_FRAME(54)
    450 	BT_FRAME(55)
    451 	BT_FRAME(56)
    452 	BT_FRAME(57)
    453 	BT_FRAME(58)
    454 	BT_FRAME(59)
    455 
    456 	BT_FRAME(60)
    457 	BT_FRAME(61)
    458 	BT_FRAME(62)
    459 	BT_FRAME(63)
    460 	BT_FRAME(64)
    461 	BT_FRAME(65)
    462 	BT_FRAME(66)
    463 	BT_FRAME(67)
    464 	BT_FRAME(68)
    465 	BT_FRAME(69)
    466 
    467 	BT_FRAME(70)
    468 	BT_FRAME(71)
    469 	BT_FRAME(72)
    470 	BT_FRAME(73)
    471 	BT_FRAME(74)
    472 	BT_FRAME(75)
    473 	BT_FRAME(76)
    474 	BT_FRAME(77)
    475 	BT_FRAME(78)
    476 	BT_FRAME(79)
    477 
    478 	BT_FRAME(80)
    479 	BT_FRAME(81)
    480 	BT_FRAME(82)
    481 	BT_FRAME(83)
    482 	BT_FRAME(84)
    483 	BT_FRAME(85)
    484 	BT_FRAME(86)
    485 	BT_FRAME(87)
    486 	BT_FRAME(88)
    487 	BT_FRAME(89)
    488 
    489 	BT_FRAME(90)
    490 	BT_FRAME(91)
    491 	BT_FRAME(92)
    492 	BT_FRAME(93)
    493 	BT_FRAME(94)
    494 	BT_FRAME(95)
    495 	BT_FRAME(96)
    496 	BT_FRAME(97)
    497 	BT_FRAME(98)
    498 	BT_FRAME(99)
    499 
    500 	BT_FRAME(100)
    501 	BT_FRAME(101)
    502 	BT_FRAME(102)
    503 	BT_FRAME(103)
    504 	BT_FRAME(104)
    505 	BT_FRAME(105)
    506 	BT_FRAME(106)
    507 	BT_FRAME(107)
    508 	BT_FRAME(108)
    509 	BT_FRAME(109)
    510 
    511 	BT_FRAME(110)
    512 	BT_FRAME(111)
    513 	BT_FRAME(112)
    514 	BT_FRAME(113)
    515 	BT_FRAME(114)
    516 	BT_FRAME(115)
    517 	BT_FRAME(116)
    518 	BT_FRAME(117)
    519 	BT_FRAME(118)
    520 	BT_FRAME(119)
    521 
    522 	BT_FRAME(120)
    523 	BT_FRAME(121)
    524 	BT_FRAME(122)
    525 	BT_FRAME(123)
    526 	BT_FRAME(124)
    527 	BT_FRAME(125)
    528 	BT_FRAME(126)
    529 	BT_FRAME(127)
    530 #undef BT_FRAME
    531 }
    532 #else
    533 void
    534 prof_backtrace(prof_bt_t *bt) {
    535 	cassert(config_prof);
    536 	not_reached();
    537 }
    538 #endif
    539 
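/*
 * Pick a lock from the shared gctx/tdata lock tables declared above: gctx's
 * are assigned round-robin as they are created, tdata's by thr_uid modulo
 * PROF_NTDATA_LOCKS.  This keeps lock creation/destruction independent of
 * gctx/tdata lifetime.
 */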
    540 static malloc_mutex_t *
    541 prof_gctx_mutex_choose(void) {
    542 	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
    543 
    544 	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
    545 }
    546 
    547 static malloc_mutex_t *
    548 prof_tdata_mutex_choose(uint64_t thr_uid) {
    549 	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
    550 }
    551 
    552 static prof_gctx_t *
    553 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
    554 	/*
    555 	 * Create a single allocation that has space for vec of length bt->len.
    556 	 */
    557 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
    558 	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
    559 	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
    560 	    true);
    561 	if (gctx == NULL) {
    562 		return NULL;
    563 	}
    564 	gctx->lock = prof_gctx_mutex_choose();
    565 	/*
    566 	 * Set nlimbo to 1, in order to avoid a race condition with
    567 	 * prof_tctx_destroy()/prof_gctx_try_destroy().
    568 	 */
    569 	gctx->nlimbo = 1;
    570 	tctx_tree_new(&gctx->tctxs);
    571 	/* Duplicate bt. */
    572 	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
    573 	gctx->bt.vec = gctx->vec;
    574 	gctx->bt.len = bt->len;
    575 	return gctx;
    576 }
    577 
    578 static void
    579 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
    580     prof_tdata_t *tdata) {
    581 	cassert(config_prof);
    582 
    583 	/*
    584 	 * Check that gctx is still unused by any thread cache before destroying
    585 	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
    586 	 * condition with this function, as does prof_tctx_destroy() in order to
    587 	 * avoid a race between the main body of prof_tctx_destroy() and entry
    588 	 * into this function.
    589 	 */
    590 	prof_enter(tsd, tdata_self);
    591 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
    592 	assert(gctx->nlimbo != 0);
    593 	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
    594 		/* Remove gctx from bt2gctx. */
    595 		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
    596 			not_reached();
    597 		}
    598 		prof_leave(tsd, tdata_self);
    599 		/* Destroy gctx. */
    600 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    601 		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
    602 	} else {
    603 		/*
    604 		 * Compensate for increment in prof_tctx_destroy() or
    605 		 * prof_lookup().
    606 		 */
    607 		gctx->nlimbo--;
    608 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    609 		prof_leave(tsd, tdata_self);
    610 	}
    611 }
    612 
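/*
 * A tctx may be torn down only once it tracks no live objects, is not in the
 * middle of a sampled allocation (prepared), and cumulative counters are not
 * being maintained (opt_prof_accum); the analogous rule for gctx's also
 * requires an empty tctx tree and nlimbo == 0.
 */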
    613 static bool
    614 prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
    615 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
    616 
    617 	if (opt_prof_accum) {
    618 		return false;
    619 	}
    620 	if (tctx->cnts.curobjs != 0) {
    621 		return false;
    622 	}
    623 	if (tctx->prepared) {
    624 		return false;
    625 	}
    626 	return true;
    627 }
    628 
    629 static bool
    630 prof_gctx_should_destroy(prof_gctx_t *gctx) {
    631 	if (opt_prof_accum) {
    632 		return false;
    633 	}
    634 	if (!tctx_tree_empty(&gctx->tctxs)) {
    635 		return false;
    636 	}
    637 	if (gctx->nlimbo != 0) {
    638 		return false;
    639 	}
    640 	return true;
    641 }
    642 
    643 static void
    644 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
    645 	prof_tdata_t *tdata = tctx->tdata;
    646 	prof_gctx_t *gctx = tctx->gctx;
    647 	bool destroy_tdata, destroy_tctx, destroy_gctx;
    648 
    649 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
    650 
    651 	assert(tctx->cnts.curobjs == 0);
    652 	assert(tctx->cnts.curbytes == 0);
    653 	assert(!opt_prof_accum);
    654 	assert(tctx->cnts.accumobjs == 0);
    655 	assert(tctx->cnts.accumbytes == 0);
    656 
    657 	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
    658 	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
    659 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
    660 
    661 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
    662 	switch (tctx->state) {
    663 	case prof_tctx_state_nominal:
    664 		tctx_tree_remove(&gctx->tctxs, tctx);
    665 		destroy_tctx = true;
    666 		if (prof_gctx_should_destroy(gctx)) {
    667 			/*
    668 			 * Increment gctx->nlimbo in order to keep another
    669 			 * thread from winning the race to destroy gctx while
    670 			 * this one has gctx->lock dropped.  Without this, it
    671 			 * would be possible for another thread to:
    672 			 *
    673 			 * 1) Sample an allocation associated with gctx.
    674 			 * 2) Deallocate the sampled object.
    675 			 * 3) Successfully prof_gctx_try_destroy(gctx).
    676 			 *
    677 			 * The result would be that gctx no longer exists by the
    678 			 * time this thread accesses it in
    679 			 * prof_gctx_try_destroy().
    680 			 */
    681 			gctx->nlimbo++;
    682 			destroy_gctx = true;
    683 		} else {
    684 			destroy_gctx = false;
    685 		}
    686 		break;
    687 	case prof_tctx_state_dumping:
    688 		/*
    689 		 * A dumping thread needs tctx to remain valid until dumping
    690 		 * has finished.  Change state such that the dumping thread will
    691 		 * complete destruction during a late dump iteration phase.
    692 		 */
    693 		tctx->state = prof_tctx_state_purgatory;
    694 		destroy_tctx = false;
    695 		destroy_gctx = false;
    696 		break;
    697 	default:
    698 		not_reached();
    699 		destroy_tctx = false;
    700 		destroy_gctx = false;
    701 	}
    702 	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    703 	if (destroy_gctx) {
    704 		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
    705 		    tdata);
    706 	}
    707 
    708 	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
    709 
    710 	if (destroy_tdata) {
    711 		prof_tdata_destroy(tsd, tdata, false);
    712 	}
    713 
    714 	if (destroy_tctx) {
    715 		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
    716 	}
    717 }
    718 
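/*
 * Map bt to its canonical gctx, creating one if it has never been seen.  The
 * candidate gctx is allocated outside of bt2gctx_mtx and discarded if another
 * thread wins the insertion race.  On success, *p_gctx is returned with nlimbo
 * elevated (either as a brand-new gctx or via an explicit increment) so the
 * caller can link a tctx into it without racing prof_gctx_try_destroy().
 * Returns true on OOM.
 */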
    719 static bool
    720 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
    721     void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
    722 	union {
    723 		prof_gctx_t	*p;
    724 		void		*v;
    725 	} gctx, tgctx;
    726 	union {
    727 		prof_bt_t	*p;
    728 		void		*v;
    729 	} btkey;
    730 	bool new_gctx;
    731 
    732 	prof_enter(tsd, tdata);
    733 	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
    734 		/* bt has never been seen before.  Insert it. */
    735 		prof_leave(tsd, tdata);
    736 		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
    737 		if (tgctx.v == NULL) {
    738 			return true;
    739 		}
    740 		prof_enter(tsd, tdata);
    741 		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
    742 			gctx.p = tgctx.p;
    743 			btkey.p = &gctx.p->bt;
    744 			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
    745 				/* OOM. */
    746 				prof_leave(tsd, tdata);
    747 				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
    748 				    true, true);
    749 				return true;
    750 			}
    751 			new_gctx = true;
    752 		} else {
    753 			new_gctx = false;
    754 		}
    755 	} else {
    756 		tgctx.v = NULL;
    757 		new_gctx = false;
    758 	}
    759 
    760 	if (!new_gctx) {
    761 		/*
    762 		 * Increment nlimbo, in order to avoid a race condition with
    763 		 * prof_tctx_destroy()/prof_gctx_try_destroy().
    764 		 */
    765 		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
    766 		gctx.p->nlimbo++;
    767 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
    768 		new_gctx = false;
    769 
    770 		if (tgctx.v != NULL) {
    771 			/* Lost race to insert. */
    772 			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
    773 			    true);
    774 		}
    775 	}
    776 	prof_leave(tsd, tdata);
    777 
    778 	*p_btkey = btkey.v;
    779 	*p_gctx = gctx.p;
    780 	*p_new_gctx = new_gctx;
    781 	return false;
    782 }
    783 
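/*
 * Return the tctx under which the current thread should account a sampled
 * allocation for backtrace bt, creating thread-local and global state on
 * demand.  Returns NULL if the thread has no tdata or on OOM.
 */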
    784 prof_tctx_t *
    785 prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
    786 	union {
    787 		prof_tctx_t	*p;
    788 		void		*v;
    789 	} ret;
    790 	prof_tdata_t *tdata;
    791 	bool not_found;
    792 
    793 	cassert(config_prof);
    794 
    795 	tdata = prof_tdata_get(tsd, false);
    796 	if (tdata == NULL) {
    797 		return NULL;
    798 	}
    799 
    800 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
    801 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
    802 	if (!not_found) { /* Note double negative! */
    803 		ret.p->prepared = true;
    804 	}
    805 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
    806 	if (not_found) {
    807 		void *btkey;
    808 		prof_gctx_t *gctx;
    809 		bool new_gctx, error;
    810 
    811 		/*
    812 		 * This thread's cache lacks bt.  Look for it in the global
    813 		 * cache.
    814 		 */
    815 		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
    816 		    &new_gctx)) {
    817 			return NULL;
    818 		}
    819 
    820 		/* Link a prof_tctx_t into gctx for this thread. */
    821 		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
    822 		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
    823 		    arena_ichoose(tsd, NULL), true);
    824 		if (ret.p == NULL) {
    825 			if (new_gctx) {
    826 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
    827 			}
    828 			return NULL;
    829 		}
    830 		ret.p->tdata = tdata;
    831 		ret.p->thr_uid = tdata->thr_uid;
    832 		ret.p->thr_discrim = tdata->thr_discrim;
    833 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
    834 		ret.p->gctx = gctx;
    835 		ret.p->tctx_uid = tdata->tctx_uid_next++;
    836 		ret.p->prepared = true;
    837 		ret.p->state = prof_tctx_state_initializing;
    838 		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
    839 		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
    840 		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
    841 		if (error) {
    842 			if (new_gctx) {
    843 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
    844 			}
    845 			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
    846 			return NULL;
    847 		}
    848 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
    849 		ret.p->state = prof_tctx_state_nominal;
    850 		tctx_tree_insert(&gctx->tctxs, ret.p);
    851 		gctx->nlimbo--;
    852 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    853 	}
    854 
    855 	return ret.p;
    856 }
    857 
    858 /*
    859  * The bodies of this function and prof_leakcheck() are compiled out unless heap
    860  * profiling is enabled, so that it is possible to compile jemalloc with
    861  * floating point support completely disabled.  Avoiding floating point code is
    862  * important on memory-constrained systems, but it also enables a workaround for
    863  * versions of glibc that don't properly save/restore floating point registers
    864  * during dynamic lazy symbol loading (which internally calls into whatever
    865  * malloc implementation happens to be integrated into the application).  Note
    866  * that some compilers (e.g.  gcc 4.8) may use floating point registers for fast
    867  * memory moves, so jemalloc must be compiled with such optimizations disabled
    868  * (e.g.
     869  * (e.g. -mno-sse) in order for the workaround to be complete.
    871 void
    872 prof_sample_threshold_update(prof_tdata_t *tdata) {
    873 #ifdef JEMALLOC_PROF
    874 	uint64_t r;
    875 	double u;
    876 
    877 	if (!config_prof) {
    878 		return;
    879 	}
    880 
    881 	if (lg_prof_sample == 0) {
    882 		tdata->bytes_until_sample = 0;
    883 		return;
    884 	}
    885 
    886 	/*
    887 	 * Compute sample interval as a geometrically distributed random
    888 	 * variable with mean (2^lg_prof_sample).
    889 	 *
    890 	 *                             __        __
    891 	 *                             |  log(u)  |                     1
    892 	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
    893 	 *                             | log(1-p) |             lg_prof_sample
    894 	 *                                                     2
    895 	 *
    896 	 * For more information on the math, see:
    897 	 *
    898 	 *   Non-Uniform Random Variate Generation
    899 	 *   Luc Devroye
    900 	 *   Springer-Verlag, New York, 1986
    901 	 *   pp 500
    902 	 *   (http://luc.devroye.org/rnbookindex.html)
    903 	 */
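	/*
	 * Worked example (illustrative only): with the default
	 * lg_prof_sample = 19 the mean interval is 2^19 = 512 KiB, so
	 * p = 1/524288; a uniform draw of u = 0.5 gives
	 * log(0.5)/log(1 - 1/524288) ~= 363409 bytes until the next sample.
	 */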
    904 	r = prng_lg_range_u64(&tdata->prng_state, 53);
    905 	u = (double)r * (1.0/9007199254740992.0L);
    906 	tdata->bytes_until_sample = (uint64_t)(log(u) /
    907 	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
    908 	    + (uint64_t)1U;
    909 #endif
    910 }
    911 
    912 #ifdef JEMALLOC_JET
    913 static prof_tdata_t *
    914 prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
    915     void *arg) {
    916 	size_t *tdata_count = (size_t *)arg;
    917 
    918 	(*tdata_count)++;
    919 
    920 	return NULL;
    921 }
    922 
    923 size_t
    924 prof_tdata_count(void) {
    925 	size_t tdata_count = 0;
    926 	tsdn_t *tsdn;
    927 
    928 	tsdn = tsdn_fetch();
    929 	malloc_mutex_lock(tsdn, &tdatas_mtx);
    930 	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
    931 	    (void *)&tdata_count);
    932 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
    933 
    934 	return tdata_count;
    935 }
    936 
    937 size_t
    938 prof_bt_count(void) {
    939 	size_t bt_count;
    940 	tsd_t *tsd;
    941 	prof_tdata_t *tdata;
    942 
    943 	tsd = tsd_fetch();
    944 	tdata = prof_tdata_get(tsd, false);
    945 	if (tdata == NULL) {
    946 		return 0;
    947 	}
    948 
    949 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
    950 	bt_count = ckh_count(&bt2gctx);
    951 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
    952 
    953 	return bt_count;
    954 }
    955 #endif
    956 
    957 static int
    958 prof_dump_open_impl(bool propagate_err, const char *filename) {
    959 	int fd;
    960 
    961 	fd = creat(filename, 0644);
    962 	if (fd == -1 && !propagate_err) {
     963 		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
    964 		    filename);
    965 		if (opt_abort) {
    966 			abort();
    967 		}
    968 	}
    969 
    970 	return fd;
    971 }
    972 prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;
    973 
    974 static bool
    975 prof_dump_flush(bool propagate_err) {
    976 	bool ret = false;
    977 	ssize_t err;
    978 
    979 	cassert(config_prof);
    980 
    981 	err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
    982 	if (err == -1) {
    983 		if (!propagate_err) {
    984 			malloc_write("<jemalloc>: write() failed during heap "
    985 			    "profile flush\n");
    986 			if (opt_abort) {
    987 				abort();
    988 			}
    989 		}
    990 		ret = true;
    991 	}
    992 	prof_dump_buf_end = 0;
    993 
    994 	return ret;
    995 }
    996 
    997 static bool
    998 prof_dump_close(bool propagate_err) {
    999 	bool ret;
   1000 
   1001 	assert(prof_dump_fd != -1);
   1002 	ret = prof_dump_flush(propagate_err);
   1003 	close(prof_dump_fd);
   1004 	prof_dump_fd = -1;
   1005 
   1006 	return ret;
   1007 }
   1008 
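/*
 * Append s to prof_dump_buf, flushing to prof_dump_fd whenever the buffer
 * fills.  Returns true only if a flush failed and propagate_err is set;
 * otherwise prof_dump_flush() has already reported (and possibly aborted on)
 * the error.
 */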
   1009 static bool
   1010 prof_dump_write(bool propagate_err, const char *s) {
   1011 	size_t i, slen, n;
   1012 
   1013 	cassert(config_prof);
   1014 
   1015 	i = 0;
   1016 	slen = strlen(s);
   1017 	while (i < slen) {
   1018 		/* Flush the buffer if it is full. */
   1019 		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
   1020 			if (prof_dump_flush(propagate_err) && propagate_err) {
   1021 				return true;
   1022 			}
   1023 		}
   1024 
   1025 		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
   1026 			/* Finish writing. */
   1027 			n = slen - i;
   1028 		} else {
   1029 			/* Write as much of s as will fit. */
   1030 			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
   1031 		}
   1032 		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
   1033 		prof_dump_buf_end += n;
   1034 		i += n;
   1035 	}
   1036 
   1037 	return false;
   1038 }
   1039 
   1040 JEMALLOC_FORMAT_PRINTF(2, 3)
   1041 static bool
   1042 prof_dump_printf(bool propagate_err, const char *format, ...) {
   1043 	bool ret;
   1044 	va_list ap;
   1045 	char buf[PROF_PRINTF_BUFSIZE];
   1046 
   1047 	va_start(ap, format);
   1048 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
   1049 	va_end(ap);
   1050 	ret = prof_dump_write(propagate_err, buf);
   1051 
   1052 	return ret;
   1053 }
   1054 
   1055 static void
   1056 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
   1057 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
   1058 
   1059 	malloc_mutex_lock(tsdn, tctx->gctx->lock);
   1060 
   1061 	switch (tctx->state) {
   1062 	case prof_tctx_state_initializing:
   1063 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
   1064 		return;
   1065 	case prof_tctx_state_nominal:
   1066 		tctx->state = prof_tctx_state_dumping;
   1067 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
   1068 
   1069 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
   1070 
   1071 		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
   1072 		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
   1073 		if (opt_prof_accum) {
   1074 			tdata->cnt_summed.accumobjs +=
   1075 			    tctx->dump_cnts.accumobjs;
   1076 			tdata->cnt_summed.accumbytes +=
   1077 			    tctx->dump_cnts.accumbytes;
   1078 		}
   1079 		break;
   1080 	case prof_tctx_state_dumping:
   1081 	case prof_tctx_state_purgatory:
   1082 		not_reached();
   1083 	}
   1084 }
   1085 
   1086 static void
   1087 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
   1088 	malloc_mutex_assert_owner(tsdn, gctx->lock);
   1089 
   1090 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
   1091 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
   1092 	if (opt_prof_accum) {
   1093 		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
   1094 		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
   1095 	}
   1096 }
   1097 
   1098 static prof_tctx_t *
   1099 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
   1100 	tsdn_t *tsdn = (tsdn_t *)arg;
   1101 
   1102 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
   1103 
   1104 	switch (tctx->state) {
   1105 	case prof_tctx_state_nominal:
   1106 		/* New since dumping started; ignore. */
   1107 		break;
   1108 	case prof_tctx_state_dumping:
   1109 	case prof_tctx_state_purgatory:
   1110 		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
   1111 		break;
   1112 	default:
   1113 		not_reached();
   1114 	}
   1115 
   1116 	return NULL;
   1117 }
   1118 
   1119 struct prof_tctx_dump_iter_arg_s {
   1120 	tsdn_t	*tsdn;
   1121 	bool	propagate_err;
   1122 };
   1123 
   1124 static prof_tctx_t *
   1125 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
   1126 	struct prof_tctx_dump_iter_arg_s *arg =
   1127 	    (struct prof_tctx_dump_iter_arg_s *)opaque;
   1128 
   1129 	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
   1130 
   1131 	switch (tctx->state) {
   1132 	case prof_tctx_state_initializing:
   1133 	case prof_tctx_state_nominal:
   1134 		/* Not captured by this dump. */
   1135 		break;
   1136 	case prof_tctx_state_dumping:
   1137 	case prof_tctx_state_purgatory:
   1138 		if (prof_dump_printf(arg->propagate_err,
   1139 		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
   1140 		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
   1141 		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
   1142 		    tctx->dump_cnts.accumbytes)) {
   1143 			return tctx;
   1144 		}
   1145 		break;
   1146 	default:
   1147 		not_reached();
   1148 	}
   1149 	return NULL;
   1150 }
   1151 
   1152 static prof_tctx_t *
   1153 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
   1154 	tsdn_t *tsdn = (tsdn_t *)arg;
   1155 	prof_tctx_t *ret;
   1156 
   1157 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
   1158 
   1159 	switch (tctx->state) {
   1160 	case prof_tctx_state_nominal:
   1161 		/* New since dumping started; ignore. */
   1162 		break;
   1163 	case prof_tctx_state_dumping:
   1164 		tctx->state = prof_tctx_state_nominal;
   1165 		break;
   1166 	case prof_tctx_state_purgatory:
   1167 		ret = tctx;
   1168 		goto label_return;
   1169 	default:
   1170 		not_reached();
   1171 	}
   1172 
   1173 	ret = NULL;
   1174 label_return:
   1175 	return ret;
   1176 }
   1177 
   1178 static void
   1179 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
   1180 	cassert(config_prof);
   1181 
   1182 	malloc_mutex_lock(tsdn, gctx->lock);
   1183 
   1184 	/*
   1185 	 * Increment nlimbo so that gctx won't go away before dump.
   1186 	 * Additionally, link gctx into the dump list so that it is included in
   1187 	 * prof_dump()'s second pass.
   1188 	 */
   1189 	gctx->nlimbo++;
   1190 	gctx_tree_insert(gctxs, gctx);
   1191 
   1192 	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
   1193 
   1194 	malloc_mutex_unlock(tsdn, gctx->lock);
   1195 }
   1196 
   1197 struct prof_gctx_merge_iter_arg_s {
   1198 	tsdn_t	*tsdn;
   1199 	size_t	leak_ngctx;
   1200 };
   1201 
   1202 static prof_gctx_t *
   1203 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
   1204 	struct prof_gctx_merge_iter_arg_s *arg =
   1205 	    (struct prof_gctx_merge_iter_arg_s *)opaque;
   1206 
   1207 	malloc_mutex_lock(arg->tsdn, gctx->lock);
   1208 	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
   1209 	    (void *)arg->tsdn);
   1210 	if (gctx->cnt_summed.curobjs != 0) {
   1211 		arg->leak_ngctx++;
   1212 	}
   1213 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
   1214 
   1215 	return NULL;
   1216 }
   1217 
   1218 static void
   1219 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
   1220 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
   1221 	prof_gctx_t *gctx;
   1222 
   1223 	/*
   1224 	 * Standard tree iteration won't work here, because as soon as we
   1225 	 * decrement gctx->nlimbo and unlock gctx, another thread can
   1226 	 * concurrently destroy it, which will corrupt the tree.  Therefore,
   1227 	 * tear down the tree one node at a time during iteration.
   1228 	 */
   1229 	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
   1230 		gctx_tree_remove(gctxs, gctx);
   1231 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
   1232 		{
   1233 			prof_tctx_t *next;
   1234 
   1235 			next = NULL;
   1236 			do {
   1237 				prof_tctx_t *to_destroy =
   1238 				    tctx_tree_iter(&gctx->tctxs, next,
   1239 				    prof_tctx_finish_iter,
   1240 				    (void *)tsd_tsdn(tsd));
   1241 				if (to_destroy != NULL) {
   1242 					next = tctx_tree_next(&gctx->tctxs,
   1243 					    to_destroy);
   1244 					tctx_tree_remove(&gctx->tctxs,
   1245 					    to_destroy);
   1246 					idalloctm(tsd_tsdn(tsd), to_destroy,
   1247 					    NULL, NULL, true, true);
   1248 				} else {
   1249 					next = NULL;
   1250 				}
   1251 			} while (next != NULL);
   1252 		}
   1253 		gctx->nlimbo--;
   1254 		if (prof_gctx_should_destroy(gctx)) {
   1255 			gctx->nlimbo++;
   1256 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
   1257 			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
   1258 		} else {
   1259 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
   1260 		}
   1261 	}
   1262 }
   1263 
   1264 struct prof_tdata_merge_iter_arg_s {
   1265 	tsdn_t		*tsdn;
   1266 	prof_cnt_t	cnt_all;
   1267 };
   1268 
   1269 static prof_tdata_t *
   1270 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
   1271     void *opaque) {
   1272 	struct prof_tdata_merge_iter_arg_s *arg =
   1273 	    (struct prof_tdata_merge_iter_arg_s *)opaque;
   1274 
   1275 	malloc_mutex_lock(arg->tsdn, tdata->lock);
   1276 	if (!tdata->expired) {
   1277 		size_t tabind;
   1278 		union {
   1279 			prof_tctx_t	*p;
   1280 			void		*v;
   1281 		} tctx;
   1282 
   1283 		tdata->dumping = true;
   1284 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
   1285 		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
   1286 		    &tctx.v);) {
   1287 			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
   1288 		}
   1289 
   1290 		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
   1291 		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
   1292 		if (opt_prof_accum) {
   1293 			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
   1294 			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
   1295 		}
   1296 	} else {
   1297 		tdata->dumping = false;
   1298 	}
   1299 	malloc_mutex_unlock(arg->tsdn, tdata->lock);
   1300 
   1301 	return NULL;
   1302 }
   1303 
   1304 static prof_tdata_t *
   1305 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
   1306     void *arg) {
   1307 	bool propagate_err = *(bool *)arg;
   1308 
   1309 	if (!tdata->dumping) {
   1310 		return NULL;
   1311 	}
   1312 
   1313 	if (prof_dump_printf(propagate_err,
   1314 	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
   1315 	    tdata->thr_uid, tdata->cnt_summed.curobjs,
   1316 	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
   1317 	    tdata->cnt_summed.accumbytes,
   1318 	    (tdata->thread_name != NULL) ? " " : "",
   1319 	    (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
   1320 		return tdata;
   1321 	}
   1322 	return NULL;
   1323 }
   1324 
   1325 static bool
   1326 prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
   1327     const prof_cnt_t *cnt_all) {
   1328 	bool ret;
   1329 
   1330 	if (prof_dump_printf(propagate_err,
   1331 	    "heap_v2/%"FMTu64"\n"
   1332 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
   1333 	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
   1334 	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
   1335 		return true;
   1336 	}
   1337 
   1338 	malloc_mutex_lock(tsdn, &tdatas_mtx);
   1339 	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
   1340 	    (void *)&propagate_err) != NULL);
   1341 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
   1342 	return ret;
   1343 }
   1344 prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;
   1345 
   1346 static bool
   1347 prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
   1348     const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
   1349 	bool ret;
   1350 	unsigned i;
   1351 	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
   1352 
   1353 	cassert(config_prof);
   1354 	malloc_mutex_assert_owner(tsdn, gctx->lock);
   1355 
   1356 	/* Avoid dumping such gctx's that have no useful data. */
    1357 	/* Avoid dumping gctx's that have no useful data. */
   1358 	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
   1359 		assert(gctx->cnt_summed.curobjs == 0);
   1360 		assert(gctx->cnt_summed.curbytes == 0);
   1361 		assert(gctx->cnt_summed.accumobjs == 0);
   1362 		assert(gctx->cnt_summed.accumbytes == 0);
   1363 		ret = false;
   1364 		goto label_return;
   1365 	}
   1366 
   1367 	if (prof_dump_printf(propagate_err, "@")) {
   1368 		ret = true;
   1369 		goto label_return;
   1370 	}
   1371 	for (i = 0; i < bt->len; i++) {
   1372 		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
   1373 		    (uintptr_t)bt->vec[i])) {
   1374 			ret = true;
   1375 			goto label_return;
   1376 		}
   1377 	}
   1378 
   1379 	if (prof_dump_printf(propagate_err,
   1380 	    "\n"
   1381 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
   1382 	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
   1383 	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
   1384 		ret = true;
   1385 		goto label_return;
   1386 	}
   1387 
   1388 	prof_tctx_dump_iter_arg.tsdn = tsdn;
   1389 	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
   1390 	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
   1391 	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
   1392 		ret = true;
   1393 		goto label_return;
   1394 	}
   1395 
   1396 	ret = false;
   1397 label_return:
   1398 	return ret;
   1399 }
   1400 
   1401 #ifndef _WIN32
   1402 JEMALLOC_FORMAT_PRINTF(1, 2)
   1403 static int
   1404 prof_open_maps(const char *format, ...) {
   1405 	int mfd;
   1406 	va_list ap;
   1407 	char filename[PATH_MAX + 1];
   1408 
   1409 	va_start(ap, format);
   1410 	malloc_vsnprintf(filename, sizeof(filename), format, ap);
   1411 	va_end(ap);
   1412 
   1413 #if defined(O_CLOEXEC)
   1414 	mfd = open(filename, O_RDONLY | O_CLOEXEC);
   1415 #else
   1416 	mfd = open(filename, O_RDONLY);
   1417 	if (mfd != -1) {
   1418 		fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
   1419 	}
   1420 #endif
   1421 
   1422 	return mfd;
   1423 }
   1424 #endif
   1425 
   1426 static int
   1427 prof_getpid(void) {
   1428 #ifdef _WIN32
   1429 	return GetCurrentProcessId();
   1430 #else
   1431 	return getpid();
   1432 #endif
   1433 }
   1434 
   1435 static bool
   1436 prof_dump_maps(bool propagate_err) {
   1437 	bool ret;
   1438 	int mfd;
   1439 
   1440 	cassert(config_prof);
   1441 #ifdef __FreeBSD__
   1442 	mfd = prof_open_maps("/proc/curproc/map");
   1443 #elif defined(_WIN32)
    1444 	mfd = -1; /* Not implemented. */
   1445 #else
   1446 	{
   1447 		int pid = prof_getpid();
   1448 
   1449 		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
   1450 		if (mfd == -1) {
   1451 			mfd = prof_open_maps("/proc/%d/maps", pid);
   1452 		}
   1453 	}
   1454 #endif
   1455 	if (mfd != -1) {
   1456 		ssize_t nread;
   1457 
   1458 		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
   1459 		    propagate_err) {
   1460 			ret = true;
   1461 			goto label_return;
   1462 		}
   1463 		nread = 0;
   1464 		do {
   1465 			prof_dump_buf_end += nread;
   1466 			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
   1467 				/* Make space in prof_dump_buf before read(). */
   1468 				if (prof_dump_flush(propagate_err) &&
   1469 				    propagate_err) {
   1470 					ret = true;
   1471 					goto label_return;
   1472 				}
   1473 			}
   1474 			nread = malloc_read_fd(mfd,
   1475 			    &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
   1476 			    - prof_dump_buf_end);
   1477 		} while (nread > 0);
   1478 	} else {
   1479 		ret = true;
   1480 		goto label_return;
   1481 	}
   1482 
   1483 	ret = false;
   1484 label_return:
   1485 	if (mfd != -1) {
   1486 		close(mfd);
   1487 	}
   1488 	return ret;
   1489 }
   1490 
   1491 /*
   1492  * See prof_sample_threshold_update() comment for why the body of this function
   1493  * is conditionally compiled.
   1494  */
   1495 static void
   1496 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
   1497     const char *filename) {
   1498 #ifdef JEMALLOC_PROF
   1499 	/*
    1500 	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result may
   1501 	 * differ slightly from what jeprof reports, because here we scale the
   1502 	 * summary values, whereas jeprof scales each context individually and
   1503 	 * reports the sums of the scaled values.
   1504 	 */
   1505 	if (cnt_all->curbytes != 0) {
   1506 		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
   1507 		double ratio = (((double)cnt_all->curbytes) /
   1508 		    (double)cnt_all->curobjs) / sample_period;
   1509 		double scale_factor = 1.0 / (1.0 - exp(-ratio));
   1510 		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
   1511 		    * scale_factor);
   1512 		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
   1513 		    scale_factor);
   1514 
   1515 		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
   1516 		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
   1517 		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
   1518 		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
   1519 		malloc_printf(
   1520 		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
   1521 		    filename);
   1522 	}
   1523 #endif
   1524 }
   1525 
   1526 struct prof_gctx_dump_iter_arg_s {
   1527 	tsdn_t	*tsdn;
   1528 	bool	propagate_err;
   1529 };
   1530 
   1531 static prof_gctx_t *
   1532 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
   1533 	prof_gctx_t *ret;
   1534 	struct prof_gctx_dump_iter_arg_s *arg =
   1535 	    (struct prof_gctx_dump_iter_arg_s *)opaque;
   1536 
   1537 	malloc_mutex_lock(arg->tsdn, gctx->lock);
   1538 
   1539 	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
   1540 	    gctxs)) {
   1541 		ret = gctx;
   1542 		goto label_return;
   1543 	}
   1544 
   1545 	ret = NULL;
   1546 label_return:
   1547 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
   1548 	return ret;
   1549 }
   1550 
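/*
 * First pass of a dump, performed under bt2gctx_mtx: put every gctx into
 * limbo with zeroed summary counters, snapshot each non-expired tdata's tctx
 * counters (accumulating them into cnt_all), and then merge those snapshots
 * into the owning gctx's while counting gctx's with potential leaks.
 */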
   1551 static void
   1552 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
   1553     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
   1554     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
   1555     prof_gctx_tree_t *gctxs) {
   1556 	size_t tabind;
   1557 	union {
   1558 		prof_gctx_t	*p;
   1559 		void		*v;
   1560 	} gctx;
   1561 
   1562 	prof_enter(tsd, tdata);
   1563 
   1564 	/*
   1565 	 * Put gctx's in limbo and clear their counters in preparation for
   1566 	 * summing.
   1567 	 */
   1568 	gctx_tree_new(gctxs);
   1569 	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
   1570 		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
   1571 	}
   1572 
   1573 	/*
   1574 	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
   1575 	 * stats and merge them into the associated gctx's.
   1576 	 */
   1577 	prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
   1578 	memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
   1579 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   1580 	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
   1581 	    (void *)prof_tdata_merge_iter_arg);
   1582 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   1583 
   1584 	/* Merge tctx stats into gctx's. */
   1585 	prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
   1586 	prof_gctx_merge_iter_arg->leak_ngctx = 0;
   1587 	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
   1588 	    (void *)prof_gctx_merge_iter_arg);
   1589 
   1590 	prof_leave(tsd, tdata);
   1591 }
   1592 
   1593 static bool
   1594 prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
   1595     bool leakcheck, prof_tdata_t *tdata,
   1596     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
   1597     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
   1598     struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
   1599     prof_gctx_tree_t *gctxs) {
   1600 	/* Create dump file. */
   1601 	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
   1602 		return true;
   1603 	}
   1604 
   1605 	/* Dump profile header. */
   1606 	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
   1607 	    &prof_tdata_merge_iter_arg->cnt_all)) {
   1608 		goto label_write_error;
   1609 	}
   1610 
   1611 	/* Dump per gctx profile stats. */
   1612 	prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
   1613 	prof_gctx_dump_iter_arg->propagate_err = propagate_err;
   1614 	if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
   1615 	    (void *)prof_gctx_dump_iter_arg) != NULL) {
   1616 		goto label_write_error;
   1617 	}
   1618 
   1619 	/* Dump /proc/<pid>/maps if possible. */
   1620 	if (prof_dump_maps(propagate_err)) {
   1621 		goto label_write_error;
   1622 	}
   1623 
   1624 	if (prof_dump_close(propagate_err)) {
   1625 		return true;
   1626 	}
   1627 
   1628 	return false;
   1629 label_write_error:
   1630 	prof_dump_close(propagate_err);
   1631 	return true;
   1632 }
   1633 
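/*
 * Write a complete heap profile to filename: under prof_dump_mtx, snapshot
 * all counters, emit the header, per-gctx records and the process memory map,
 * then release the gctx's held in limbo.  Returns true on any failure.
 */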
   1634 static bool
   1635 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
   1636     bool leakcheck) {
   1637 	cassert(config_prof);
   1638 	assert(tsd_reentrancy_level_get(tsd) == 0);
   1639 
   1640 	prof_tdata_t * tdata = prof_tdata_get(tsd, true);
   1641 	if (tdata == NULL) {
   1642 		return true;
   1643 	}
   1644 
   1645 	pre_reentrancy(tsd, NULL);
   1646 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
   1647 
   1648 	prof_gctx_tree_t gctxs;
   1649 	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
   1650 	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
   1651 	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
   1652 	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
   1653 	    &prof_gctx_merge_iter_arg, &gctxs);
   1654 	bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
   1655 	    &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
   1656 	    &prof_gctx_dump_iter_arg, &gctxs);
   1657 	prof_gctx_finish(tsd, &gctxs);
   1658 
   1659 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
   1660 	post_reentrancy(tsd);
   1661 
   1662 	if (err) {
   1663 		return true;
   1664 	}
   1665 
   1666 	if (leakcheck) {
   1667 		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
   1668 		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
   1669 	}
   1670 	return false;
   1671 }
   1672 
   1673 #ifdef JEMALLOC_JET
   1674 void
   1675 prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
   1676     uint64_t *accumbytes) {
   1677 	tsd_t *tsd;
   1678 	prof_tdata_t *tdata;
   1679 	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
   1680 	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
   1681 	prof_gctx_tree_t gctxs;
   1682 
   1683 	tsd = tsd_fetch();
   1684 	tdata = prof_tdata_get(tsd, false);
   1685 	if (tdata == NULL) {
   1686 		if (curobjs != NULL) {
   1687 			*curobjs = 0;
   1688 		}
   1689 		if (curbytes != NULL) {
   1690 			*curbytes = 0;
   1691 		}
   1692 		if (accumobjs != NULL) {
   1693 			*accumobjs = 0;
   1694 		}
   1695 		if (accumbytes != NULL) {
   1696 			*accumbytes = 0;
   1697 		}
   1698 		return;
   1699 	}
   1700 
   1701 	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
   1702 	    &prof_gctx_merge_iter_arg, &gctxs);
   1703 	prof_gctx_finish(tsd, &gctxs);
   1704 
   1705 	if (curobjs != NULL) {
   1706 		*curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs;
   1707 	}
   1708 	if (curbytes != NULL) {
   1709 		*curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes;
   1710 	}
   1711 	if (accumobjs != NULL) {
   1712 		*accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs;
   1713 	}
   1714 	if (accumbytes != NULL) {
   1715 		*accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes;
   1716 	}
   1717 }
   1718 #endif
   1719 
   1720 #define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
   1721 #define VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
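/*
 * Compose "<prefix>.<pid>.<seq>.<v>[<vseq>].heap", where <v> is a single
 * character identifying the dump trigger ('i', 'm', 'u', or 'f').  With the
 * default prefix this yields e.g. "jeprof.12345.0.i0.heap" for a first
 * interval-triggered dump (pid 12345 chosen purely for illustration).
 */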
   1722 static void
   1723 prof_dump_filename(char *filename, char v, uint64_t vseq) {
   1724 	cassert(config_prof);
   1725 
   1726 	if (vseq != VSEQ_INVALID) {
    1727 	        /* "<prefix>.<pid>.<seq>.<v><vseq>.heap" */
   1728 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
   1729 		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
   1730 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
   1731 	} else {
   1732 	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
   1733 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
   1734 		    "%s.%d.%"FMTu64".%c.heap",
   1735 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
   1736 	}
   1737 	prof_dump_seq++;
   1738 }
   1739 
   1740 static void
   1741 prof_fdump(void) {
   1742 	tsd_t *tsd;
   1743 	char filename[DUMP_FILENAME_BUFSIZE];
   1744 
   1745 	cassert(config_prof);
   1746 	assert(opt_prof_final);
   1747 	assert(opt_prof_prefix[0] != '\0');
   1748 
   1749 	if (!prof_booted) {
   1750 		return;
   1751 	}
   1752 	tsd = tsd_fetch();
   1753 	assert(tsd_reentrancy_level_get(tsd) == 0);
   1754 
   1755 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
   1756 	prof_dump_filename(filename, 'f', VSEQ_INVALID);
   1757 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
   1758 	prof_dump(tsd, false, filename, opt_prof_leak);
   1759 }
   1760 
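         /*
          * Initialize a byte accumulator used elsewhere to decide when an
          * interval dump (prof_idump()) is due.  With native 64-bit atomics
          * the counter is lock-free; otherwise it falls back to a dedicated
          * mutex.
          */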
   1761 bool
   1762 prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
   1763 	cassert(config_prof);
   1764 
   1765 #ifndef JEMALLOC_ATOMIC_U64
   1766 	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
   1767 	    WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
   1768 		return true;
   1769 	}
   1770 	prof_accum->accumbytes = 0;
   1771 #else
   1772 	atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
   1773 #endif
   1774 	return false;
   1775 }
   1776 
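         /*
          * Interval-triggered dump ('i' files).  Returns early if profiling
          * is not booted or not active, if called reentrantly, or if the
          * calling thread has no tdata; while the thread is enqueuing
          * (tdata->enq) the request is deferred via enq_idump instead of
          * dumping immediately.
          */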
   1777 void
   1778 prof_idump(tsdn_t *tsdn) {
   1779 	tsd_t *tsd;
   1780 	prof_tdata_t *tdata;
   1781 
   1782 	cassert(config_prof);
   1783 
   1784 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
   1785 		return;
   1786 	}
   1787 	tsd = tsdn_tsd(tsdn);
   1788 	if (tsd_reentrancy_level_get(tsd) > 0) {
   1789 		return;
   1790 	}
   1791 
   1792 	tdata = prof_tdata_get(tsd, false);
   1793 	if (tdata == NULL) {
   1794 		return;
   1795 	}
   1796 	if (tdata->enq) {
   1797 		tdata->enq_idump = true;
   1798 		return;
   1799 	}
   1800 
   1801 	if (opt_prof_prefix[0] != '\0') {
   1802 		char filename[PATH_MAX + 1];
   1803 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
   1804 		prof_dump_filename(filename, 'i', prof_dump_iseq);
   1805 		prof_dump_iseq++;
   1806 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
   1807 		prof_dump(tsd, false, filename, false);
   1808 	}
   1809 }
   1810 
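         /*
          * Manually-triggered dump ('m' files), presumably reached via the
          * "prof.dump" mallctl.  A NULL filename requests an automatically
          * generated name based on opt_prof_prefix; returns true on failure.
          * Hypothetical caller sketch:
          *
          *	mallctl("prof.dump", NULL, NULL, NULL, 0);
          */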
   1811 bool
   1812 prof_mdump(tsd_t *tsd, const char *filename) {
   1813 	cassert(config_prof);
   1814 	assert(tsd_reentrancy_level_get(tsd) == 0);
   1815 
   1816 	if (!opt_prof || !prof_booted) {
   1817 		return true;
   1818 	}
   1819 	char filename_buf[DUMP_FILENAME_BUFSIZE];
   1820 	if (filename == NULL) {
   1821 		/* No filename specified, so automatically generate one. */
   1822 		if (opt_prof_prefix[0] == '\0') {
   1823 			return true;
   1824 		}
   1825 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
   1826 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
   1827 		prof_dump_mseq++;
   1828 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
   1829 		filename = filename_buf;
   1830 	}
   1831 	return prof_dump(tsd, true, filename, false);
   1832 }
   1833 
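         /*
          * Growth-triggered dump ('u' files), controlled by opt_prof_gdump /
          * prof_gdump_val (presumably fired when the mapped-memory high-water
          * mark grows); mirrors prof_idump() apart from the trigger character
          * and sequence counter.
          */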
   1834 void
   1835 prof_gdump(tsdn_t *tsdn) {
   1836 	tsd_t *tsd;
   1837 	prof_tdata_t *tdata;
   1838 
   1839 	cassert(config_prof);
   1840 
   1841 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
   1842 		return;
   1843 	}
   1844 	tsd = tsdn_tsd(tsdn);
   1845 	if (tsd_reentrancy_level_get(tsd) > 0) {
   1846 		return;
   1847 	}
   1848 
   1849 	tdata = prof_tdata_get(tsd, false);
   1850 	if (tdata == NULL) {
   1851 		return;
   1852 	}
   1853 	if (tdata->enq) {
   1854 		tdata->enq_gdump = true;
   1855 		return;
   1856 	}
   1857 
   1858 	if (opt_prof_prefix[0] != '\0') {
   1859 		char filename[DUMP_FILENAME_BUFSIZE];
   1860 		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
   1861 		prof_dump_filename(filename, 'u', prof_dump_useq);
   1862 		prof_dump_useq++;
   1863 		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
   1864 		prof_dump(tsd, false, filename, false);
   1865 	}
   1866 }
   1867 
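         /*
          * Hash and equality functions for backtrace keys (prof_bt_t); used
          * by the global bt2gctx table and each tdata's bt2tctx table.
          */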
   1868 static void
   1869 prof_bt_hash(const void *key, size_t r_hash[2]) {
   1870 	prof_bt_t *bt = (prof_bt_t *)key;
   1871 
   1872 	cassert(config_prof);
   1873 
   1874 	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
   1875 }
   1876 
   1877 static bool
   1878 prof_bt_keycomp(const void *k1, const void *k2) {
   1879 	const prof_bt_t *bt1 = (prof_bt_t *)k1;
   1880 	const prof_bt_t *bt2 = (prof_bt_t *)k2;
   1881 
   1882 	cassert(config_prof);
   1883 
   1884 	if (bt1->len != bt2->len) {
   1885 		return false;
   1886 	}
   1887 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
   1888 }
   1889 
   1890 static uint64_t
   1891 prof_thr_uid_alloc(tsdn_t *tsdn) {
   1892 	uint64_t thr_uid;
   1893 
   1894 	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
   1895 	thr_uid = next_thr_uid;
   1896 	next_thr_uid++;
   1897 	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
   1898 
   1899 	return thr_uid;
   1900 }
   1901 
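         /*
          * Allocate and register a thread's profiling data (tdata).  The
          * (thr_uid, thr_discrim) pair identifies a logical thread across
          * reinitializations, bt2tctx maps sampled backtraces to per-thread
          * counters, and the finished tdata is inserted into the global
          * tdatas tree under tdatas_mtx.
          */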
   1902 static prof_tdata_t *
   1903 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
   1904     char *thread_name, bool active) {
   1905 	prof_tdata_t *tdata;
   1906 
   1907 	cassert(config_prof);
   1908 
   1909 	/* Initialize an empty cache for this thread. */
   1910 	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
   1911 	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
   1912 	    arena_get(TSDN_NULL, 0, true), true);
   1913 	if (tdata == NULL) {
   1914 		return NULL;
   1915 	}
   1916 
   1917 	tdata->lock = prof_tdata_mutex_choose(thr_uid);
   1918 	tdata->thr_uid = thr_uid;
   1919 	tdata->thr_discrim = thr_discrim;
   1920 	tdata->thread_name = thread_name;
   1921 	tdata->attached = true;
   1922 	tdata->expired = false;
   1923 	tdata->tctx_uid_next = 0;
   1924 
   1925 	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
   1926 	    prof_bt_keycomp)) {
   1927 		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
   1928 		return NULL;
   1929 	}
   1930 
   1931 	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
   1932 	prof_sample_threshold_update(tdata);
   1933 
   1934 	tdata->enq = false;
   1935 	tdata->enq_idump = false;
   1936 	tdata->enq_gdump = false;
   1937 
   1938 	tdata->dumping = false;
   1939 	tdata->active = active;
   1940 
   1941 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   1942 	tdata_tree_insert(&tdatas, tdata);
   1943 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   1944 
   1945 	return tdata;
   1946 }
   1947 
   1948 prof_tdata_t *
   1949 prof_tdata_init(tsd_t *tsd) {
   1950 	return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
   1951 	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
   1952 }
   1953 
   1954 static bool
   1955 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
   1956 	if (tdata->attached && !even_if_attached) {
   1957 		return false;
   1958 	}
   1959 	if (ckh_count(&tdata->bt2tctx) != 0) {
   1960 		return false;
   1961 	}
   1962 	return true;
   1963 }
   1964 
   1965 static bool
   1966 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
   1967     bool even_if_attached) {
   1968 	malloc_mutex_assert_owner(tsdn, tdata->lock);
   1969 
   1970 	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
   1971 }
   1972 
   1973 static void
   1974 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
   1975     bool even_if_attached) {
   1976 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
   1977 
   1978 	tdata_tree_remove(&tdatas, tdata);
   1979 
   1980 	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
   1981 
   1982 	if (tdata->thread_name != NULL) {
   1983 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
   1984 		    true);
   1985 	}
   1986 	ckh_delete(tsd, &tdata->bt2tctx);
   1987 	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
   1988 }
   1989 
   1990 static void
   1991 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
   1992 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   1993 	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
   1994 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   1995 }
   1996 
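         /*
          * Detach the calling thread from its tdata (e.g. from
          * prof_tdata_cleanup() at thread exit).  An empty tdata is destroyed
          * immediately; otherwise it is merely marked detached and left in
          * the tdatas tree so a later dump or reset can reclaim it.
          */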
   1997 static void
   1998 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
   1999 	bool destroy_tdata;
   2000 
   2001 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
   2002 	if (tdata->attached) {
   2003 		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
   2004 		    true);
   2005 		/*
   2006 		 * Only detach if !destroy_tdata, because detaching would allow
   2007 		 * another thread to win the race to destroy tdata.
   2008 		 */
   2009 		if (!destroy_tdata) {
   2010 			tdata->attached = false;
   2011 		}
   2012 		tsd_prof_tdata_set(tsd, NULL);
   2013 	} else {
   2014 		destroy_tdata = false;
   2015 	}
   2016 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
   2017 	if (destroy_tdata) {
   2018 		prof_tdata_destroy(tsd, tdata, true);
   2019 	}
   2020 }
   2021 
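         /*
          * Replace the calling thread's tdata with a fresh one that keeps
          * thr_uid but bumps thr_discrim, carrying over the thread name and
          * active flag; presumably used when per-thread sampling state must
          * be rebuilt without losing the thread's identity.
          */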
   2022 prof_tdata_t *
   2023 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
   2024 	uint64_t thr_uid = tdata->thr_uid;
   2025 	uint64_t thr_discrim = tdata->thr_discrim + 1;
   2026 	char *thread_name = (tdata->thread_name != NULL) ?
   2027 	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
   2028 	bool active = tdata->active;
   2029 
   2030 	prof_tdata_detach(tsd, tdata);
   2031 	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
   2032 	    active);
   2033 }
   2034 
   2035 static bool
   2036 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
   2037 	bool destroy_tdata;
   2038 
   2039 	malloc_mutex_lock(tsdn, tdata->lock);
   2040 	if (!tdata->expired) {
   2041 		tdata->expired = true;
   2042 		destroy_tdata = tdata->attached ? false :
   2043 		    prof_tdata_should_destroy(tsdn, tdata, false);
   2044 	} else {
   2045 		destroy_tdata = false;
   2046 	}
   2047 	malloc_mutex_unlock(tsdn, tdata->lock);
   2048 
   2049 	return destroy_tdata;
   2050 }
   2051 
   2052 static prof_tdata_t *
   2053 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
   2054     void *arg) {
   2055 	tsdn_t *tsdn = (tsdn_t *)arg;
   2056 
   2057 	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
   2058 }
   2059 
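         /*
          * Reset profiling state: install a new global sample rate and walk
          * the tdatas tree, expiring every tdata and destroying those that
          * are detached and hold no counters.  Presumably reached via the
          * "prof.reset" mallctl; a hypothetical caller sketch:
          *
          *	size_t lg_sample = 19;
          *	mallctl("prof.reset", NULL, NULL, &lg_sample,
          *	    sizeof(lg_sample));
          */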
   2060 void
   2061 prof_reset(tsd_t *tsd, size_t lg_sample) {
   2062 	prof_tdata_t *next;
   2063 
   2064 	assert(lg_sample < (sizeof(uint64_t) << 3));
   2065 
   2066 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
   2067 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   2068 
   2069 	lg_prof_sample = lg_sample;
   2070 
   2071 	next = NULL;
   2072 	do {
   2073 		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
   2074 		    prof_tdata_reset_iter, (void *)tsd);
   2075 		if (to_destroy != NULL) {
   2076 			next = tdata_tree_next(&tdatas, to_destroy);
   2077 			prof_tdata_destroy_locked(tsd, to_destroy, false);
   2078 		} else {
   2079 			next = NULL;
   2080 		}
   2081 	} while (next != NULL);
   2082 
   2083 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   2084 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
   2085 }
   2086 
   2087 void
   2088 prof_tdata_cleanup(tsd_t *tsd) {
   2089 	prof_tdata_t *tdata;
   2090 
   2091 	if (!config_prof) {
   2092 		return;
   2093 	}
   2094 
   2095 	tdata = tsd_prof_tdata_get(tsd);
   2096 	if (tdata != NULL) {
   2097 		prof_tdata_detach(tsd, tdata);
   2098 	}
   2099 }
   2100 
   2101 bool
   2102 prof_active_get(tsdn_t *tsdn) {
   2103 	bool prof_active_current;
   2104 
   2105 	malloc_mutex_lock(tsdn, &prof_active_mtx);
   2106 	prof_active_current = prof_active;
   2107 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
   2108 	return prof_active_current;
   2109 }
   2110 
   2111 bool
   2112 prof_active_set(tsdn_t *tsdn, bool active) {
   2113 	bool prof_active_old;
   2114 
   2115 	malloc_mutex_lock(tsdn, &prof_active_mtx);
   2116 	prof_active_old = prof_active;
   2117 	prof_active = active;
   2118 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
   2119 	return prof_active_old;
   2120 }
   2121 
   2122 const char *
   2123 prof_thread_name_get(tsd_t *tsd) {
   2124 	prof_tdata_t *tdata;
   2125 
   2126 	tdata = prof_tdata_get(tsd, true);
   2127 	if (tdata == NULL) {
   2128 		return "";
   2129 	}
   2130 	return (tdata->thread_name != NULL ? tdata->thread_name : "");
   2131 }
   2132 
   2133 static char *
   2134 prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
   2135 	char *ret;
   2136 	size_t size;
   2137 
   2138 	if (thread_name == NULL) {
   2139 		return NULL;
   2140 	}
   2141 
   2142 	size = strlen(thread_name) + 1;
   2143 	if (size == 1) {
   2144 		return "";
   2145 	}
   2146 
   2147 	ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
   2148 	    arena_get(TSDN_NULL, 0, true), true);
   2149 	if (ret == NULL) {
   2150 		return NULL;
   2151 	}
   2152 	memcpy(ret, thread_name, size);
   2153 	return ret;
   2154 }
   2155 
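         /*
          * Set the calling thread's name as it will appear in heap profiles.
          * Names are restricted to printable/blank characters (EFAULT
          * otherwise); EAGAIN signals a missing tdata or allocation failure.
          * Presumably exposed through the "thread.prof.name" mallctl.
          */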
   2156 int
   2157 prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
   2158 	prof_tdata_t *tdata;
   2159 	unsigned i;
   2160 	char *s;
   2161 
   2162 	tdata = prof_tdata_get(tsd, true);
   2163 	if (tdata == NULL) {
   2164 		return EAGAIN;
   2165 	}
   2166 
   2167 	/* Validate input. */
   2168 	if (thread_name == NULL) {
   2169 		return EFAULT;
   2170 	}
   2171 	for (i = 0; thread_name[i] != '\0'; i++) {
   2172 		char c = thread_name[i];
   2173 		if (!isgraph(c) && !isblank(c)) {
   2174 			return EFAULT;
   2175 		}
   2176 	}
   2177 
   2178 	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
   2179 	if (s == NULL) {
   2180 		return EAGAIN;
   2181 	}
   2182 
   2183 	if (tdata->thread_name != NULL) {
   2184 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
   2185 		    true);
   2186 		tdata->thread_name = NULL;
   2187 	}
   2188 	if (strlen(s) > 0) {
   2189 		tdata->thread_name = s;
   2190 	}
   2191 	return 0;
   2192 }
   2193 
   2194 bool
   2195 prof_thread_active_get(tsd_t *tsd) {
   2196 	prof_tdata_t *tdata;
   2197 
   2198 	tdata = prof_tdata_get(tsd, true);
   2199 	if (tdata == NULL) {
   2200 		return false;
   2201 	}
   2202 	return tdata->active;
   2203 }
   2204 
   2205 bool
   2206 prof_thread_active_set(tsd_t *tsd, bool active) {
   2207 	prof_tdata_t *tdata;
   2208 
   2209 	tdata = prof_tdata_get(tsd, true);
   2210 	if (tdata == NULL) {
   2211 		return true;
   2212 	}
   2213 	tdata->active = active;
   2214 	return false;
   2215 }
   2216 
   2217 bool
   2218 prof_thread_active_init_get(tsdn_t *tsdn) {
   2219 	bool active_init;
   2220 
   2221 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
   2222 	active_init = prof_thread_active_init;
   2223 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
   2224 	return active_init;
   2225 }
   2226 
   2227 bool
   2228 prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
   2229 	bool active_init_old;
   2230 
   2231 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
   2232 	active_init_old = prof_thread_active_init;
   2233 	prof_thread_active_init = active_init;
   2234 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
   2235 	return active_init_old;
   2236 }
   2237 
   2238 bool
   2239 prof_gdump_get(tsdn_t *tsdn) {
   2240 	bool prof_gdump_current;
   2241 
   2242 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
   2243 	prof_gdump_current = prof_gdump_val;
   2244 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
   2245 	return prof_gdump_current;
   2246 }
   2247 
   2248 bool
   2249 prof_gdump_set(tsdn_t *tsdn, bool gdump) {
   2250 	bool prof_gdump_old;
   2251 
   2252 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
   2253 	prof_gdump_old = prof_gdump_val;
   2254 	prof_gdump_val = gdump;
   2255 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
   2256 	return prof_gdump_old;
   2257 }
   2258 
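         /*
          * Three-stage bootstrap: prof_boot0() installs the default dump
          * prefix, prof_boot1() finalizes opt_prof/prof_interval before any
          * arenas are created, and prof_boot2() creates the global hash,
          * mutexes, and shared gctx/tdata lock tables once TSD is usable.
          */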
   2259 void
   2260 prof_boot0(void) {
   2261 	cassert(config_prof);
   2262 
   2263 	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
   2264 	    sizeof(PROF_PREFIX_DEFAULT));
   2265 }
   2266 
   2267 void
   2268 prof_boot1(void) {
   2269 	cassert(config_prof);
   2270 
   2271 	/*
   2272 	 * opt_prof must be in its final state before any arenas are
   2273 	 * initialized, so this function must be executed early.
   2274 	 */
   2275 
   2276 	if (opt_prof_leak && !opt_prof) {
   2277 		/*
   2278 		 * Enable opt_prof, but in such a way that profiles are never
   2279 		 * automatically dumped.
   2280 		 */
   2281 		opt_prof = true;
   2282 		opt_prof_gdump = false;
   2283 	} else if (opt_prof) {
   2284 		if (opt_lg_prof_interval >= 0) {
   2285 			prof_interval = (((uint64_t)1U) <<
   2286 			    opt_lg_prof_interval);
   2287 		}
   2288 	}
   2289 }
   2290 
   2291 bool
   2292 prof_boot2(tsd_t *tsd) {
   2293 	cassert(config_prof);
   2294 
   2295 	if (opt_prof) {
   2296 		unsigned i;
   2297 
   2298 		lg_prof_sample = opt_lg_prof_sample;
   2299 
   2300 		prof_active = opt_prof_active;
   2301 		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
   2302 		    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
   2303 			return true;
   2304 		}
   2305 
   2306 		prof_gdump_val = opt_prof_gdump;
   2307 		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
   2308 		    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
   2309 			return true;
   2310 		}
   2311 
   2312 		prof_thread_active_init = opt_prof_thread_active_init;
   2313 		if (malloc_mutex_init(&prof_thread_active_init_mtx,
   2314 		    "prof_thread_active_init",
   2315 		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
   2316 		    malloc_mutex_rank_exclusive)) {
   2317 			return true;
   2318 		}
   2319 
   2320 		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
   2321 		    prof_bt_keycomp)) {
   2322 			return true;
   2323 		}
   2324 		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
   2325 		    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
   2326 			return true;
   2327 		}
   2328 
   2329 		tdata_tree_new(&tdatas);
   2330 		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
   2331 		    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
   2332 			return true;
   2333 		}
   2334 
   2335 		next_thr_uid = 0;
   2336 		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
   2337 		    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
   2338 			return true;
   2339 		}
   2340 
   2341 		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
   2342 		    WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
   2343 			return true;
   2344 		}
   2345 		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
   2346 		    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
   2347 			return true;
   2348 		}
   2349 
   2350 		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
   2351 		    atexit(prof_fdump) != 0) {
   2352 			malloc_write("<jemalloc>: Error in atexit()\n");
   2353 			if (opt_abort) {
   2354 				abort();
   2355 			}
   2356 		}
   2357 
   2358 		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
   2359 		    b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
   2360 		    CACHELINE);
   2361 		if (gctx_locks == NULL) {
   2362 			return true;
   2363 		}
   2364 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
   2365 			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
   2366 			    WITNESS_RANK_PROF_GCTX,
   2367 			    malloc_mutex_rank_exclusive)) {
   2368 				return true;
   2369 			}
   2370 		}
   2371 
   2372 		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
   2373 		    b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
   2374 		    CACHELINE);
   2375 		if (tdata_locks == NULL) {
   2376 			return true;
   2377 		}
   2378 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
   2379 			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
   2380 			    WITNESS_RANK_PROF_TDATA,
   2381 			    malloc_mutex_rank_exclusive)) {
   2382 				return true;
   2383 			}
   2384 		}
   2385 	}
   2386 
   2387 #ifdef JEMALLOC_PROF_LIBGCC
   2388 	/*
   2389 	 * Cause the backtracing machinery to allocate its internal state
   2390 	 * before enabling profiling.
   2391 	 */
   2392 	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
   2393 #endif
   2394 
   2395 	prof_booted = true;
   2396 
   2397 	return false;
   2398 }
   2399 
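         /*
          * Fork hooks: every profiling mutex is acquired before fork() and
          * then released in the parent or reset in the child, so the child
          * never inherits a lock held by a thread that no longer exists.
          * The two postfork paths unwind the locks in the reverse of the
          * prefork acquisition order.
          */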
   2400 void
   2401 prof_prefork0(tsdn_t *tsdn) {
   2402 	if (config_prof && opt_prof) {
   2403 		unsigned i;
   2404 
   2405 		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
   2406 		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
   2407 		malloc_mutex_prefork(tsdn, &tdatas_mtx);
   2408 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
   2409 			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
   2410 		}
   2411 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
   2412 			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
   2413 		}
   2414 	}
   2415 }
   2416 
   2417 void
   2418 prof_prefork1(tsdn_t *tsdn) {
   2419 	if (config_prof && opt_prof) {
   2420 		malloc_mutex_prefork(tsdn, &prof_active_mtx);
   2421 		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
   2422 		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
   2423 		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
   2424 		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
   2425 	}
   2426 }
   2427 
   2428 void
   2429 prof_postfork_parent(tsdn_t *tsdn) {
   2430 	if (config_prof && opt_prof) {
   2431 		unsigned i;
   2432 
   2433 		malloc_mutex_postfork_parent(tsdn,
   2434 		    &prof_thread_active_init_mtx);
   2435 		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
   2436 		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
   2437 		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
   2438 		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
   2439 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
   2440 			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
   2441 		}
   2442 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
   2443 			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
   2444 		}
   2445 		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
   2446 		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
   2447 		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
   2448 	}
   2449 }
   2450 
   2451 void
   2452 prof_postfork_child(tsdn_t *tsdn) {
   2453 	if (config_prof && opt_prof) {
   2454 		unsigned i;
   2455 
   2456 		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
   2457 		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
   2458 		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
   2459 		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
   2460 		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
   2461 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
   2462 			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
   2463 		}
   2464 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
   2465 			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
   2466 		}
   2467 		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
   2468 		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
   2469 		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
   2470 	}
   2471 }
   2472 
   2473 /******************************************************************************/
   2474