/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT		"jeprof"
#else
#  define PROF_PREFIX_DEFAULT		""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1
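
/*
 * Editorial example (not upstream text): with the defaults above, samples are
 * taken on average every 2^19 = 524288 bytes (512 KiB) of allocation activity,
 * and the -1 interval default leaves interval-triggered dumps disabled.
 * Assuming a build configured with --enable-prof, both can be overridden at
 * run time via MALLOC_CONF, e.g.:
 *
 *   MALLOC_CONF="prof:true,lg_prof_sample:17"    sample ~every 128 KiB
 *   MALLOC_CONF="prof:true,lg_prof_interval:30"  dump ~every 1 GiB per arena
 */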

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information
 * needed for cleanup during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
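
/*
 * Editorial sketch (not upstream text): consumers of a tdata pointer must
 * treat the near-NULL sentinels above as "no tdata", e.g.:
 *
 *   prof_tdata_t *tdata = prof_tdata_get(tsd, true);
 *   if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *           tdata = NULL;	// reincarnated/purgatory; nothing to record
 *
 * This is the same check performed by prof_sample_accum_update() below.
 */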

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};
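
/*
 * Editorial sketch (assumption; see prof_malloc_sample_object() and
 * prof_free_sampled_object() in prof.c for the authoritative logic): when an
 * allocation is sampled, its tctx's counters are adjusted roughly as follows:
 *
 *   tctx->cnts.curobjs++;		on sampled allocation
 *   tctx->cnts.curbytes += usize;
 *   tctx->cnts.curobjs--;		on free of the sampled object
 *   tctx->cnts.curbytes -= usize;
 *
 * The accumobjs/accumbytes fields grow monotonically and are reported when
 * opt_prof_accum (declared below) is enabled.
 */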

typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
	 * defunct during teardown.
	 */
	uint64_t		thr_uid;
	uint64_t		thr_discrim;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
	 *   threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;

struct prof_gctx_s {
	/* Protects nlimbo, cnt_summed, and tctxs. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing this gctx.
	 *   - Initializing per thread counters associated with this gctx.
	 *   - Preparing to destroy this gctx.
	 *   - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/* Backtrace vector, variable size, referred to by bt. */
	void			*vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;

struct prof_tdata_s {
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	bool			attached;
	bool			expired;

	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation counters
	 * associated with thread-specific prof_tctx_t objects.  Other threads
	 * may write to prof_tctx_t contents when freeing associated objects.
	 */
	ckh_t			bt2tctx;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];
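
/*
 * Editorial note (per the jemalloc manual, not this header): dump files are
 * named following the pattern <prefix>.<pid>.<seq>.<type>.heap, so the default
 * prefix yields names such as "jeprof.<pid>.0.f.heap" for a final dump.
 */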

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
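
/*
 * Editorial worked example: if lg_prof_interval is set to 30, prof_interval is
 * 2^30 bytes, so each arena triggers a dump roughly once per GiB it allocates;
 * with 4 arenas the worst-case gap between dumps approaches 4 GiB.
 */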

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;

void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(tsdn_t *tsdn);
bool	prof_mdump(tsd_t *tsd, const char *filename);
void	prof_gdump(tsdn_t *tsdn);
prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsd_t *tsd, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
bool	prof_active_get(tsdn_t *tsdn);
bool	prof_active_set(tsdn_t *tsdn, bool active);
const char	*prof_thread_name_get(tsd_t *tsd);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(tsd_t *tsd);
bool	prof_thread_active_set(tsd_t *tsd, bool active);
bool	prof_thread_active_init_get(tsdn_t *tsdn);
bool	prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
bool	prof_gdump_get(tsdn_t *tsdn);
bool	prof_gdump_set(tsdn_t *tsdn, bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(tsd_t *tsd);
void	prof_prefork0(tsdn_t *tsdn);
void	prof_prefork1(tsdn_t *tsdn);
void	prof_postfork_parent(tsdn_t *tsdn);
void	prof_postfork_child(tsdn_t *tsdn);
void	prof_sample_threshold_update(prof_tdata_t *tdata);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
prof_tctx_t	*prof_tctx_get(tsdn_t *tsdn, const void *ptr);
void	prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
    const void *old_ptr, prof_tctx_t *tctx);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
    bool update);
void	prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
    size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}
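
/*
 * Editorial sketch (assumption about call sites, not upstream code): the
 * allocation fast paths are expected to combine the compile-time and run-time
 * switches along these lines before paying for a backtrace:
 *
 *   if (config_prof && opt_prof && prof_active_get_unlocked()) {
 *           // Slow path: consider sampling this allocation.
 *   }
 */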

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(tsdn_t *tsdn, const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(tsdn, ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(tsdn, ptr, usize, tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
    prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
}

JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	if (unlikely(tdata == NULL))
		return (true);

	if (likely(tdata->bytes_until_sample >= usize)) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		return (!tdata->active);
	}
}
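
/*
 * Editorial worked example: if tdata->bytes_until_sample is 100000 and a
 * 4096-byte allocation is committed (update == true), the counter drops to
 * 95904 and the function returns true ("do not sample").  Once the counter
 * falls below the request size, prof_sample_threshold_update() draws a new
 * threshold and the function returns false for an active tdata, telling the
 * caller to capture a backtrace.
 */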

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
	    &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}
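
/*
 * Editorial usage sketch (assumption, simplified from the real allocation
 * paths): callers tie prof_alloc_prep() and prof_malloc() together roughly as
 * follows, with (prof_tctx_t *)(uintptr_t)1U marking an unsampled allocation:
 *
 *   prof_tctx_t *tctx = prof_alloc_prep(tsd, usize,
 *       prof_active_get_unlocked(), true);
 *   void *p = <allocate usize bytes>;
 *   if (unlikely(p == NULL))
 *           prof_alloc_rollback(tsd, tctx, true);
 *   else
 *           prof_malloc(tsd_tsdn(tsd), p, usize, tctx);
 */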

JEMALLOC_ALWAYS_INLINE void
prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(tsdn, ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
	else
		prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
    prof_tctx_t *old_tctx)
{
	bool sampled, old_sampled;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (prof_active && !updated && ptr != NULL) {
		assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to prof_alloc_prep()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			prof_alloc_rollback(tsd, tctx, true);
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);

	if (unlikely(sampled))
		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
	else
		prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);

	if (unlikely(old_sampled))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);

	cassert(config_prof);
	assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/