Home | History | Annotate | Download | only in internal
      1 /******************************************************************************/
      2 #ifdef JEMALLOC_H_TYPES
      3 
/* Stack backtrace: a vector of program counters plus its length. */
typedef struct prof_bt_s prof_bt_t;
/* Set of profiling counters. */
typedef struct prof_cnt_s prof_cnt_t;
/* Per thread profiling context; links a tdata to a gctx. */
typedef struct prof_tctx_s prof_tctx_t;
/* Global profiling context; owns a backtrace and a tree of tctx's. */
typedef struct prof_gctx_s prof_gctx_t;
/* Per thread profiling data. */
typedef struct prof_tdata_s prof_tdata_t;
      9 
     10 /* Option defaults. */
     11 #ifdef JEMALLOC_PROF
     12 #  define PROF_PREFIX_DEFAULT		"jeprof"
     13 #else
     14 #  define PROF_PREFIX_DEFAULT		""
     15 #endif
     16 #define	LG_PROF_SAMPLE_DEFAULT		19
     17 #define	LG_PROF_INTERVAL_DEFAULT	-1
     18 
/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 * This is also the length of the per thread backtrace vector
 * (prof_tdata_s.vec).
 */
#define	PROF_BT_MAX			128

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.  Any tdata pointer whose
 * value is <= PROF_TDATA_STATE_MAX is therefore not a real tdata;
 * prof_sample_accum_update() treats such values the same as NULL.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
     54 
     55 #endif /* JEMALLOC_H_TYPES */
     56 /******************************************************************************/
     57 #ifdef JEMALLOC_H_STRUCTS
     58 
/*
 * A stack backtrace.  The storage that vec points at is supplied by the
 * caller (see bt_init()); the struct itself holds only the pointer and the
 * frame count.
 */
struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};
     64 
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	/* Backtrace associated with the unwind. */
	prof_bt_t	*bt;
	/*
	 * NOTE(review): presumably the maximum number of frames the callback
	 * may record into bt -- confirm against the callback in prof.c.
	 */
	unsigned	max;
} prof_unwind_data_t;
#endif
     72 
/*
 * Object/byte counters.  Instances are embedded in prof_tctx_s (cnts,
 * dump_cnts) and used as temporary summation storage in prof_gctx_s and
 * prof_tdata_s (cnt_summed).
 */
struct prof_cnt_s {
	/*
	 * Profiling counters.  NOTE(review): the cur* fields presumably track
	 * currently live sampled objects/bytes and the accum* fields
	 * cumulative totals -- confirm against prof.c.
	 */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};
     80 
/*
 * Dump-related state of a prof_tctx_t, stored in prof_tctx_s.state and
 * protected by gctx->lock.
 */
typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;
     87 
/*
 * Per thread profiling context.  Each tctx ties one thread's tdata to one
 * global context (gctx) and carries that thread's counters for it.  Note that
 * different fields are protected by different locks (tdata->lock, gctx->lock,
 * dump_mtx), as stated on each field.
 */
struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
	 * defunct during teardown.
	 */
	uint64_t		thr_uid;
	uint64_t		thr_discrim;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
	 *   threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.  A tctx obtained from prof_alloc_prep() that
	 * never reaches prof_malloc_sample_object() therefore has to be
	 * released some other way (see prof_alloc_rollback()).
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
/* Red-black tree of tctx's, linked through tctx_link. */
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
    139 
/*
 * Global profiling context: one backtrace together with the tree of per
 * thread contexts (tctx's) that have allocated from it.
 */
struct prof_gctx_s {
	/*
	 * Protects nlimbo, cnt_summed, and tctxs.  This is a pointer rather
	 * than an embedded mutex; per the PROF_NCTX_LOCKS comment, the mutexes
	 * are shared among all gctx's.
	 */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing this gctx.
	 *   - Initializing per thread counters associated with this gctx.
	 *   - Preparing to destroy this gctx.
	 *   - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/*
	 * Backtrace vector, variable size, referred to by bt.  Declared with
	 * one element but must remain the last member, since the storage
	 * actually used extends past the declared size.
	 */
	void			*vec[1];
};
/* Red-black tree of gctx's, linked through dump_link. */
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
    175 
/*
 * Per thread profiling data, reached through tsd (see prof_tdata_get()).
 */
struct prof_tdata_s {
	/*
	 * Pointer to a mutex; per the PROF_NTDATA_LOCKS comment, the mutexes
	 * are shared among all tdata's.  prof_tctx_s documents its cnts field
	 * as protected by this lock.
	 */
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	/*
	 * prof_tdata_get() asserts that attached is true for every non-NULL
	 * tdata it returns when asked to create.
	 */
	bool			attached;
	/*
	 * When true, prof_tdata_get(tsd, true) replaces this tdata with the
	 * result of prof_tdata_reinit().
	 */
	bool			expired;

	/* Red-black tree linkage (see prof_tdata_tree_t). */
	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation counters
	 * associated with thread-specific prof_tctx_t objects.  Other threads
	 * may write to prof_tctx_t contents when freeing associated objects.
	 */
	ckh_t			bt2tctx;

	/*
	 * Sampling state.  bytes_until_sample is decremented by
	 * prof_sample_accum_update() for each accounted allocation; once an
	 * allocation is larger than what remains, a new threshold is computed
	 * by prof_sample_threshold_update().
	 */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
/* Red-black tree of tdata's, linked through tdata_link. */
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
    241 
    242 #endif /* JEMALLOC_H_STRUCTS */
    243 /******************************************************************************/
    244 #ifdef JEMALLOC_H_EXTERNS
    245 
/* Run-time options (opt_*) and global profiling state. */
extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
/* Sized to hold a PATH_MAX-length prefix plus the terminating NUL. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;
    282 
/*
 * Out-of-line entry points.  prof_malloc_sample_object() and
 * prof_free_sampled_object() are invoked by the inline fast paths below only
 * for sampled objects (tctx > 1).
 */
void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
/* Backtrace capture and lookup, used by prof_alloc_prep(). */
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
/* NOTE(review): JEMALLOC_JET presumably marks test-only hooks -- confirm. */
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
/* Used by prof_tdata_get() to create or replace the calling thread's tdata. */
prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsd_t *tsd, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
const char	*prof_thread_name_get(void);
bool	prof_active_get(void);
bool	prof_active_set(bool active);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(void);
bool	prof_thread_active_set(bool active);
bool	prof_thread_active_init_get(void);
bool	prof_thread_active_init_set(bool active_init);
bool	prof_gdump_get(void);
bool	prof_gdump_set(bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);
/* Recomputes tdata's sample threshold; see prof_sample_accum_update(). */
void	prof_sample_threshold_update(prof_tdata_t *tdata);
    323 
    324 #endif /* JEMALLOC_H_EXTERNS */
    325 /******************************************************************************/
    326 #ifdef JEMALLOC_H_INLINES
    327 
    328 #ifndef JEMALLOC_ENABLE_INLINE
    329 bool	prof_active_get_unlocked(void);
    330 bool	prof_gdump_get_unlocked(void);
    331 prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
    332 bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
    333     prof_tdata_t **tdata_out);
    334 prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
    335     bool update);
    336 prof_tctx_t	*prof_tctx_get(const void *ptr);
    337 void	prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
    338 void	prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
    339     prof_tctx_t *tctx);
    340 void	prof_malloc_sample_object(const void *ptr, size_t usize,
    341     prof_tctx_t *tctx);
    342 void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
    343 void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    344     prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
    345     size_t old_usize, prof_tctx_t *old_tctx);
    346 void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
    347 #endif
    348 
    349 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
    350 JEMALLOC_ALWAYS_INLINE bool
    351 prof_active_get_unlocked(void)
    352 {
    353 
    354 	/*
    355 	 * Even if opt_prof is true, sampling can be temporarily disabled by
    356 	 * setting prof_active to false.  No locking is used when reading
    357 	 * prof_active in the fast path, so there are no guarantees regarding
    358 	 * how long it will take for all threads to notice state changes.
    359 	 */
    360 	return (prof_active);
    361 }
    362 
    363 JEMALLOC_ALWAYS_INLINE bool
    364 prof_gdump_get_unlocked(void)
    365 {
    366 
    367 	/*
    368 	 * No locking is used when reading prof_gdump_val in the fast path, so
    369 	 * there are no guarantees regarding how long it will take for all
    370 	 * threads to notice state changes.
    371 	 */
    372 	return (prof_gdump_val);
    373 }
    374 
    375 JEMALLOC_ALWAYS_INLINE prof_tdata_t *
    376 prof_tdata_get(tsd_t *tsd, bool create)
    377 {
    378 	prof_tdata_t *tdata;
    379 
    380 	cassert(config_prof);
    381 
    382 	tdata = tsd_prof_tdata_get(tsd);
    383 	if (create) {
    384 		if (unlikely(tdata == NULL)) {
    385 			if (tsd_nominal(tsd)) {
    386 				tdata = prof_tdata_init(tsd);
    387 				tsd_prof_tdata_set(tsd, tdata);
    388 			}
    389 		} else if (unlikely(tdata->expired)) {
    390 			tdata = prof_tdata_reinit(tsd, tdata);
    391 			tsd_prof_tdata_set(tsd, tdata);
    392 		}
    393 		assert(tdata == NULL || tdata->attached);
    394 	}
    395 
    396 	return (tdata);
    397 }
    398 
    399 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
    400 prof_tctx_get(const void *ptr)
    401 {
    402 
    403 	cassert(config_prof);
    404 	assert(ptr != NULL);
    405 
    406 	return (arena_prof_tctx_get(ptr));
    407 }
    408 
    409 JEMALLOC_ALWAYS_INLINE void
    410 prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
    411 {
    412 
    413 	cassert(config_prof);
    414 	assert(ptr != NULL);
    415 
    416 	arena_prof_tctx_set(ptr, usize, tctx);
    417 }
    418 
    419 JEMALLOC_ALWAYS_INLINE void
    420 prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
    421     prof_tctx_t *old_tctx)
    422 {
    423 
    424 	cassert(config_prof);
    425 	assert(ptr != NULL);
    426 
    427 	arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
    428 }
    429 
    430 JEMALLOC_ALWAYS_INLINE bool
    431 prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    432     prof_tdata_t **tdata_out)
    433 {
    434 	prof_tdata_t *tdata;
    435 
    436 	cassert(config_prof);
    437 
    438 	tdata = prof_tdata_get(tsd, true);
    439 	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
    440 		tdata = NULL;
    441 
    442 	if (tdata_out != NULL)
    443 		*tdata_out = tdata;
    444 
    445 	if (unlikely(tdata == NULL))
    446 		return (true);
    447 
    448 	if (likely(tdata->bytes_until_sample >= usize)) {
    449 		if (update)
    450 			tdata->bytes_until_sample -= usize;
    451 		return (true);
    452 	} else {
    453 		/* Compute new sample threshold. */
    454 		if (update)
    455 			prof_sample_threshold_update(tdata);
    456 		return (!tdata->active);
    457 	}
    458 }
    459 
    460 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
    461 prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
    462 {
    463 	prof_tctx_t *ret;
    464 	prof_tdata_t *tdata;
    465 	prof_bt_t bt;
    466 
    467 	assert(usize == s2u(usize));
    468 
    469 	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
    470 	    &tdata)))
    471 		ret = (prof_tctx_t *)(uintptr_t)1U;
    472 	else {
    473 		bt_init(&bt, tdata->vec);
    474 		prof_backtrace(&bt);
    475 		ret = prof_lookup(tsd, &bt);
    476 	}
    477 
    478 	return (ret);
    479 }
    480 
    481 JEMALLOC_ALWAYS_INLINE void
    482 prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
    483 {
    484 
    485 	cassert(config_prof);
    486 	assert(ptr != NULL);
    487 	assert(usize == isalloc(ptr, true));
    488 
    489 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
    490 		prof_malloc_sample_object(ptr, usize, tctx);
    491 	else
    492 		prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
    493 }
    494 
    495 JEMALLOC_ALWAYS_INLINE void
    496 prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    497     bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
    498     prof_tctx_t *old_tctx)
    499 {
    500 	bool sampled, old_sampled;
    501 
    502 	cassert(config_prof);
    503 	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
    504 
    505 	if (prof_active && !updated && ptr != NULL) {
    506 		assert(usize == isalloc(ptr, true));
    507 		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
    508 			/*
    509 			 * Don't sample.  The usize passed to prof_alloc_prep()
    510 			 * was larger than what actually got allocated, so a
    511 			 * backtrace was captured for this allocation, even
    512 			 * though its actual usize was insufficient to cross the
    513 			 * sample threshold.
    514 			 */
    515 			tctx = (prof_tctx_t *)(uintptr_t)1U;
    516 		}
    517 	}
    518 
    519 	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
    520 	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
    521 
    522 	if (unlikely(sampled))
    523 		prof_malloc_sample_object(ptr, usize, tctx);
    524 	else
    525 		prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
    526 
    527 	if (unlikely(old_sampled))
    528 		prof_free_sampled_object(tsd, old_usize, old_tctx);
    529 }
    530 
    531 JEMALLOC_ALWAYS_INLINE void
    532 prof_free(tsd_t *tsd, const void *ptr, size_t usize)
    533 {
    534 	prof_tctx_t *tctx = prof_tctx_get(ptr);
    535 
    536 	cassert(config_prof);
    537 	assert(usize == isalloc(ptr, true));
    538 
    539 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
    540 		prof_free_sampled_object(tsd, usize, tctx);
    541 }
    542 #endif
    543 
    544 #endif /* JEMALLOC_H_INLINES */
    545 /******************************************************************************/
    546