/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT		"jeprof"
#else
#  define PROF_PREFIX_DEFAULT		""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1
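
/*
 * Worked example: with the defaults above, the expected sample interval is
 *
 *	((uint64_t)1U << LG_PROF_SAMPLE_DEFAULT) == 524288 bytes (512 KiB),
 *
 * and interval-triggered dumping is disabled (an interval of -1 means
 * "never").
 */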

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128
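
/*
 * Illustrative sketch of why the limit is hard-coded (not the literal
 * implementation, which lives in prof.c): __builtin_return_address() only
 * accepts a compile-time constant, so the unwind loop must be unrolled into
 * one handler per frame, up to PROF_BT_MAX of them.  Something along these
 * lines, where max is the caller's frame budget:
 *
 *	#define BT_FRAME(i)						\
 *		if ((i) < max) {					\
 *			void *p = __builtin_return_address(i);		\
 *			if (p == NULL)					\
 *				return;					\
 *			bt->vec[(i)] = p;				\
 *			bt->len = (i) + 1;				\
 *		} else							\
 *			return;
 *
 *	BT_FRAME(0) BT_FRAME(1) ... BT_FRAME(127)
 */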

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
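
/*
 * Informative example: the sentinels are tiny non-NULL integers cast to
 * pointers, so a single unsigned comparison distinguishes them (and NULL)
 * from a usable tdata, as in prof_sample_accum_update() below:
 *
 *	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
 *	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *		tdata = NULL;
 */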

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};
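
/*
 * Typical usage (a sketch mirroring prof_alloc_prep() below): the caller
 * provides the backing storage, bt_init() attaches it, and prof_backtrace()
 * fills it in:
 *
 *	prof_bt_t bt;
 *	void *vec[PROF_BT_MAX];
 *
 *	bt_init(&bt, vec);
 *	prof_backtrace(&bt);
 *
 * After prof_backtrace(), bt.len is the number of recorded return addresses,
 * at most PROF_BT_MAX.
 */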

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};
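
/*
 * cur{objs,bytes} track currently live sampled allocations and can return to
 * 0; accum{objs,bytes} only ever grow (reported when opt_prof_accum is
 * enabled).  A hedged sketch of merging one counter set into another, as the
 * dump path does when filling the cnt_summed fields below (the helper name is
 * hypothetical; the real merge logic lives in prof.c):
 *
 *	static void
 *	prof_cnt_merge(prof_cnt_t *dst, const prof_cnt_t *src)
 *	{
 *		dst->curobjs += src->curobjs;
 *		dst->curbytes += src->curbytes;
 *		dst->accumobjs += src->accumobjs;
 *		dst->accumbytes += src->accumbytes;
 *	}
 */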

typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_uid, necessary because tdata may be defunct during
	 * teardown.
	 */
	uint64_t		thr_uid;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
	 *   threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;

struct prof_gctx_s {
	/* Protects nlimbo, cnt_summed, and tctxs. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing this gctx.
	 *   - Initializing per thread counters associated with this gctx.
	 *   - Preparing to destroy this gctx.
	 *   - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/* Backtrace vector, variable size, referred to by bt. */
	void			*vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
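
/*
 * Since vec is the trailing member and bt.vec points into it, a gctx for an
 * n-frame backtrace is carved out of a single allocation; a plausible sketch
 * of the size computation (the actual creation logic lives in prof.c):
 *
 *	size_t size = offsetof(prof_gctx_t, vec) + (n * sizeof(void *));
 */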

struct prof_tdata_s {
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	bool			attached;
	bool			expired;

	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation counters
	 * associated with thread-specific prof_tctx_t objects.  Other threads
	 * may write to prof_tctx_t contents when freeing associated objects.
	 */
	ckh_t			bt2tctx;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];
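
/*
 * Dump files are named "<prefix>.<pid>.<seq>.<kind>.heap", where <kind> is
 * i<iseq> (interval), m<mseq> (manual), u<useq> (gdump), or f (final); e.g.
 * "jeprof.19832.0.m0.heap" with the default prefix.  See the jemalloc manual
 * for the authoritative patterns.
 */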

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
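
/*
 * Worked example: with 4 arenas and lg_prof_interval == 30 (prof_interval ==
 * 1 GiB), dumps occur on average once per GiB of application allocation, but
 * nearly 4 GiB can be allocated between dumps in the worst case, when the
 * allocation load is spread evenly across all arenas.
 */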

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;

void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsd_t *tsd, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
const char	*prof_thread_name_get(void);
bool	prof_active_get(void);
bool	prof_active_set(bool active);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(void);
bool	prof_thread_active_set(bool active);
bool	prof_thread_active_init_get(void);
bool	prof_thread_active_init_set(bool active_init);
bool	prof_gdump_get(void);
bool	prof_gdump_set(bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);
void	prof_sample_threshold_update(prof_tdata_t *tdata);
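
/*
 * prof_sample_threshold_update() draws the next bytes_until_sample so that
 * samples follow a geometric distribution with mean 2^lg_prof_sample.  A
 * hedged inverse-transform sketch of the draw, where prng() stands in for the
 * 53-bit generator seeded by tdata->prng_state (the authoritative code is in
 * prof.c):
 *
 *	double u = (double)prng() / 9007199254740992.0;
 *	tdata->bytes_until_sample = (uint64_t)(log(u) /
 *	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + 1;
 */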

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
prof_tctx_t	*prof_tctx_get(const void *ptr);
void	prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(ptr, tctx);
}

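/*
 * Returns true if an allocation of usize bytes should not be sampled (no
 * usable tdata, or bytes_until_sample was not exhausted); returns false when
 * the caller should capture a backtrace and sample the allocation.
 */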
JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	if (tdata == NULL)
		return (true);

	if (tdata->bytes_until_sample >= usize) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		return (!tdata->active);
	}
}

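/*
 * Returns (prof_tctx_t *)(uintptr_t)1U when the allocation will not be
 * sampled, or a tctx from prof_lookup() when it will; consumers such as
 * prof_malloc() below distinguish the two cases by comparing against
 * (uintptr_t)1U.
 */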
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd,
	    usize, update, &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}

JEMALLOC_ALWAYS_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool updated, size_t old_usize, prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (!updated && ptr != NULL) {
		assert(usize == isalloc(ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to prof_alloc_prep()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(ptr);

	cassert(config_prof);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/