Optimize fast path to allow static size class computation.

After inlining at LTO time, many call sites have a known input size, which means the
size index and usable size can be derived at compile time.  However, the size-to-index
lookup table prevents that folding; this commit solves it by switching to the compute
approach when the size is detected to be a compile-time constant.
Qi Wang authored 2024-09-11 15:08:24 -07:00; committed by Qi Wang
parent c1a3ca3755
commit 323ed2e3a8
3 changed files with 40 additions and 8 deletions
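
As a rough illustration of the pattern (a minimal, self-contained sketch using toy size classes and hypothetical toy_* names, not jemalloc's real classes or helpers): when the compiler can prove the size is a constant, the arithmetic compute path folds to an immediate, while unknown sizes still go through the table lookup.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Fallback pattern for compilers without __builtin_constant_p. */
#if defined(__has_builtin)
#  if __has_builtin(__builtin_constant_p)
#    define toy_compile_time_const(x) __builtin_constant_p(x)
#  endif
#endif
#ifndef toy_compile_time_const
#  define toy_compile_time_const(x) (false)
#endif

/* Toy size classes (not jemalloc's): 8, 16, then 16-byte spacing up to 128. */
static const size_t toy_index2size_tab[] = {8, 16, 32, 48, 64, 80, 96, 112, 128};

/* Pure arithmetic; folds to a constant when size is known (assumes 1 <= size <= 128). */
static inline size_t
toy_size2index_compute(size_t size) {
    if (size <= 16) {
        return (size + 7) / 8 - 1;
    }
    return 2 + (size - 17) / 16;
}

/* Table walk; stands in for the lookup-table path. */
static inline size_t
toy_size2index_lookup(size_t size) {
    size_t ind = 0;
    while (toy_index2size_tab[ind] < size) {
        ind++;
    }
    return ind;
}

/* Mirrors the dispatch idea: compute when constant, look up otherwise. */
static inline void
toy_size2index_usize(size_t size, size_t *ind, size_t *usize) {
    if (toy_compile_time_const(size)) {
        *ind = toy_size2index_compute(size);    /* folds under -O2 / LTO */
    } else {
        *ind = toy_size2index_lookup(size);     /* runtime table lookup */
    }
    *usize = toy_index2size_tab[*ind];
}

int
main(void) {
    size_t ind, usize;
    volatile size_t n = 100;    /* runtime-only value: takes the lookup path */
    toy_size2index_usize(100, &ind, &usize);    /* literal: compute path at -O2 */
    printf("const 100   -> ind %zu, usize %zu\n", ind, usize);
    toy_size2index_usize(n, &ind, &usize);
    printf("runtime 100 -> ind %zu, usize %zu\n", ind, usize);
    return 0;
}

Both calls print ind 7 and usize 112; the difference is only in what the compiler can fold away.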


@@ -496,6 +496,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
*tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
emap_alloc_ctx_t alloc_ctx;
size_t usize;
if (!size_hint) {
bool err = emap_alloc_ctx_try_lookup_fast(tsd,
&arena_emap_global, ptr, &alloc_ctx);
@@ -507,6 +508,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
return false;
}
assert(alloc_ctx.szind != SC_NSIZES);
usize = sz_index2size(alloc_ctx.szind);
} else {
/*
* Check for both sizes that are too large, and for sampled /
@@ -518,7 +520,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
/* check_prof */ true))) {
return false;
}
alloc_ctx.szind = sz_size2index_lookup(size);
sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize);
/* Max lookup class must be small. */
assert(alloc_ctx.szind < SC_NBINS);
/* This is a dead store, except when opt size checking is on. */
@@ -534,7 +536,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
uint64_t deallocated, threshold;
te_free_fastpath_ctx(tsd, &deallocated, &threshold);
size_t usize = sz_index2size(alloc_ctx.szind);
uint64_t deallocated_after = deallocated + usize;
/*
* Check for events and tsd non-nominal (fast_threshold will be set to


@@ -152,8 +152,8 @@ sz_psz2u(size_t psz) {
return usize;
}
static inline szind_t
sz_size2index_compute(size_t size) {
JEMALLOC_ALWAYS_INLINE szind_t
sz_size2index_compute_inline(size_t size) {
if (unlikely(size > SC_LARGE_MAXCLASS)) {
return SC_NSIZES;
}
@@ -186,6 +186,11 @@ sz_size2index_compute(size_t size) {
}
}
static inline szind_t
sz_size2index_compute(size_t size) {
return sz_size2index_compute_inline(size);
}
JEMALLOC_ALWAYS_INLINE szind_t
sz_size2index_lookup_impl(size_t size) {
assert(size <= SC_LOOKUP_MAXCLASS);
@@ -208,8 +213,8 @@ sz_size2index(size_t size) {
return sz_size2index_compute(size);
}
static inline size_t
sz_index2size_compute(szind_t index) {
JEMALLOC_ALWAYS_INLINE size_t
sz_index2size_compute_inline(szind_t index) {
#if (SC_NTINY > 0)
if (index < SC_NTINY) {
return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index));
@@ -234,6 +239,11 @@ sz_index2size_compute(szind_t index) {
}
}
static inline size_t
sz_index2size_compute(szind_t index) {
return sz_index2size_compute_inline(index);
}
JEMALLOC_ALWAYS_INLINE size_t
sz_index2size_lookup_impl(szind_t index) {
return sz_index2size_tab[index];
@@ -254,8 +264,19 @@ sz_index2size(szind_t index) {
JEMALLOC_ALWAYS_INLINE void
sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) {
*ind = sz_size2index_lookup_impl(size);
*usize = sz_index2size_lookup_impl(*ind);
if (util_compile_time_const(size)) {
/*
* When inlined, the size may become known at compile
* time, which allows static computation through LTO.
*/
*ind = sz_size2index_compute_inline(size);
assert(*ind == sz_size2index_lookup_impl(size));
*usize = sz_index2size_compute_inline(*ind);
assert(*usize == sz_index2size_lookup_impl(*ind));
} else {
*ind = sz_size2index_lookup_impl(size);
*usize = sz_index2size_lookup_impl(*ind);
}
}
JEMALLOC_ALWAYS_INLINE size_t


@@ -79,6 +79,16 @@ get_errno(void) {
} while(0)
#endif
/* Allows compiler constant folding on inlined paths. */
#if defined(__has_builtin)
# if __has_builtin(__builtin_constant_p)
# define util_compile_time_const(x) __builtin_constant_p(x)
# endif
#endif
#ifndef util_compile_time_const
# define util_compile_time_const(x) (false)
#endif
/* ptr should be valid. */
JEMALLOC_ALWAYS_INLINE void
util_prefetch_read(void *ptr) {
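
For reference, the new util_compile_time_const fallback degrades gracefully: on GCC/Clang it reports whether the compiler proved the argument constant at that point (which generally requires optimization and inlining), and on compilers without __builtin_constant_p it is always false, so callers simply keep taking the lookup path. A small hypothetical check (demo_* names are made up for illustration):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Same fallback shape as the hunk above, under a hypothetical name. */
#if defined(__has_builtin)
#  if __has_builtin(__builtin_constant_p)
#    define demo_compile_time_const(x) __builtin_constant_p(x)
#  endif
#endif
#ifndef demo_compile_time_const
#  define demo_compile_time_const(x) (false)
#endif

static inline int
is_known_const(size_t size) {
    /* 1 only if the compiler proved `size` constant here; else 0. */
    return demo_compile_time_const(size) ? 1 : 0;
}

int
main(void) {
    volatile size_t runtime_size = 64;  /* volatile defeats constant folding */
    /* Typically prints 1 when built with -O2 (after inlining); 0 without optimization. */
    printf("literal 64 : %d\n", is_known_const(64));
    /* Always 0: the value is only known at run time. */
    printf("runtime 64 : %d\n", is_known_const(runtime_size));
    return 0;
}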