Optimize fast path to allow static size class computation.
After inlining at LTO time, many call sites have the input size known, which means the size index and usable size can be computed at compile time. However, the size-to-index lookup table prevents that -- this commit solves it by switching to the compute approach when the size is detected to be a compile-time constant.
parent c1a3ca3755
commit 323ed2e3a8
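For orientation before the diff: the change hinges on routing size-class computation through a pure-arithmetic path whenever the compiler can prove the size is a constant, and keeping the table lookup otherwise. Below is a minimal, self-contained sketch of that dispatch pattern, using toy size classes and hypothetical names rather than jemalloc's real ones; the actual change lives in sz_size2index_usize_fastpath further down.

#include <stddef.h>

/*
 * Toy size classes (8, 16, 32, 64): purely illustrative, not jemalloc's
 * real class layout or helper names.
 */
static const unsigned char toy_lookup_tab[9] = {0, 0, 1, 2, 2, 3, 3, 3, 3};

static inline size_t
toy_index_compute(size_t size) {
	/* Pure arithmetic: folds to a constant when size is known. */
	size_t ind = 0;
	size_t bound = 8;
	while (size > bound) {
		bound *= 2;
		ind++;
	}
	return ind;
}

static inline size_t
toy_index_lookup(size_t size) {
	/* Table lookup (sizes 1..64 only): cheap at run time, but the load
	 * from toy_lookup_tab blocks compile-time evaluation. */
	return toy_lookup_tab[(size + 7) / 8];
}

static inline size_t
toy_size2index(size_t size) {
#if defined(__GNUC__) || defined(__clang__)
	if (__builtin_constant_p(size)) {
		/* e.g. toy_size2index(48) folds to the constant 3. */
		return toy_index_compute(size);
	}
#endif
	return toy_index_lookup(size);
}

In the real patch, debug-build asserts cross-check the compute path against the lookup path, so the two cannot silently diverge.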
@@ -496,6 +496,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 	    *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
 
 	emap_alloc_ctx_t alloc_ctx;
+	size_t usize;
 	if (!size_hint) {
 		bool err = emap_alloc_ctx_try_lookup_fast(tsd,
 		    &arena_emap_global, ptr, &alloc_ctx);
@@ -507,6 +508,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 			return false;
 		}
 		assert(alloc_ctx.szind != SC_NSIZES);
+		usize = sz_index2size(alloc_ctx.szind);
 	} else {
 		/*
 		 * Check for both sizes that are too large, and for sampled /
@@ -518,7 +520,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 		    /* check_prof */ true))) {
 			return false;
 		}
-		alloc_ctx.szind = sz_size2index_lookup(size);
+		sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize);
 		/* Max lookup class must be small. */
 		assert(alloc_ctx.szind < SC_NBINS);
 		/* This is a dead store, except when opt size checking is on. */
@@ -534,7 +536,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 	uint64_t deallocated, threshold;
 	te_free_fastpath_ctx(tsd, &deallocated, &threshold);
 
-	size_t usize = sz_index2size(alloc_ctx.szind);
 	uint64_t deallocated_after = deallocated + usize;
 	/*
 	 * Check for events and tsd non-nominal (fast_threshold will be set to
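The size_hint path above is the one that benefits: it is reached from sized deallocations, where the caller supplies the allocation size. A hedged usage illustration, assuming jemalloc's public mallocx/sdallocx API (struct node is hypothetical and not part of this diff): once LTO inlines the deallocation path, sizeof(*n) is a compile-time constant, so the size index and usable size can fold statically instead of going through the lookup tables.

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

struct node { long key; long val; };	/* fixed-size type: sizeof is a constant */

int main(void) {
	struct node *n = mallocx(sizeof(*n), 0);
	if (n == NULL) {
		return 1;
	}
	n->key = 1;
	n->val = 2;
	/* Sized free: the constant sizeof(*n) can propagate into the inlined
	 * fast path under LTO, enabling static size-class computation. */
	sdallocx(n, sizeof(*n), 0);
	return 0;
}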
@@ -152,8 +152,8 @@ sz_psz2u(size_t psz) {
 	return usize;
 }
 
-static inline szind_t
-sz_size2index_compute(size_t size) {
+JEMALLOC_ALWAYS_INLINE szind_t
+sz_size2index_compute_inline(size_t size) {
 	if (unlikely(size > SC_LARGE_MAXCLASS)) {
 		return SC_NSIZES;
 	}
@@ -186,6 +186,11 @@ sz_size2index_compute(size_t size) {
 	}
 }
 
+static inline szind_t
+sz_size2index_compute(size_t size) {
+	return sz_size2index_compute_inline(size);
+}
+
 JEMALLOC_ALWAYS_INLINE szind_t
 sz_size2index_lookup_impl(size_t size) {
 	assert(size <= SC_LOOKUP_MAXCLASS);
@@ -208,8 +213,8 @@ sz_size2index(size_t size) {
 	return sz_size2index_compute(size);
 }
 
-static inline size_t
-sz_index2size_compute(szind_t index) {
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size_compute_inline(szind_t index) {
 #if (SC_NTINY > 0)
 	if (index < SC_NTINY) {
 		return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index));
@@ -234,6 +239,11 @@ sz_index2size_compute(szind_t index) {
 	}
 }
 
+static inline size_t
+sz_index2size_compute(szind_t index) {
+	return sz_index2size_compute_inline(index);
+}
+
 JEMALLOC_ALWAYS_INLINE size_t
 sz_index2size_lookup_impl(szind_t index) {
 	return sz_index2size_tab[index];
@@ -254,8 +264,19 @@ sz_index2size(szind_t index) {
 
 JEMALLOC_ALWAYS_INLINE void
 sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) {
-	*ind = sz_size2index_lookup_impl(size);
-	*usize = sz_index2size_lookup_impl(*ind);
+	if (util_compile_time_const(size)) {
+		/*
+		 * When inlined, the size may become known at compile
+		 * time, which allows static computation through LTO.
+		 */
+		*ind = sz_size2index_compute_inline(size);
+		assert(*ind == sz_size2index_lookup_impl(size));
+		*usize = sz_index2size_compute_inline(*ind);
+		assert(*usize == sz_index2size_lookup_impl(*ind));
+	} else {
+		*ind = sz_size2index_lookup_impl(size);
+		*usize = sz_index2size_lookup_impl(*ind);
+	}
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
@@ -79,6 +79,16 @@ get_errno(void) {
 } while(0)
 #endif
 
+/* Allows compiler constant folding on inlined paths. */
+#if defined(__has_builtin)
+# if __has_builtin(__builtin_constant_p)
+#  define util_compile_time_const(x) __builtin_constant_p(x)
+# endif
+#endif
+#ifndef util_compile_time_const
+# define util_compile_time_const(x) (false)
+#endif
+
 /* ptr should be valid. */
 JEMALLOC_ALWAYS_INLINE void
 util_prefetch_read(void *ptr) {
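A note on util_compile_time_const above: on GCC and Clang, __builtin_constant_p is evaluated after inlining and constant propagation, which is what lets LTO-inlined callers take the compute path; the (false) fallback simply preserves the lookup-only behavior on compilers without the builtin. A small sketch of that behavior under hypothetical names (not part of the diff):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Same selection pattern as the diff, under a hypothetical name. */
#if defined(__has_builtin)
# if __has_builtin(__builtin_constant_p)
#  define compile_time_const(x) __builtin_constant_p(x)
# endif
#endif
#ifndef compile_time_const
# define compile_time_const(x) (false)
#endif

static inline const char *
classify(size_t size) {
	/* Reports whether the compiler sees `size` as a constant here. */
	return compile_time_const(size) ? "constant" : "runtime";
}

int main(int argc, char **argv) {
	(void)argv;
	printf("%s\n", classify(64));           /* literal size */
	printf("%s\n", classify((size_t)argc)); /* runtime size */
	return 0;
}

Built with optimization on a compiler that has the builtin, the first call typically prints "constant" and the second "runtime"; with the (false) fallback, both print "runtime".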