Compare commits

...

3 Commits

Author SHA1 Message Date
Uwe L. Korn 17c897976c Do not assume dss never decreases.
An sbrk() caller outside jemalloc can decrease the dss, so add a
separate atomic boolean to explicitly track whether jemalloc is
concurrently calling sbrk(), rather than depending on state outside
jemalloc's full control.

Fixes #802 for stable-4
2017-09-11 11:01:21 -07:00
Jim Chen 7883c7749f Use openat syscall if available
Some architectures like AArch64 may not have the open syscall because it
was superseded by the openat syscall, so check and use SYS_openat if
SYS_open is not available.

Additionally, Android headers for AArch64 define SYS_open to __NR_open,
even though __NR_open is undefined. Undefine SYS_open in that case so
SYS_openat is used.
2017-05-12 10:34:32 -07:00
Jason Evans 807a9a3e17 Fix decommit-related run fragmentation.
When allocating runs with alignment stricter than one page, commit after
trimming the head/tail from the initial over-sized allocation, rather
than before trimming.  This avoids creating clean-but-committed runs;
such runs do not get purged (and decommitted as a side effect), so they
can cause unnecessary long-term run fragmentation.

Do not commit decommitted memory in chunk_recycle() unless asked to by
the caller.  This allows recycled arena chunks to start in the
decommitted state, and therefore increases the likelihood that purging
after run deallocation will allow the arena chunk to become a single
unused run, thus allowing the chunk as a whole to be discarded.

This resolves #766.
2017-04-18 12:08:28 -07:00
5 changed files with 117 additions and 75 deletions

View File

@@ -14,6 +14,11 @@
# if !defined(SYS_write) && defined(__NR_write)
# define SYS_write __NR_write
# endif
# if defined(SYS_open) && defined(__aarch64__)
/* Android headers may define SYS_open to __NR_open even though
* __NR_open may not exist on AArch64 (superseded by __NR_openat). */
# undef SYS_open
# endif
# include <sys/uio.h>
# endif
# include <pthread.h>
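
The hunk above is the header-side half of the openat fallback described in the second commit message ("Use openat syscall if available"). As a standalone illustration of the same selection logic, here is a minimal sketch — not jemalloc's code; the helper name raw_open_readonly() is invented for the example, and the JEMALLOC_USE_SYSCALL guard is omitted:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Open path read-only via a raw syscall, preferring SYS_open where the
 * architecture still has it and falling back to SYS_openat (e.g. AArch64). */
static int
raw_open_readonly(const char *path) {
#if defined(SYS_open)
	return (int)syscall(SYS_open, path, O_RDONLY);
#elif defined(SYS_openat)
	/* AT_FDCWD resolves the path relative to the cwd, matching open(). */
	return (int)syscall(SYS_openat, AT_FDCWD, path, O_RDONLY);
#else
	return open(path, O_RDONLY);
#endif
}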

View File

@@ -403,7 +403,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
/* Keep track of trailing unused pages for later use. */
if (rem_pages > 0) {
size_t flags = flag_dirty | flag_decommitted;
size_t flag_unzeroed_mask = (flags == 0) ? CHUNK_MAP_UNZEROED :
size_t flag_unzeroed_mask = (flags == 0) ? CHUNK_MAP_UNZEROED :
0;
arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
@@ -424,12 +424,15 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
static bool
arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
bool remove, bool zero)
bool remove, bool zero, bool commit)
{
arena_chunk_t *chunk;
arena_chunk_map_misc_t *miscelm;
size_t flag_dirty, flag_decommitted, run_ind, need_pages;
size_t flag_unzeroed_mask;
bool committed;
assert(!zero || commit);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
miscelm = arena_run_to_miscelm(run);
@@ -439,9 +442,15 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
need_pages = (size >> LG_PAGE);
assert(need_pages > 0);
if (flag_decommitted != 0 && arena->chunk_hooks.commit(chunk, chunksize,
run_ind << LG_PAGE, size, arena->ind))
return (true);
if (commit && flag_decommitted != 0) {
if (arena->chunk_hooks.commit(chunk, chunksize, run_ind <<
LG_PAGE, size, arena->ind)) {
return true;
}
committed = true;
} else {
committed = false;
}
if (remove) {
arena_run_split_remove(arena, chunk, run_ind, flag_dirty,
@@ -449,7 +458,7 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
}
if (zero) {
if (flag_decommitted != 0) {
if (committed) {
/* The run is untouched, and therefore zeroed. */
JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void
*)((uintptr_t)chunk + (run_ind << LG_PAGE)),
@@ -485,28 +494,34 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
* Set the last element first, in case the run only contains one page
* (i.e. both statements set the same element).
*/
flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ?
flag_unzeroed_mask = (flag_dirty == 0 && !committed) ?
CHUNK_MAP_UNZEROED : 0;
flag_decommitted = (!commit && flag_decommitted != 0) ?
CHUNK_MAP_DECOMMITTED : 0;
arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty |
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
run_ind+need_pages-1)));
run_ind+need_pages-1)) | flag_decommitted);
arena_mapbits_large_set(chunk, run_ind, size, flag_dirty |
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, run_ind)));
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, run_ind)) |
flag_decommitted);
return (false);
}
static bool
arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero,
bool commit)
{
return (arena_run_split_large_helper(arena, run, size, true, zero));
return (arena_run_split_large_helper(arena, run, size, true, zero,
commit));
}
static bool
arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
{
return (arena_run_split_large_helper(arena, run, size, false, zero));
return (arena_run_split_large_helper(arena, run, size, false, zero,
true));
}
static bool
@@ -585,6 +600,18 @@ arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, size_t sn, bool zero,
return (chunk_register(chunk, &chunk->node, gdump));
}
static arena_chunk_t *
arena_chunk_header_commit(tsdn_t *tsdn, arena_t *arena,
chunk_hooks_t *chunk_hooks, arena_chunk_t *chunk, size_t sn, bool zero) {
if (chunk_hooks->commit(chunk, chunksize, 0, map_bias <<
LG_PAGE, arena->ind)) {
chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk,
chunksize, sn, zero, false);
return NULL;
}
return chunk;
}
static arena_chunk_t *
arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena,
chunk_hooks_t *chunk_hooks, bool *zero, bool *commit)
@@ -599,13 +626,8 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena,
chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks,
NULL, chunksize, chunksize, &sn, zero, commit);
if (chunk != NULL && !*commit) {
/* Commit header. */
if (chunk_hooks->commit(chunk, chunksize, 0, map_bias <<
LG_PAGE, arena->ind)) {
chunk_dalloc_wrapper(tsdn, arena, chunk_hooks,
(void *)chunk, chunksize, sn, *zero, *commit);
chunk = NULL;
}
chunk = arena_chunk_header_commit(tsdn, arena, chunk_hooks,
chunk, sn, *zero);
}
if (chunk != NULL) {
bool gdump;
@@ -641,6 +663,10 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero,
chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize,
chunksize, &sn, zero, commit, true);
if (chunk != NULL && !*commit) {
chunk = arena_chunk_header_commit(tsdn, arena, &chunk_hooks,
chunk, sn, *zero);
}
if (chunk != NULL) {
bool gdump;
if (arena_chunk_register(arena, chunk, sn, *zero, &gdump)) {
@@ -721,7 +747,7 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena)
}
}
arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun,
flag_unzeroed);
flag_unzeroed | flag_decommitted);
return (chunk);
}
@@ -1145,18 +1171,20 @@ arena_run_first_best_fit(arena_t *arena, size_t size)
}
static arena_run_t *
arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero,
bool commit)
{
arena_run_t *run = arena_run_first_best_fit(arena, size);
if (run != NULL) {
if (arena_run_split_large(arena, run, size, zero))
if (arena_run_split_large(arena, run, size, zero, commit))
run = NULL;
}
return (run);
}
static arena_run_t *
arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero)
arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero,
bool commit)
{
arena_chunk_t *chunk;
arena_run_t *run;
@@ -1165,7 +1193,7 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero)
assert(size == PAGE_CEILING(size));
/* Search the arena's chunks for the lowest best fit. */
run = arena_run_alloc_large_helper(arena, size, zero);
run = arena_run_alloc_large_helper(arena, size, zero, commit);
if (run != NULL)
return (run);
@@ -1175,7 +1203,7 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero)
chunk = arena_chunk_alloc(tsdn, arena);
if (chunk != NULL) {
run = &arena_miscelm_get_mutable(chunk, map_bias)->run;
if (arena_run_split_large(arena, run, size, zero))
if (arena_run_split_large(arena, run, size, zero, commit))
run = NULL;
return (run);
}
@@ -1185,7 +1213,7 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero)
* sufficient memory available while this one dropped arena->lock in
* arena_chunk_alloc(), so search one more time.
*/
return (arena_run_alloc_large_helper(arena, size, zero));
return (arena_run_alloc_large_helper(arena, size, zero, commit));
}
static arena_run_t *
@@ -1657,7 +1685,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
arena_chunk_alloc(tsdn, arena);
/* Temporarily allocate the free dirty run. */
arena_run_split_large(arena, run, run_size, false);
arena_run_split_large(arena, run, run_size, false,
false);
/* Stash. */
if (false)
qr_new(rdelm, rd_link); /* Redundant. */
@@ -2240,9 +2269,10 @@ arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty |
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
pageind+head_npages-1)));
pageind+head_npages-1)) | flag_decommitted);
arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty |
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind)));
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind)) |
flag_decommitted);
if (config_debug) {
UNUSED size_t tail_npages = newsize >> LG_PAGE;
@@ -2253,7 +2283,7 @@ arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
}
arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
pageind+head_npages)));
pageind+head_npages)) | flag_decommitted);
arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted !=
0));
@@ -2283,9 +2313,10 @@ arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty |
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
pageind+head_npages-1)));
pageind+head_npages-1)) | flag_decommitted);
arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty |
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind)));
(flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind)) |
flag_decommitted);
if (config_debug) {
UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE;
@@ -2296,7 +2327,7 @@ arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
}
arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize,
flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
pageind+head_npages)));
pageind+head_npages)) | flag_decommitted);
tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages);
tail_run = &tail_miscelm->run;
@@ -2667,7 +2698,7 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
random_offset = ((uintptr_t)r) << LG_CACHELINE;
} else
random_offset = 0;
run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero);
run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero, true);
if (run == NULL) {
malloc_mutex_unlock(tsdn, &arena->lock);
return (NULL);
@@ -2748,7 +2779,7 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
alloc_size = usize + large_pad + alignment - PAGE;
malloc_mutex_lock(tsdn, &arena->lock);
run = arena_run_alloc_large(tsdn, arena, alloc_size, false);
run = arena_run_alloc_large(tsdn, arena, alloc_size, false, false);
if (run == NULL) {
malloc_mutex_unlock(tsdn, &arena->lock);
return (NULL);
@@ -3151,7 +3182,7 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
goto label_fail;
run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run;
if (arena_run_split_large(arena, run, splitsize, zero))
if (arena_run_split_large(arena, run, splitsize, zero, true))
goto label_fail;
if (config_cache_oblivious && zero) {
@@ -3811,6 +3842,9 @@ init_thp_initially_huge(void) {
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
fd = (int)syscall(SYS_open,
"/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
fd = (int)syscall(SYS_openat,
AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
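
To make the "commit after trimming" ordering from the third commit message concrete outside jemalloc's run machinery, here is a hedged sketch that uses mmap()/mprotect() as stand-ins for the chunk commit hook. The function name aligned_reserve_commit() and the PROT_NONE reservation scheme are assumptions for illustration only; jemalloc's actual change threads a commit flag through arena_run_split_large() and related helpers, as shown in the diff above.

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* Sketch: alignment must be a power of two and a multiple of the page size. */
static void *
aligned_reserve_commit(size_t size, size_t alignment) {
	size_t alloc_size = size + alignment;	/* over-size so an aligned run fits */

	/* Reserve address space only; PROT_NONE keeps it uncommitted. */
	char *base = mmap(NULL, alloc_size, PROT_NONE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED)
		return NULL;

	char *aligned = (char *)(((uintptr_t)base + alignment - 1) &
	    ~((uintptr_t)alignment - 1));
	size_t lead = (size_t)(aligned - base);
	size_t tail = alloc_size - lead - size;

	/* Trim the head and tail while they are still uncommitted. */
	if (lead != 0)
		munmap(base, lead);
	if (tail != 0)
		munmap(aligned + size, tail);

	/* Only now commit the pages that will actually be used. */
	if (mprotect(aligned, size, PROT_READ | PROT_WRITE) != 0) {
		munmap(aligned, size);
		return NULL;
	}
	return aligned;
}

The point mirrored here is that the head and tail are released while still uncommitted, so trimming never produces clean-but-committed pages that would later escape purging.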

View File

@@ -250,9 +250,9 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize);
*sn = extent_node_sn_get(node);
zeroed = extent_node_zeroed_get(node);
if (zeroed)
*zero = true;
committed = extent_node_committed_get(node);
if (zeroed && committed)
*zero = true;
if (committed)
*commit = true;
/* Split the lead. */
@@ -304,7 +304,8 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
arena_chunk_cache_maybe_insert(arena, node, cache);
node = NULL;
}
if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) {
if (*commit && !committed && chunk_hooks->commit(ret, size, 0, size,
arena->ind)) {
malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
chunk_record(tsdn, arena, chunk_hooks, chunks_szsnad, chunks_ad,
cache, ret, size, *sn, zeroed, committed);
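
The chunk_recycle() change above implements the second paragraph of the third commit message: a cached extent's zeroed flag is only honored when the extent is also committed, and the commit hook only runs when the caller actually asked for committed memory. A condensed sketch of that policy, with invented names, might read:

#include <stdbool.h>

/* Hypothetical helper: decide what a recycled extent can promise.
 * node_zeroed/node_committed describe the cached extent; *zero/*commit
 * are the caller's in/out requests. Returns true on commit failure. */
static bool
recycle_update_flags(bool node_zeroed, bool node_committed,
    bool *zero, bool *commit, bool (*commit_hook)(void)) {
	if (node_zeroed && node_committed)
		*zero = true;	/* zeroed is only meaningful if committed */
	if (node_committed)
		*commit = true;
	/* Commit only when the caller asked for committed memory. */
	if (*commit && !node_committed && commit_hook())
		return true;	/* commit hook failed */
	return false;
}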

View File

@@ -20,6 +20,8 @@ static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT;
/* Base address of the DSS. */
static void *dss_base;
/* Atomic boolean indicating whether a thread is currently extending DSS. */
static unsigned dss_extending;
/* Atomic boolean indicating whether the DSS is exhausted. */
static unsigned dss_exhausted;
/* Atomic current upper limit on DSS addresses. */
@@ -63,29 +65,13 @@ chunk_dss_prec_set(dss_prec_t dss_prec)
static void *
chunk_dss_max_update(void *new_addr)
{
void *max_cur;
spin_t spinner;
void *max_cur = chunk_dss_sbrk(0);
/*
* Get the current end of the DSS as max_cur and assure that dss_max is
* up to date.
*/
spin_init(&spinner);
while (true) {
void *max_prev = atomic_read_p(&dss_max);
max_cur = chunk_dss_sbrk(0);
if ((uintptr_t)max_prev > (uintptr_t)max_cur) {
/*
* Another thread optimistically updated dss_max. Wait
* for it to finish.
*/
spin_adaptive(&spinner);
continue;
}
if (!atomic_cas_p(&dss_max, max_prev, max_cur))
break;
if (max_cur == (void *)-1) {
return NULL;
}
atomic_write_p(&dss_max, max_cur);
/* Fixed new_addr can only be supported if it is at the edge of DSS. */
if (new_addr != NULL && max_cur != new_addr)
return (NULL);
@@ -93,6 +79,26 @@ chunk_dss_max_update(void *new_addr)
return (max_cur);
}
static void
chunk_dss_extending_start(void) {
spin_t spinner;
spin_init(&spinner);
while (true) {
unsigned expected = 0;
if (!atomic_cas_u(&dss_extending, expected, 1)) {
break;
}
spin_adaptive(&spinner);
}
}
static void
chunk_dss_extending_finish(void) {
assert(atomic_read_u(&dss_extending));
atomic_write_u(&dss_extending, 0);
}
void *
chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
size_t alignment, bool *zero, bool *commit)
@@ -108,6 +114,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
if ((intptr_t)size < 0)
return (NULL);
chunk_dss_extending_start();
if (!atomic_read_u(&dss_exhausted)) {
/*
* The loop is necessary to recover from races with other
@@ -152,19 +159,14 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
assert((uintptr_t)max_cur + incr == (uintptr_t)ret +
size);
/*
* Optimistically update dss_max, and roll back below if
* sbrk() fails. No other thread will try to extend the
* DSS while dss_max is greater than the current DSS
* max reported by sbrk(0).
*/
if (atomic_cas_p(&dss_max, max_cur, dss_next))
continue;
/* Try to allocate. */
dss_prev = chunk_dss_sbrk(incr);
if (dss_prev == max_cur) {
/* Success. */
atomic_write_p(&dss_max, dss_next);
chunk_dss_extending_finish();
if (gap_size_chunk != 0) {
chunk_hooks_t chunk_hooks =
CHUNK_HOOKS_INITIALIZER;
@@ -186,13 +188,8 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
/*
* Failure, whether due to OOM or a race with a raw
* sbrk() call from outside the allocator. Try to roll
* back optimistic dss_max update; if rollback fails,
* it's due to another caller of this function having
* succeeded since this invocation started, in which
* case rollback is not necessary.
* sbrk() call from outside the allocator.
*/
atomic_cas_p(&dss_max, dss_next, max_cur);
if (dss_prev == (void *)-1) {
/* OOM. */
atomic_write_u(&dss_exhausted, (unsigned)true);
@@ -201,6 +198,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
}
}
label_oom:
chunk_dss_extending_finish();
return (NULL);
}
@@ -240,6 +238,7 @@ chunk_dss_boot(void)
cassert(have_dss);
dss_base = chunk_dss_sbrk(0);
atomic_write_u(&dss_extending, 0);
dss_exhausted = (unsigned)(dss_base == (void *)-1);
dss_max = dss_base;
}
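
The dss_extending flag added above replaces the optimistic dss_max CAS protocol, per the first commit message ("Do not assume dss never decreases"). The following standalone sketch shows the same acquire/release pattern using C11 <stdatomic.h> rather than jemalloc's atomic_* wrappers; the names dss_extending_flag, dss_extending_start(), and dss_extending_finish() are illustrative only:

#include <stdatomic.h>

static atomic_uint dss_extending_flag;	/* 0 = idle, 1 = a thread is in sbrk() */

static void
dss_extending_start(void) {
	unsigned expected = 0;
	/* Spin until we flip the flag from 0 to 1. */
	while (!atomic_compare_exchange_weak(&dss_extending_flag,
	    &expected, 1)) {
		expected = 0;	/* a failed CAS overwrote expected */
		/* A real implementation would back off here (pause/yield). */
	}
}

static void
dss_extending_finish(void) {
	atomic_store(&dss_extending_flag, 0);
}

Because this flag, rather than the observed dss_max, serializes extenders, an outside sbrk() caller that shrinks the data segment can no longer be mistaken for a concurrent jemalloc extension.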

View File

@@ -250,6 +250,9 @@ os_overcommits_proc(void)
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
fd = (int)syscall(SYS_openat,
AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
#else
fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
#endif