Limit maximum number of purged slabs with option

The `experimental_hpa_max_purge_nhp` option is introduced for backward
compatibility: it makes it possible to keep behaviour similar to the
buggy `hpa_strict_min_purge_interval` implementation.

When `experimental_hpa_max_purge_nhp` is set to -1, there is no limit
on the number of slabs we'll purge on each iteration. Otherwise, we'll
purge no more than `experimental_hpa_max_purge_nhp` hugepages (slabs).
This in turn means we might not purge enough dirty pages to satisfy
the `hpa_dirty_mult` requirement.
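As a minimal sketch of how an application might set this limit, assuming an
unprefixed jemalloc build with HPA support (a prefixed build would use
`je_malloc_conf` instead), and assuming the HPA pages allocator itself is
enabled through the `hpa` option; the option name matches the conf handler
added in this change:

#include <stdlib.h>

/*
 * Read by jemalloc at initialization. -1 (the default) leaves purging
 * unlimited; a positive value caps the number of hugepages (slabs) purged
 * per purging attempt.
 */
const char *malloc_conf = "hpa:true,experimental_hpa_max_purge_nhp:4";

int
main(void) {
	/* With HPA enabled, eligible allocations are served by HPA shards. */
	void *p = malloc(4096);
	free(p);
	return 0;
}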

The combination of the `hpa_dirty_mult`, `experimental_hpa_max_purge_nhp`,
and `hpa_strict_min_purge_interval` options allows us to maintain a steady
rate of pages returned to the system. This gives stricter latency
guarantees, since the number of `madvise` calls is bounded (and hence the
number of TLB shootdowns is limited), in exchange for weaker memory usage
guarantees.
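For instance, with `hpa_strict_min_purge_interval` enabled and
`experimental_hpa_max_purge_nhp` set to N, at most N hugepages are purged per
`hpa_min_purge_interval_ms` window, which is what bounds the `madvise` rate.
The effective settings can be read back at run time; below is a sketch
assuming a build that exposes the unprefixed `mallctl` API via
`<jemalloc/jemalloc.h>` (the `opt.*` names and types mirror the ctl nodes
added in this change):

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	ssize_t max_purge_nhp;
	uint64_t interval_ms;
	bool strict;
	size_t sz;

	/* All three are read-only opt.* mallctls. */
	sz = sizeof(max_purge_nhp);
	mallctl("opt.experimental_hpa_max_purge_nhp", &max_purge_nhp, &sz,
	    NULL, 0);
	sz = sizeof(interval_ms);
	mallctl("opt.hpa_min_purge_interval_ms", &interval_ms, &sz, NULL, 0);
	sz = sizeof(strict);
	mallctl("opt.hpa_strict_min_purge_interval", &strict, &sz, NULL, 0);

	/*
	 * With a positive max_purge_nhp and strict intervals, at most
	 * max_purge_nhp hugepages are purged per interval_ms window.
	 */
	printf("max_purge_nhp=%zd interval_ms=%" PRIu64 " strict=%d\n",
	    max_purge_nhp, interval_ms, (int)strict);
	return 0;
}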
Author: Dmitry Ilvokhin, 2024-08-06 08:47:57 -07:00; committed by Qi Wang
parent 143f458188
commit aaa29003ab
7 changed files with 109 additions and 6 deletions

@ -57,6 +57,11 @@ struct hpa_shard_opts_s {
* purging logic fix.
*/
bool strict_min_purge_interval;
/*
* Maximum number of hugepages to purge on each purging attempt.
*/
ssize_t experimental_max_purge_nhp;
};
#define HPA_SHARD_OPTS_DEFAULT { \
@ -79,7 +84,9 @@ struct hpa_shard_opts_s {
/* min_purge_interval_ms */ \
5 * 1000, \
/* strict_min_purge_interval */ \
false \
false, \
/* experimental_max_purge_nhp */ \
-1 \
}
#endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */

@ -104,6 +104,7 @@ CTL_PROTO(opt_hpa_hugification_threshold)
CTL_PROTO(opt_hpa_hugify_delay_ms)
CTL_PROTO(opt_hpa_min_purge_interval_ms)
CTL_PROTO(opt_hpa_strict_min_purge_interval)
CTL_PROTO(opt_experimental_hpa_max_purge_nhp)
CTL_PROTO(opt_hpa_dirty_mult)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_hpa_sec_max_alloc)
@ -460,7 +461,10 @@ static const ctl_named_node_t opt_node[] = {
CTL(opt_hpa_hugification_threshold)},
{NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)},
{NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)},
{NAME("hpa_strict_min_purge_interval"), CTL(opt_hpa_strict_min_purge_interval)},
{NAME("hpa_strict_min_purge_interval"),
CTL(opt_hpa_strict_min_purge_interval)},
{NAME("experimental_hpa_max_purge_nhp"),
CTL(opt_experimental_hpa_max_purge_nhp)},
{NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
@ -2197,6 +2201,8 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms,
uint64_t)
CTL_RO_NL_GEN(opt_hpa_strict_min_purge_interval,
opt_hpa_opts.strict_min_purge_interval, bool)
CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp,
opt_hpa_opts.experimental_max_purge_nhp, ssize_t)
/*
* This will have to change before we publicly document this option; fxp_t and

@ -552,7 +552,22 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
* too frequently.
*/
if (hpa_min_purge_interval_passed(tsdn, shard)) {
while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
size_t max_purges = max_ops;
/*
 * Limit the number of hugepages (slabs) to purge.
 * When the experimental_max_purge_nhp option is used, there is
 * no guarantee we'll always respect the dirty_mult option. The
 * experimental_max_purge_nhp option provides a way to configure
 * the same behaviour that was possible before with the buggy
 * implementation of the purging algorithm.
 */
ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp;
if (max_purge_nhp != -1 &&
max_purges > (size_t)max_purge_nhp) {
max_purges = max_purge_nhp;
}
while (hpa_should_purge(tsdn, shard) && nops < max_purges) {
if (!hpa_try_purge(tsdn, shard)) {
/*
* It is fine if we couldn't purge as sometimes

@ -1558,6 +1558,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
opt_hpa_opts.strict_min_purge_interval,
"hpa_strict_min_purge_interval");
CONF_HANDLE_SSIZE_T(
opt_hpa_opts.experimental_max_purge_nhp,
"experimental_hpa_max_purge_nhp", -1, SSIZE_MAX);
if (CONF_MATCH("hpa_dirty_mult")) {
if (CONF_MATCH_VALUE("-1")) {
opt_hpa_opts.dirty_mult = (fxp_t)-1;

@ -1565,6 +1565,7 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_UINT64("hpa_hugify_delay_ms")
OPT_WRITE_UINT64("hpa_min_purge_interval_ms")
OPT_WRITE_BOOL("hpa_strict_min_purge_interval")
OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp")
if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0)
== 0) {
/*

@ -35,7 +35,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
/* min_purge_interval_ms */
5 * 1000,
/* strict_min_purge_interval */
false
false,
/* experimental_max_purge_nhp */
-1
};
static hpa_shard_opts_t test_hpa_shard_opts_purge = {
@ -52,7 +54,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = {
/* min_purge_interval_ms */
5 * 1000,
/* strict_min_purge_interval */
false
false,
/* experimental_max_purge_nhp */
-1
};
static hpa_shard_t *
@ -653,6 +657,70 @@ TEST_BEGIN(test_purge) {
}
TEST_END
TEST_BEGIN(test_experimental_max_purge_nhp) {
test_skip_if(!hpa_supported());
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
opts.experimental_max_purge_nhp = 1;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
nstime_init(&defer_curtime, 0);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum {NALLOCS = 8 * HUGEPAGE_PAGES};
edata_t *edatas[NALLOCS];
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Deallocate 3 hugepages out of 8. */
for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) {
pai_dalloc(tsdn, &shard->pai, edatas[i],
&deferred_work_generated);
}
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
/*
* Expect only one purge call, because opts.experimental_max_purge_nhp
* is set to 1.
*/
expect_zu_eq(1, ndefer_purge_calls, "Expect purges");
ndefer_purge_calls = 0;
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
/* We are still above the limit for dirty pages. */
expect_zu_eq(1, ndefer_purge_calls, "Expect purge");
ndefer_purge_calls = 0;
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
/* Finally, we are below the limit, so no purges are expected. */
expect_zu_eq(0, ndefer_purge_calls, "Purged too early");
destroy_test_data(shard);
}
TEST_END
int
main(void) {
/*
@ -675,5 +743,6 @@ main(void) {
test_purge_no_infinite_loop,
test_strict_no_min_purge_interval,
test_strict_min_purge_interval,
test_purge);
test_purge,
test_experimental_max_purge_nhp);
}

@ -292,6 +292,7 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always);
TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always);
TEST_MALLCTL_OPT(unsigned, narenas, always);
TEST_MALLCTL_OPT(const char *, percpu_arena, always);
TEST_MALLCTL_OPT(size_t, oversize_threshold, always);