Reland "[scudo] Manage free blocks in BatchGroup."

This is not a pure revert of c929bcb7d8.
It also includes a bug fix.

Differential Revision: https://reviews.llvm.org/D136029
Chia-hung Duan 2022-10-17 20:38:37 +00:00
parent 8c8775e938
commit c0f91856a3
8 changed files with 640 additions and 67 deletions

View File

@ -34,6 +34,14 @@ namespace scudo {
// typedef SizeClassAllocator64<ExampleConfig> Primary;
// // Log2 of the size of a size class region, as used by the Primary.
// static const uptr PrimaryRegionSizeLog = 30U;
// // Log2 of the size of a block group, as used by the Primary. Each group
// // covers a range of memory addresses, and blocks within that range belong
// // to the same group. In general, a Primary that uses a single large region
// // per size class uses a 1 or 2 MB group size, whereas a Primary that uses
// // multiple regions sets the group size equal to the region size, because
// // its regions are usually smaller than 1 MB.
// // A smaller value gives finer-grained control of memory usage, but the
// // trade-off is that deallocation may take longer.
// static const uptr PrimaryGroupSizeLog = 20U;
// // Defines the type and scale of a compact pointer. A compact pointer can
// // be understood as the offset of a pointer within the region it belongs
// // to, in increments of a power-of-2 scale.
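
For illustration, here is a minimal standalone sketch (not part of this patch; the constant below is an assumed value) of how a group id is derived from a block address once a group size log is chosen, mirroring what compactPtrGroup() does in the 32-bit primary:

#include <cstdint>
#include <cstdio>

// Assumed value for the sketch; DefaultConfig above uses 21 (64-bit) or 19 (32-bit).
constexpr uint64_t GroupSizeLog = 20;

// Blocks whose addresses fall in the same (1 << GroupSizeLog)-sized window
// share a group id.
constexpr uint64_t groupIdOf(uint64_t BlockAddress) {
  return BlockAddress >> GroupSizeLog;
}

int main() {
  printf("%llu %llu %llu\n",
         (unsigned long long)groupIdOf(0x0FFFFF),  // 0
         (unsigned long long)groupIdOf(0x100000),  // 1
         (unsigned long long)groupIdOf(0x1FFFFF)); // 1
  return 0;
}
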
@ -65,6 +73,7 @@ struct DefaultConfig {
#if SCUDO_CAN_USE_PRIMARY64
typedef SizeClassAllocator64<DefaultConfig> Primary;
static const uptr PrimaryRegionSizeLog = 32U;
static const uptr PrimaryGroupSizeLog = 21U;
typedef uptr PrimaryCompactPtrT;
static const uptr PrimaryCompactPtrScale = 0;
static const bool PrimaryEnableRandomOffset = true;
@ -72,6 +81,7 @@ struct DefaultConfig {
#else
typedef SizeClassAllocator32<DefaultConfig> Primary;
static const uptr PrimaryRegionSizeLog = 19U;
static const uptr PrimaryGroupSizeLog = 19U;
typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
@ -96,11 +106,13 @@ struct AndroidConfig {
static const uptr PrimaryRegionSizeLog = 28U;
typedef u32 PrimaryCompactPtrT;
static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
static const uptr PrimaryGroupSizeLog = 20U;
static const bool PrimaryEnableRandomOffset = true;
static const uptr PrimaryMapSizeIncrement = 1UL << 18;
#else
typedef SizeClassAllocator32<AndroidConfig> Primary;
static const uptr PrimaryRegionSizeLog = 18U;
static const uptr PrimaryGroupSizeLog = 18U;
typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
@ -127,11 +139,13 @@ struct AndroidSvelteConfig {
static const uptr PrimaryRegionSizeLog = 27U;
typedef u32 PrimaryCompactPtrT;
static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
static const uptr PrimaryGroupSizeLog = 18U;
static const bool PrimaryEnableRandomOffset = true;
static const uptr PrimaryMapSizeIncrement = 1UL << 18;
#else
typedef SizeClassAllocator32<AndroidSvelteConfig> Primary;
static const uptr PrimaryRegionSizeLog = 16U;
static const uptr PrimaryGroupSizeLog = 16U;
typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
@ -156,6 +170,7 @@ struct FuchsiaConfig {
typedef SizeClassAllocator64<FuchsiaConfig> Primary;
static const uptr PrimaryRegionSizeLog = 30U;
static const uptr PrimaryGroupSizeLog = 30U;
typedef u32 PrimaryCompactPtrT;
static const bool PrimaryEnableRandomOffset = true;
static const uptr PrimaryMapSizeIncrement = 1UL << 18;
@ -175,6 +190,7 @@ struct TrustyConfig {
typedef SizeClassAllocator64<TrustyConfig> Primary;
// Some apps have 1 page of heap total so small regions are necessary.
static const uptr PrimaryRegionSizeLog = 10U;
static const uptr PrimaryGroupSizeLog = 10U;
typedef u32 PrimaryCompactPtrT;
static const bool PrimaryEnableRandomOffset = false;
// Trusty is extremely memory-constrained so minimally round up map calls.

View File

@ -110,6 +110,18 @@ template <class T> struct SinglyLinkedList : public IntrusiveList<T> {
Size--;
}
// Insert X right after Prev.
void insert(T *Prev, T *X) {
DCHECK(!empty());
DCHECK_NE(Prev, nullptr);
DCHECK_NE(X, nullptr);
X->Next = Prev->Next;
Prev->Next = X;
if (Last == Prev)
Last = X;
++Size;
}
void extract(T *Prev, T *X) {
DCHECK(!empty());
DCHECK_NE(Prev, nullptr);

View File

@ -10,6 +10,7 @@
#define SCUDO_LOCAL_CACHE_H_
#include "internal_defs.h"
#include "list.h"
#include "platform.h"
#include "report.h"
#include "stats.h"
@ -27,6 +28,12 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
Count = N;
memcpy(Batch, Array, sizeof(Batch[0]) * Count);
}
void appendFromArray(CompactPtrT *Array, u16 N) {
DCHECK_LE(N, MaxNumCached - Count);
memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
// u16 will be promoted to int by arithmetic type conversion.
Count = static_cast<u16>(Count + N);
}
void clear() { Count = 0; }
void add(CompactPtrT P) {
DCHECK_LT(Count, MaxNumCached);
@ -50,6 +57,22 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
u16 Count;
};
// A BatchGroup is used to collect blocks. Each group has a group id that
// identifies which range of addresses the blocks it contains belong to.
struct BatchGroup {
// `Next` is used by IntrusiveList.
BatchGroup *Next;
// The identifier of each group
uptr GroupId;
// Cached value of TransferBatch::getMaxCached()
u16 MaxCachedPerBatch;
// Blocks are managed by TransferBatch in a list.
SinglyLinkedList<TransferBatch> Batches;
};
static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
"BatchGroup uses the same class size as TransferBatch");
void init(GlobalStats *S, SizeClassAllocator *A) {
DCHECK(isEmpty());
Stats.init();
@ -121,9 +144,18 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
TransferBatch *createBatch(uptr ClassId, void *B) {
if (ClassId != BatchClassId)
B = allocate(BatchClassId);
if (UNLIKELY(!B))
reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
return reinterpret_cast<TransferBatch *>(B);
}
BatchGroup *createGroup() {
void *Ptr = allocate(BatchClassId);
if (UNLIKELY(!Ptr))
reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
return reinterpret_cast<BatchGroup *>(Ptr);
}
LocalStats &getStats() { return Stats; }
private:
@ -182,16 +214,11 @@ private:
NOINLINE void drain(PerClass *C, uptr ClassId) {
const u16 Count = Min(static_cast<u16>(C->MaxCount / 2), C->Count);
TransferBatch *B =
createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0]));
if (UNLIKELY(!B))
reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
B->setFromArray(&C->Chunks[0], Count);
Allocator->pushBlocks(this, ClassId, &C->Chunks[0], Count);
// u16 will be promoted to int by arithmetic type conversion.
C->Count = static_cast<u16>(C->Count - Count);
for (u16 I = 0; I < C->Count; I++)
C->Chunks[I] = C->Chunks[I + Count];
Allocator->pushBatch(ClassId, B);
}
};
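
To make the reworked drain() above concrete, a standalone sketch (toy signature, illustrative only) of the same flow: hand the first half of the cached pointers back to the allocator in a single pushBlocks-style call and compact the remainder to the front of the array:

#include <algorithm>
#include <cstdint>
#include <functional>

using u16 = uint16_t;
using u32 = uint32_t;

// PushBlocks is an assumed toy callback standing in for Allocator->pushBlocks().
void drainSketch(u32 *Chunks, u16 &Count, u16 MaxCount,
                 const std::function<void(u32 *, u16)> &PushBlocks) {
  const u16 ToPush = std::min<u16>(static_cast<u16>(MaxCount / 2), Count);
  PushBlocks(Chunks, ToPush);               // The whole half goes back in one call.
  Count = static_cast<u16>(Count - ToPush); // u16 arithmetic promotes to int.
  for (u16 I = 0; I < Count; I++)           // Keep the remaining pointers packed.
    Chunks[I] = Chunks[I + ToPush];
}

int main() {
  u32 Chunks[8] = {1, 2, 3, 4, 5, 6};
  u16 Count = 6;
  drainSketch(Chunks, Count, /*MaxCount=*/8,
              [](u32 *, u16 N) { (void)N; /* pretend the primary absorbed N blocks */ });
  // MaxCount / 2 == 4 blocks were pushed; Chunks[0..1] now hold {5, 6}.
  return 0;
}
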

View File

@ -43,6 +43,7 @@ template <typename Config> class SizeClassAllocator32 {
public:
typedef typename Config::PrimaryCompactPtrT CompactPtrT;
typedef typename Config::SizeClassMap SizeClassMap;
static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog;
// The bytemap can only track UINT8_MAX - 1 classes.
static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), "");
// Regions should be large enough to hold the largest Block.
@ -51,6 +52,7 @@ public:
typedef SizeClassAllocator32<Config> ThisT;
typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
typedef typename CacheT::TransferBatch TransferBatch;
typedef typename CacheT::BatchGroup BatchGroup;
static uptr getSizeByClassId(uptr ClassId) {
return (ClassId == SizeClassMap::BatchClassId)
@ -111,30 +113,69 @@ public:
return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr));
}
uptr compactPtrGroup(CompactPtrT CompactPtr) {
return CompactPtr >> GroupSizeLog;
}
TransferBatch *popBatch(CacheT *C, uptr ClassId) {
DCHECK_LT(ClassId, NumClasses);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
ScopedLock L(Sci->Mutex);
TransferBatch *B = Sci->FreeList.front();
if (B) {
Sci->FreeList.pop_front();
} else {
B = populateFreeList(C, ClassId, Sci);
if (UNLIKELY(!B))
TransferBatch *B = popBatchImpl(C, ClassId);
if (UNLIKELY(!B)) {
if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
return nullptr;
B = popBatchImpl(C, ClassId);
// If `populateFreeList` succeeded, we should have free blocks to pop.
DCHECK_NE(B, nullptr);
}
DCHECK_GT(B->getCount(), 0);
Sci->Stats.PoppedBlocks += B->getCount();
return B;
}
void pushBatch(uptr ClassId, TransferBatch *B) {
// Push the array of free blocks to the designated batch group.
void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
DCHECK_LT(ClassId, NumClasses);
DCHECK_GT(B->getCount(), 0);
DCHECK_GT(Size, 0);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
if (ClassId == SizeClassMap::BatchClassId) {
ScopedLock L(Sci->Mutex);
// Constructing a batch group in the free list uses two blocks of
// BatchClassId. When pushing BatchClassId blocks, we use the blocks in
// the array directly (we can't go through the local cache, which would
// cause a recursive allocation). However, the number of free blocks may
// be less than two, so populate the free list before inserting the
// blocks.
if (Size == 1 && !populateFreeList(C, ClassId, Sci))
return;
pushBlocksImpl(C, ClassId, Array, Size);
Sci->Stats.PushedBlocks += Size;
return;
}
// TODO(chiahungduan): Consider skipping the grouping when the group size is
// not larger than the block size by a certain factor.
// Sort the blocks so that blocks belonging to the same group can be pushed
// together.
bool SameGroup = true;
for (u32 I = 1; I < Size; ++I) {
if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I]))
SameGroup = false;
CompactPtrT Cur = Array[I];
u32 J = I;
while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) {
Array[J] = Array[J - 1];
--J;
}
Array[J] = Cur;
}
ScopedLock L(Sci->Mutex);
Sci->FreeList.push_front(B);
Sci->Stats.PushedBlocks += B->getCount();
pushBlocksImpl(C, ClassId, Array, Size, SameGroup);
Sci->Stats.PushedBlocks += Size;
if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Sci, ClassId);
}
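
The sort step above, pulled out into a standalone sketch (illustrative only; GroupSizeLog is an assumed value): a stable insertion sort keyed on the group id that also reports whether every block already belongs to a single group:

#include <cassert>
#include <cstdint>

constexpr uint64_t GroupSizeLog = 20; // Assumed for the sketch.

uint64_t groupOf(uint64_t CompactPtr) { return CompactPtr >> GroupSizeLog; }

// Sorts Array by group id; returns true iff all blocks share one group id.
bool sortByGroup(uint64_t *Array, uint32_t Size) {
  bool SameGroup = true;
  for (uint32_t I = 1; I < Size; ++I) {
    if (groupOf(Array[I - 1]) != groupOf(Array[I]))
      SameGroup = false;
    uint64_t Cur = Array[I];
    uint32_t J = I;
    while (J > 0 && groupOf(Cur) < groupOf(Array[J - 1])) {
      Array[J] = Array[J - 1];
      --J;
    }
    Array[J] = Cur;
  }
  return SameGroup;
}

int main() {
  uint64_t Blocks[] = {0x300010, 0x100040, 0x300020, 0x100080};
  assert(!sortByGroup(Blocks, 4));
  // Now grouped: {0x100040, 0x100080, 0x300010, 0x300020}.
  assert(groupOf(Blocks[0]) == 1 && groupOf(Blocks[3]) == 3);
  return 0;
}
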
@ -256,7 +297,7 @@ private:
struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo {
HybridMutex Mutex;
SinglyLinkedList<TransferBatch> FreeList;
SinglyLinkedList<BatchGroup> FreeList;
uptr CurrentRegion;
uptr CurrentRegionAllocated;
SizeClassStats Stats;
@ -328,8 +369,187 @@ private:
return &SizeClassInfoArray[ClassId];
}
NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
SizeClassInfo *Sci) {
// Push the blocks to their batch group. The layout will be like:
//
//   FreeList -> BG -> BG -> BG
//               |     |     |
//               v     v     v
//               TB    TB    TB
//               |
//               v
//               TB
//
// Each BatchGroup (BG) is associated with a unique group id, and its free
// blocks are managed by a list of TransferBatches (TB). To reduce insertion
// time, the BGs are kept sorted and the input `Array` is expected to be
// sorted as well, so that the sorted order is cheap to maintain.
// Use `SameGroup=true` to indicate that all blocks in the array are from the
// same group; then the group id check on each block is skipped.
//
// Note that this aims at better management of dirty pages, i.e., the RSS
// usage won't grow indefinitely. There is one exception where we may not put
// a block into its associated group: while populating new blocks, the blocks
// may cross different groups. However, in most cases they fall into the same
// group and are expected to be popped soon, so it's not worth sorting an
// almost-sorted array. Therefore, we use `SameGroup=true` instead.
//
// The region mutex needs to be held while calling this method.
void pushBlocksImpl(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size,
bool SameGroup = false) {
DCHECK_GT(Size, 0U);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
auto CreateGroup = [&](uptr GroupId) {
BatchGroup *BG = nullptr;
TransferBatch *TB = nullptr;
if (ClassId == SizeClassMap::BatchClassId) {
DCHECK_GE(Size, 2U);
BG = reinterpret_cast<BatchGroup *>(
decompactPtr(ClassId, Array[Size - 1]));
BG->Batches.clear();
TB = reinterpret_cast<TransferBatch *>(
decompactPtr(ClassId, Array[Size - 2]));
TB->clear();
} else {
BG = C->createGroup();
BG->Batches.clear();
TB = C->createBatch(ClassId, nullptr);
TB->clear();
}
BG->GroupId = GroupId;
BG->Batches.push_front(TB);
BG->MaxCachedPerBatch =
TransferBatch::getMaxCached(getSizeByClassId(ClassId));
return BG;
};
auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) {
SinglyLinkedList<TransferBatch> &Batches = BG->Batches;
TransferBatch *CurBatch = Batches.front();
DCHECK_NE(CurBatch, nullptr);
for (u32 I = 0; I < Size;) {
DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount());
u16 UnusedSlots =
static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
if (UnusedSlots == 0) {
CurBatch = C->createBatch(
ClassId,
reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
CurBatch->clear();
Batches.push_front(CurBatch);
UnusedSlots = BG->MaxCachedPerBatch;
}
// `UnusedSlots` is u16, so the result also fits in u16.
u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
CurBatch->appendFromArray(&Array[I], AppendSize);
I += AppendSize;
}
};
BatchGroup *Cur = Sci->FreeList.front();
if (ClassId == SizeClassMap::BatchClassId) {
if (Cur == nullptr) {
// Don't need to classify BatchClassId.
Cur = CreateGroup(/*GroupId=*/0);
Sci->FreeList.push_front(Cur);
}
InsertBlocks(Cur, Array, Size);
return;
}
// In the following, `Cur` always points to the BatchGroup for blocks that
// will be pushed next. `Prev` is the element right before `Cur`.
BatchGroup *Prev = nullptr;
while (Cur != nullptr && compactPtrGroup(Array[0]) > Cur->GroupId) {
Prev = Cur;
Cur = Cur->Next;
}
if (Cur == nullptr || compactPtrGroup(Array[0]) != Cur->GroupId) {
Cur = CreateGroup(compactPtrGroup(Array[0]));
if (Prev == nullptr)
Sci->FreeList.push_front(Cur);
else
Sci->FreeList.insert(Prev, Cur);
}
// All the blocks are from the same group; just push them without checking
// group ids.
if (SameGroup) {
InsertBlocks(Cur, Array, Size);
return;
}
// The blocks are sorted by group id. Determine each segment of blocks that
// share a group id and push the segment to its group in one shot.
u32 Count = 1;
for (u32 I = 1; I < Size; ++I) {
if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) {
DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->GroupId);
InsertBlocks(Cur, Array + I - Count, Count);
while (Cur != nullptr && compactPtrGroup(Array[I]) > Cur->GroupId) {
Prev = Cur;
Cur = Cur->Next;
}
if (Cur == nullptr || compactPtrGroup(Array[I]) != Cur->GroupId) {
Cur = CreateGroup(compactPtrGroup(Array[I]));
DCHECK_NE(Prev, nullptr);
Sci->FreeList.insert(Prev, Cur);
}
Count = 1;
} else {
++Count;
}
}
InsertBlocks(Cur, Array + Size - Count, Count);
}
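
The trailing run-splitting loop above is the subtle part; here is the same logic as a standalone sketch (illustrative only) that visits each run of equal group ids in a group-sorted array:

#include <cassert>
#include <cstdint>
#include <vector>

constexpr uint64_t GroupSizeLog = 20; // Assumed for the sketch.
uint64_t groupOf(uint64_t CompactPtr) { return CompactPtr >> GroupSizeLog; }

// Calls Visit(GroupId, RunStart, RunLength) once per run of equal group ids.
template <typename Fn>
void forEachGroupRun(const uint64_t *Array, uint32_t Size, Fn Visit) {
  uint32_t Count = 1;
  for (uint32_t I = 1; I < Size; ++I) {
    if (groupOf(Array[I - 1]) != groupOf(Array[I])) {
      Visit(groupOf(Array[I - 1]), Array + I - Count, Count);
      Count = 1;
    } else {
      ++Count;
    }
  }
  // The last run is flushed after the loop, just like the trailing
  // InsertBlocks(Cur, Array + Size - Count, Count) call above.
  Visit(groupOf(Array[Size - 1]), Array + Size - Count, Count);
}

int main() {
  const uint64_t Sorted[] = {0x100040, 0x100080, 0x300010, 0x300020, 0x300030};
  std::vector<uint32_t> RunLengths;
  forEachGroupRun(Sorted, 5, [&](uint64_t, const uint64_t *, uint32_t N) {
    RunLengths.push_back(N);
  });
  assert(RunLengths.size() == 2 && RunLengths[0] == 2 && RunLengths[1] == 3);
  return 0;
}
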
// Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
// group id will be considered first.
//
// The region mutex needs to be held while calling this method.
TransferBatch *popBatchImpl(CacheT *C, uptr ClassId) {
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
if (Sci->FreeList.empty())
return nullptr;
SinglyLinkedList<TransferBatch> &Batches = Sci->FreeList.front()->Batches;
DCHECK(!Batches.empty());
TransferBatch *B = Batches.front();
Batches.pop_front();
DCHECK_NE(B, nullptr);
DCHECK_GT(B->getCount(), 0U);
if (Batches.empty()) {
BatchGroup *BG = Sci->FreeList.front();
Sci->FreeList.pop_front();
// We don't keep a BatchGroup with zero blocks, to avoid an emptiness check
// while allocating. Note that the block used to construct the BatchGroup is
// recorded as a free block in the last element of BatchGroup::Batches,
// which means that once we pop the last TransferBatch, that block is
// implicitly deallocated.
if (ClassId != SizeClassMap::BatchClassId)
C->deallocate(SizeClassMap::BatchClassId, BG);
}
return B;
}
NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) {
uptr Region;
uptr Offset;
// If the size-class currently has a region associated to it, use it. The
@ -344,7 +564,7 @@ private:
DCHECK_EQ(Sci->CurrentRegionAllocated, 0U);
Region = allocateRegion(Sci, ClassId);
if (UNLIKELY(!Region))
return nullptr;
return false;
C->getStats().add(StatMapped, RegionSize);
Sci->CurrentRegion = Region;
Offset = 0;
@ -378,20 +598,15 @@ private:
if (ClassId != SizeClassMap::BatchClassId)
shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState);
for (u32 I = 0; I < NumberOfBlocks;) {
TransferBatch *B =
C->createBatch(ClassId, reinterpret_cast<void *>(ShuffleArray[I]));
if (UNLIKELY(!B))
return nullptr;
// `MaxCount` is u16 so the result will also fit in u16.
const u16 N = static_cast<u16>(Min<u32>(MaxCount, NumberOfBlocks - I));
B->setFromArray(&ShuffleArray[I], N);
Sci->FreeList.push_back(B);
// Note that the N blocks here may have different group ids, which only
// happens when the range crosses a group boundary. Instead of sorting them,
// treat them as belonging to the same group to avoid sorting an
// almost-sorted array.
pushBlocksImpl(C, ClassId, &ShuffleArray[I], N, /*SameGroup=*/true);
I += N;
}
TransferBatch *B = Sci->FreeList.front();
Sci->FreeList.pop_front();
DCHECK(B);
DCHECK_GT(B->getCount(), 0);
const uptr AllocatedUser = Size * NumberOfBlocks;
C->getStats().add(StatFree, AllocatedUser);
@ -407,7 +622,7 @@ private:
}
Sci->AllocatedUser += AllocatedUser;
return B;
return true;
}
void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
@ -477,8 +692,12 @@ private:
auto DecompactPtr = [](CompactPtrT CompactPtr) {
return reinterpret_cast<uptr>(CompactPtr);
};
releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize,
Recorder, DecompactPtr, SkipRegion);
PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions);
for (BatchGroup &BG : Sci->FreeList)
Context.markFreeBlocks(BG.Batches, DecompactPtr, Base);
releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
if (Recorder.getReleasedRangesCount() > 0) {
Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();

View File

@ -45,10 +45,12 @@ template <typename Config> class SizeClassAllocator64 {
public:
typedef typename Config::PrimaryCompactPtrT CompactPtrT;
static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale;
static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog;
typedef typename Config::SizeClassMap SizeClassMap;
typedef SizeClassAllocator64<Config> ThisT;
typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
typedef typename CacheT::TransferBatch TransferBatch;
typedef typename CacheT::BatchGroup BatchGroup;
static uptr getSizeByClassId(uptr ClassId) {
return (ClassId == SizeClassMap::BatchClassId)
@ -99,25 +101,61 @@ public:
DCHECK_LT(ClassId, NumClasses);
RegionInfo *Region = getRegionInfo(ClassId);
ScopedLock L(Region->Mutex);
TransferBatch *B = Region->FreeList.front();
if (B) {
Region->FreeList.pop_front();
} else {
B = populateFreeList(C, ClassId, Region);
if (UNLIKELY(!B))
TransferBatch *B = popBatchImpl(C, ClassId);
if (UNLIKELY(!B)) {
if (UNLIKELY(!populateFreeList(C, ClassId, Region)))
return nullptr;
B = popBatchImpl(C, ClassId);
// If `populateFreeList` succeeded, we should have free blocks to pop.
DCHECK_NE(B, nullptr);
}
DCHECK_GT(B->getCount(), 0);
Region->Stats.PoppedBlocks += B->getCount();
return B;
}
void pushBatch(uptr ClassId, TransferBatch *B) {
DCHECK_GT(B->getCount(), 0);
// Push the array of free blocks to the designated batch group.
void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
DCHECK_LT(ClassId, NumClasses);
DCHECK_GT(Size, 0);
RegionInfo *Region = getRegionInfo(ClassId);
if (ClassId == SizeClassMap::BatchClassId) {
ScopedLock L(Region->Mutex);
// Constructing a batch group in the free list uses two blocks of
// BatchClassId. When pushing BatchClassId blocks, we use the blocks in
// the array directly (we can't go through the local cache, which would
// cause a recursive allocation). However, the number of free blocks may
// be less than two, so populate the free list before inserting the
// blocks.
if (Size == 1 && UNLIKELY(!populateFreeList(C, ClassId, Region)))
return;
pushBlocksImpl(C, ClassId, Array, Size);
Region->Stats.PushedBlocks += Size;
return;
}
// TODO(chiahungduan): Consider skipping the grouping when the group size is
// not larger than the block size by a certain factor.
// Sort the blocks so that blocks belonging to the same group can be pushed
// together.
bool SameGroup = true;
for (u32 I = 1; I < Size; ++I) {
if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I]))
SameGroup = false;
CompactPtrT Cur = Array[I];
u32 J = I;
while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) {
Array[J] = Array[J - 1];
--J;
}
Array[J] = Cur;
}
ScopedLock L(Region->Mutex);
Region->FreeList.push_front(B);
Region->Stats.PushedBlocks += B->getCount();
pushBlocksImpl(C, ClassId, Array, Size, SameGroup);
Region->Stats.PushedBlocks += Size;
if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Region, ClassId);
}
@ -292,7 +330,7 @@ private:
struct UnpaddedRegionInfo {
HybridMutex Mutex;
SinglyLinkedList<TransferBatch> FreeList;
SinglyLinkedList<BatchGroup> FreeList;
uptr RegionBeg = 0;
RegionStats Stats = {};
u32 RandState = 0;
@ -330,8 +368,192 @@ private:
return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale);
}
NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
RegionInfo *Region) {
static uptr compactPtrGroup(CompactPtrT CompactPtr) {
return CompactPtr >> (GroupSizeLog - CompactPtrScale);
}
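
A short standalone check (illustrative only; both constants are assumed values) of why the shift here is GroupSizeLog - CompactPtrScale: the 64-bit primary stores compact pointers as region offsets already shifted right by CompactPtrScale, so the reduced shift yields the same grouping as dividing the raw offset by the group size:

#include <cassert>
#include <cstdint>

constexpr uint64_t GroupSizeLog = 20;   // Assumed, e.g. a 1 MB group.
constexpr uint64_t CompactPtrScale = 4; // Assumed, e.g. 16-byte granularity.

uint64_t compactPtrOf(uint64_t RegionOffset) {
  return RegionOffset >> CompactPtrScale;
}
uint64_t groupOfCompactPtr(uint64_t CompactPtr) {
  return CompactPtr >> (GroupSizeLog - CompactPtrScale);
}

int main() {
  // Shifting by Scale and then by (GroupSizeLog - Scale) is the same as
  // shifting the raw offset by GroupSizeLog.
  for (uint64_t Offset : {0x0FFFF0ull, 0x100000ull, 0x2ABCD0ull})
    assert(groupOfCompactPtr(compactPtrOf(Offset)) == Offset >> GroupSizeLog);
  return 0;
}
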
// Push the blocks to their batch group. The layout will be like:
//
//   FreeList -> BG -> BG -> BG
//               |     |     |
//               v     v     v
//               TB    TB    TB
//               |
//               v
//               TB
//
// Each BatchGroup (BG) is associated with a unique group id, and its free
// blocks are managed by a list of TransferBatches (TB). To reduce insertion
// time, the BGs are kept sorted and the input `Array` is expected to be
// sorted as well, so that the sorted order is cheap to maintain.
// Use `SameGroup=true` to indicate that all blocks in the array are from the
// same group; then the group id check on each block is skipped.
//
// Note that this aims at better management of dirty pages, i.e., the RSS
// usage won't grow indefinitely. There is one exception where we may not put
// a block into its associated group: while populating new blocks, the blocks
// may cross different groups. However, in most cases they fall into the same
// group and are expected to be popped soon, so it's not worth sorting an
// almost-sorted array. Therefore, we use `SameGroup=true` instead.
//
// The region mutex needs to be held while calling this method.
void pushBlocksImpl(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size,
bool SameGroup = false) {
DCHECK_GT(Size, 0U);
RegionInfo *Region = getRegionInfo(ClassId);
auto CreateGroup = [&](uptr GroupId) {
BatchGroup *BG = nullptr;
TransferBatch *TB = nullptr;
if (ClassId == SizeClassMap::BatchClassId) {
DCHECK_GE(Size, 2U);
BG = reinterpret_cast<BatchGroup *>(
decompactPtr(ClassId, Array[Size - 1]));
BG->Batches.clear();
TB = reinterpret_cast<TransferBatch *>(
decompactPtr(ClassId, Array[Size - 2]));
TB->clear();
} else {
BG = C->createGroup();
BG->Batches.clear();
TB = C->createBatch(ClassId, nullptr);
TB->clear();
}
BG->GroupId = GroupId;
BG->Batches.push_front(TB);
BG->MaxCachedPerBatch =
TransferBatch::getMaxCached(getSizeByClassId(ClassId));
return BG;
};
auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) {
SinglyLinkedList<TransferBatch> &Batches = BG->Batches;
TransferBatch *CurBatch = Batches.front();
DCHECK_NE(CurBatch, nullptr);
for (u32 I = 0; I < Size;) {
DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount());
u16 UnusedSlots =
static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
if (UnusedSlots == 0) {
CurBatch = C->createBatch(
ClassId,
reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
CurBatch->clear();
Batches.push_front(CurBatch);
UnusedSlots = BG->MaxCachedPerBatch;
}
// `UnusedSlots` is u16, so the result also fits in u16.
u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
CurBatch->appendFromArray(&Array[I], AppendSize);
I += AppendSize;
}
};
BatchGroup *Cur = Region->FreeList.front();
if (ClassId == SizeClassMap::BatchClassId) {
if (Cur == nullptr) {
// Don't need to classify BatchClassId.
Cur = CreateGroup(/*GroupId=*/0);
Region->FreeList.push_front(Cur);
}
InsertBlocks(Cur, Array, Size);
return;
}
// In the following, `Cur` always points to the BatchGroup for blocks that
// will be pushed next. `Prev` is the element right before `Cur`.
BatchGroup *Prev = nullptr;
while (Cur != nullptr && compactPtrGroup(Array[0]) > Cur->GroupId) {
Prev = Cur;
Cur = Cur->Next;
}
if (Cur == nullptr || compactPtrGroup(Array[0]) != Cur->GroupId) {
Cur = CreateGroup(compactPtrGroup(Array[0]));
if (Prev == nullptr)
Region->FreeList.push_front(Cur);
else
Region->FreeList.insert(Prev, Cur);
}
// All the blocks are from the same group; just push them without checking
// group ids.
if (SameGroup) {
InsertBlocks(Cur, Array, Size);
return;
}
// The blocks are sorted by group id. Determine each segment of blocks that
// share a group id and push the segment to its group in one shot.
u32 Count = 1;
for (u32 I = 1; I < Size; ++I) {
if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) {
DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->GroupId);
InsertBlocks(Cur, Array + I - Count, Count);
while (Cur != nullptr && compactPtrGroup(Array[I]) > Cur->GroupId) {
Prev = Cur;
Cur = Cur->Next;
}
if (Cur == nullptr || compactPtrGroup(Array[I]) != Cur->GroupId) {
Cur = CreateGroup(compactPtrGroup(Array[I]));
DCHECK_NE(Prev, nullptr);
Region->FreeList.insert(Prev, Cur);
}
Count = 1;
} else {
++Count;
}
}
InsertBlocks(Cur, Array + Size - Count, Count);
}
// Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
// group id will be considered first.
//
// The region mutex needs to be held while calling this method.
TransferBatch *popBatchImpl(CacheT *C, uptr ClassId) {
RegionInfo *Region = getRegionInfo(ClassId);
if (Region->FreeList.empty())
return nullptr;
SinglyLinkedList<TransferBatch> &Batches =
Region->FreeList.front()->Batches;
DCHECK(!Batches.empty());
TransferBatch *B = Batches.front();
Batches.pop_front();
DCHECK_NE(B, nullptr);
DCHECK_GT(B->getCount(), 0U);
if (Batches.empty()) {
BatchGroup *BG = Region->FreeList.front();
Region->FreeList.pop_front();
// We don't keep a BatchGroup with zero blocks, to avoid an emptiness check
// while allocating. Note that the block used to construct the BatchGroup is
// recorded as a free block in the last element of BatchGroup::Batches,
// which means that once we pop the last TransferBatch, that block is
// implicitly deallocated.
if (ClassId != SizeClassMap::BatchClassId)
C->deallocate(SizeClassMap::BatchClassId, BG);
}
return B;
}
NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region) {
const uptr Size = getSizeByClassId(ClassId);
const u16 MaxCount = TransferBatch::getMaxCached(Size);
@ -354,7 +576,7 @@ private:
RegionSize >> 20, Size);
Str.output();
}
return nullptr;
return false;
}
if (MappedUser == 0)
Region->Data = Data;
@ -363,8 +585,9 @@ private:
"scudo:primary",
MAP_ALLOWNOMEM | MAP_RESIZABLE |
(useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0),
&Region->Data)))
return nullptr;
&Region->Data))) {
return false;
}
Region->MappedUser += MapSize;
C->getStats().add(StatMapped, MapSize);
}
@ -387,27 +610,21 @@ private:
if (ClassId != SizeClassMap::BatchClassId)
shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState);
for (u32 I = 0; I < NumberOfBlocks;) {
TransferBatch *B =
C->createBatch(ClassId, reinterpret_cast<void *>(decompactPtrInternal(
CompactPtrBase, ShuffleArray[I])));
if (UNLIKELY(!B))
return nullptr;
// `MaxCount` is u16 so the result will also fit in u16.
const u16 N = static_cast<u16>(Min<u32>(MaxCount, NumberOfBlocks - I));
B->setFromArray(&ShuffleArray[I], N);
Region->FreeList.push_back(B);
// Note that the N blocks here may have different group ids, which only
// happens when the range crosses a group boundary. Instead of sorting them,
// treat them as belonging to the same group to avoid sorting an
// almost-sorted array.
pushBlocksImpl(C, ClassId, &ShuffleArray[I], N, /*SameGroup=*/true);
I += N;
}
TransferBatch *B = Region->FreeList.front();
Region->FreeList.pop_front();
DCHECK(B);
DCHECK_GT(B->getCount(), 0);
const uptr AllocatedUser = Size * NumberOfBlocks;
C->getStats().add(StatFree, AllocatedUser);
Region->AllocatedUser += AllocatedUser;
return B;
return true;
}
void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
@ -473,8 +690,11 @@ private:
return decompactPtrInternal(CompactPtrBase, CompactPtr);
};
auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
releaseFreeMemoryToOS(Region->FreeList, Region->AllocatedUser, 1U,
BlockSize, Recorder, DecompactPtr, SkipRegion);
PageReleaseContext Context(BlockSize, RegionSize, /*NumberOfRegions=*/1U);
for (BatchGroup &BG : Region->FreeList)
Context.markFreeBlocks(BG.Batches, DecompactPtr, Region->RegionBeg);
releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
if (Recorder.getReleasedRangesCount() > 0) {
Region->ReleaseInfo.PushedBlocksAtLastRelease =

View File

@ -525,6 +525,7 @@ struct DeathConfig {
static const scudo::uptr PrimaryCompactPtrScale = 0;
static const bool PrimaryEnableRandomOffset = true;
static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
static const scudo::uptr PrimaryGroupSizeLog = 18;
typedef scudo::MapAllocatorNoCache SecondaryCache;
template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U, 1U>;

View File

@ -161,6 +161,10 @@ TEST(ScudoListTest, SinglyLinkedList) {
setList(&L1, X);
checkList(&L1, X);
setList(&L1, X, Y);
L1.insert(X, Z);
checkList(&L1, X, Z, Y);
setList(&L1, X, Y, Z);
setList(&L2, A, B, C);
L1.append_back(&L2);

View File

@ -12,8 +12,11 @@
#include "primary64.h"
#include "size_class_map.h"
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <random>
#include <stdlib.h>
#include <thread>
#include <vector>
@ -24,6 +27,7 @@
struct TestConfig1 {
static const scudo::uptr PrimaryRegionSizeLog = 18U;
static const scudo::uptr PrimaryGroupSizeLog = 18U;
static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
static const bool MaySupportMemoryTagging = false;
@ -40,6 +44,7 @@ struct TestConfig2 {
#else
static const scudo::uptr PrimaryRegionSizeLog = 24U;
#endif
static const scudo::uptr PrimaryGroupSizeLog = 20U;
static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
static const bool MaySupportMemoryTagging = false;
@ -56,6 +61,7 @@ struct TestConfig3 {
#else
static const scudo::uptr PrimaryRegionSizeLog = 24U;
#endif
static const scudo::uptr PrimaryGroupSizeLog = 20U;
static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
static const bool MaySupportMemoryTagging = true;
@ -65,6 +71,23 @@ struct TestConfig3 {
static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
};
struct TestConfig4 {
#if defined(__mips__)
// Unable to allocate greater size on QEMU-user.
static const scudo::uptr PrimaryRegionSizeLog = 23U;
#else
static const scudo::uptr PrimaryRegionSizeLog = 24U;
#endif
static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
static const bool MaySupportMemoryTagging = true;
static const scudo::uptr PrimaryCompactPtrScale = 3U;
static const scudo::uptr PrimaryGroupSizeLog = 20U;
typedef scudo::u32 PrimaryCompactPtrT;
static const bool PrimaryEnableRandomOffset = true;
static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
};
template <typename BaseConfig, typename SizeClassMapT>
struct Config : public BaseConfig {
using SizeClassMap = SizeClassMapT;
@ -100,7 +123,8 @@ template <class BaseConfig> struct ScudoPrimaryTest : public Test {};
#define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \
SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig1) \
SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2) \
SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3)
SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3) \
SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4)
#endif
#define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \
@ -153,6 +177,7 @@ struct SmallRegionsConfig {
static const scudo::uptr PrimaryCompactPtrScale = 0;
static const bool PrimaryEnableRandomOffset = true;
static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
static const scudo::uptr PrimaryGroupSizeLog = 20U;
};
// The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes.
@ -170,6 +195,8 @@ TEST(ScudoPrimaryTest, Primary64OOM) {
std::vector<TransferBatch *> Batches;
const scudo::uptr ClassId = Primary::SizeClassMap::LargestClassId;
const scudo::uptr Size = Primary::getSizeByClassId(ClassId);
typename Primary::CacheT::CompactPtrT Blocks[TransferBatch::MaxNumCached];
for (scudo::uptr I = 0; I < 10000U; I++) {
TransferBatch *B = Allocator.popBatch(&Cache, ClassId);
if (!B) {
@ -181,8 +208,11 @@ TEST(ScudoPrimaryTest, Primary64OOM) {
Batches.push_back(B);
}
while (!Batches.empty()) {
Allocator.pushBatch(ClassId, Batches.back());
TransferBatch *B = Batches.back();
Batches.pop_back();
B->copyToArray(Blocks);
Allocator.pushBlocks(&Cache, ClassId, Blocks, B->getCount());
Cache.deallocate(Primary::SizeClassMap::BatchClassId, B);
}
Cache.destroy(nullptr);
Allocator.releaseToOS();
@ -294,3 +324,47 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) {
Cache.destroy(nullptr);
EXPECT_GT(Allocator->releaseToOS(), 0U);
}
SCUDO_TYPED_TEST(ScudoPrimaryTest, MemoryGroup) {
using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>;
std::unique_ptr<Primary> Allocator(new Primary);
Allocator->init(/*ReleaseToOsInterval=*/-1);
typename Primary::CacheT Cache;
Cache.init(nullptr, Allocator.get());
const scudo::uptr Size = 32U;
const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size);
// Allocate 4 times the group size worth of memory and release all of it. We
// expect the free blocks to be classified into groups. Then allocate one
// group size worth of memory and expect the maximum address difference among
// the returned blocks to be at most 2 times the group size. Note that the
// blocks don't have to fit within a single group, because the group id is
// derived by shifting the compact pointer and, depending on the
// configuration, the compact pointer may not be aligned to the group size.
// As a result, the blocks can span at most two groups.
const scudo::uptr GroupSizeMem = (1ULL << Primary::GroupSizeLog);
const scudo::uptr PeakAllocationMem = 4 * GroupSizeMem;
const scudo::uptr PeakNumberOfAllocations = PeakAllocationMem / Size;
const scudo::uptr FinalNumberOfAllocations = GroupSizeMem / Size;
std::vector<scudo::uptr> Blocks;
std::mt19937 R;
for (scudo::uptr I = 0; I < PeakNumberOfAllocations; ++I)
Blocks.push_back(reinterpret_cast<scudo::uptr>(Cache.allocate(ClassId)));
std::shuffle(Blocks.begin(), Blocks.end(), R);
// Release all the allocated blocks, including those held by local cache.
while (!Blocks.empty()) {
Cache.deallocate(ClassId, reinterpret_cast<void *>(Blocks.back()));
Blocks.pop_back();
}
Cache.drain();
for (scudo::uptr I = 0; I < FinalNumberOfAllocations; ++I)
Blocks.push_back(reinterpret_cast<scudo::uptr>(Cache.allocate(ClassId)));
EXPECT_LE(*std::max_element(Blocks.begin(), Blocks.end()) -
*std::min_element(Blocks.begin(), Blocks.end()),
GroupSizeMem * 2);
}
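
For intuition, the arithmetic this test relies on, written out as a standalone sketch (assuming GroupSizeLog = 20 as in TestConfig2 above): re-allocating one group size worth of blocks after freeing four should draw from the lowest group ids first, so the address spread stays within two group sizes even if it straddles one group boundary:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t GroupSizeLog = 20; // Assumed, as in TestConfig2.
  const uint64_t Size = 32;         // Block size requested by the test.
  const uint64_t GroupSizeMem = 1ULL << GroupSizeLog;
  const uint64_t PeakNumberOfAllocations = 4 * GroupSizeMem / Size;  // 131072
  const uint64_t FinalNumberOfAllocations = GroupSizeMem / Size;     // 32768
  assert(PeakNumberOfAllocations == 131072 && FinalNumberOfAllocations == 32768);
  // One group size worth of blocks can touch at most two adjacent groups when
  // the grouping window isn't aligned with the first block, hence the
  // `GroupSizeMem * 2` bound on the max-min address difference.
  assert(FinalNumberOfAllocations * Size <= 2 * GroupSizeMem);
  return 0;
}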
}