mirror of https://github.com/ByConity/ByConity
Merge branch 'cnch_hualloc_test' into 'cnch-dev'
feat(clickhousech@m-3348167131): cnch-hualloc

See merge request dp/ClickHouse!20751

# Conflicts:
#   build_bin.sh
#   src/Common/config.h.in
#   src/Storages/System/attachSystemTables.cpp
This commit is contained in:
parent 076fec9944
commit 981165eb0f
@@ -20,6 +20,7 @@ export CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=../output -DCMAKE_BUILD_TYPE=${CMAKE_
CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=../output ${CMAKE_FLAGS}"
CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=${CUSTOM_CMAKE_BUILD_TYPE:-RelWithDebInfo} $CMAKE_FLAGS"
CMAKE_FLAGS="-DENABLE_BREAKPAD=ON $CMAKE_FLAGS" # enable minidump
CMAKE_FLAGS="-DENABLE_HUALLOC=OFF ${CMAKE_FLAGS}"
[[ -n "$CUSTOM_SANITIZE" ]] && CMAKE_FLAGS="-DSANITIZE=$CUSTOM_SANITIZE $CMAKE_FLAGS"
[[ -n "$CUSTOM_MAX_LINKING_JOBS" ]] && CMAKE_FLAGS="-DPARALLEL_LINK_JOBS=${CUSTOM_MAX_LINKING_JOBS} ${CMAKE_FLAGS}"
[[ -n "$CUSTOM_MAX_COMPILE_JOBS" ]] && CMAKE_FLAGS="-DPARALLEL_COMPILE_JOBS=${CUSTOM_MAX_COMPILE_JOBS} ${CMAKE_FLAGS}"
@@ -436,3 +436,7 @@ endif()
if (USE_TSQUERY)
    add_subdirectory(TSQuery-cmake)
endif()

if (ENABLE_HUALLOC)
    add_subdirectory (hualloc-cmake)
endif()
@@ -0,0 +1,18 @@
if (SANITIZE OR NOT (
    ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE OR ARCH_RISCV64)) OR
    (OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"))))
    if (ENABLE_HUALLOC)
        message (${RECONFIGURE_MESSAGE_LEVEL}
            "hualloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
    endif ()
    set (ENABLE_HUALLOC OFF)
else ()
    option (ENABLE_HUALLOC "Enable hualloc allocator" ${ENABLE_LIBRARIES})
endif ()
# set (ENABLE_HUALLOC ON)

if (ENABLE_HUALLOC)
    message ( "Enable hualloc allocator")
    add_library(hualloc "${ClickHouse_SOURCE_DIR}/contrib/hualloc/hu_alloc.cpp")
    # add_library(ch_contrib::hualloc ALIAS _hualloc)
endif()
@@ -0,0 +1,323 @@
#include "hu_alloc.h"
#include <stdio.h>
#include <vector>
#include <set>
#include <string>
#include <iostream>
#include <sstream>

void* hu_calloc(size_t n, size_t elem_size)
{
    // Overflow check
    const size_t size = n * elem_size;
    if (elem_size != 0 && size / elem_size != n) return nullptr;

    void* result = hu_alloc(size);
    if (result != nullptr)
    {
        memset(result, 0, size);
    }
    return result;
}

void* hu_alloc_aligned(size_t size, size_t align)
{
    if (align > PAGE_SIZE)
        abort();
    return hu_alloc(align > size ? align : size);
}

void* hu_realloc(void* old_ptr, size_t new_size)
{
    if (old_ptr == nullptr)
    {
        void* result = hu_alloc(new_size);
        return result;
    }
    if (new_size == 0)
    {
        hu_free(old_ptr);
        return nullptr;
    }

    void* new_ptr = hu_alloc(new_size);
    if (new_ptr == nullptr)
    {
        return nullptr;
    }

    size_t old_size = hu_getsize(old_ptr);
    memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
    hu_free(old_ptr);
    return new_ptr;
}

void hu_free_w(void *p)
{
    hu_free(p);
}

void* hu_alloc_w(size_t sz)
{
    return hu_alloc(sz);
}

void hu_check_init_w()
{
    hu_check_init();
}

void* ReclaimThread(void *args)
{
    // keep & max can be separate for large & segment spaces
    const char * sleep_second = std::getenv("HUALLOC_CLAIM_INTERVAL");
    int sleep = 3;
    try
    {
        if (sleep_second && std::strlen(sleep_second) > 0)
            sleep = atoi(sleep_second);
    }
    catch(...)
    {
        sleep = 3;
    }

    yint cached = *(yint *) args;
    if (sleep > 0)
    {
        for (;;) {
            Sleep(sleep * 1000);
            ui64 total_cached = LargeCached() + SegmentCached();
            if (total_cached < cached * 2)
                continue;

            LargeReclaim(cached, ReclaimMaxReclaim);
            SegmentReclaim(cached, ReclaimMaxReclaim);
        }
    }
    return nullptr;
}

ui64 LargeCached()
{
    if (AllocatorIsInitialized != 1)
        return 0;
    ui64 total = 0;
    for (yint g = 0; g < LARGE_GROUP_COUNT; ++g)
    {
        TLargeGroupInfo &gg = LargeGroupInfo[g];
        total += _mm_popcnt_u64(gg.FreeBlockMask & gg.CommitedMask);
    }
    return total * LARGE_BLOCK_SIZE;
}

ui64 SegmentCached()
{
    if (AllocatorIsInitialized != 1)
        return 0;
    ui64 total = 0;
    for (yint g = 0; g < SEGMENT_GROUP_COUNT; ++g)
    {
        TSegmentGroupInfo &gg = SegmentGroupInfo[g];
        total += _mm_popcnt_u64(gg.GoodForReclaimMask);
    }

    return total * SEGMENT_SIZE;
}

ui64 HugeAlloc()
{
    return GAllocCnt.load();
}

ui64 LargeReclaimed()
{
    return LargeReclaimCnt.load();
}

ui64 SegmentReclaimed()
{
    return SegmentReclaimCnt.load();
}

/* mbind Policies */
#define MPOL_DEFAULT 0
#define MPOL_PREFERRED 1
#define MPOL_BIND 2
#define MPOL_INTERLEAVE 3
#define MPOL_LOCAL 4
#define MPOL_MAX 5

#define __NR_mbind 237

static long mbind_bytedance(void *start, unsigned long len, int mode,
    const unsigned long *nmask, unsigned long maxnode, unsigned flags)
{
    return syscall(__NR_mbind, (long)start, len, mode, (long)nmask,
        maxnode, flags);
}

bool hualloc_use_numa_info = false;
bool hualloc_enable_mbind = false;
int hualloc_mbind_mode = MPOL_BIND;
void (*hualloc_logger)(std::string) = nullptr;

void hualloc_log(std::string s)
{
    if (hualloc_logger)
        hualloc_logger(s);
    else
        printf("%s", s.c_str());
}

size_t hualloc_numa_node_count = 0;
std::unordered_map<size_t, size_t> hualloc_cpu_index_to_numa_node;

size_t hualloc_used_numa_node_count = 0;
std::unordered_map<size_t, size_t> hualloc_used_numa_nodes_to_mem_index; // node index -> mem index for node

void mbind_memory(char *mem, size_t size, int alignment)
{
    int alignment_count = size/alignment;

    for (auto & hualloc_used_numa_node : hualloc_used_numa_nodes_to_mem_index)
    {
        int mem_index = hualloc_used_numa_node.second;
        int numa_node = hualloc_used_numa_node.first;
        char *mem_cur = mem + (alignment_count/hualloc_numa_node_count) * mem_index * alignment;
        char *mem_next = mem + (alignment_count/hualloc_numa_node_count) * (mem_index+1) * alignment;
        uint64_t mbind_mask = 1ull<<numa_node;

        int res = mbind_bytedance(mem_cur, mem_next-mem_cur, hualloc_mbind_mode, &mbind_mask, hualloc_numa_node_count+1, 0);

        std::stringstream ss;
        ss << "hualloc numa info: bind mem [" << static_cast<void*>(mem_cur) << ", " << static_cast<void*>(mem_next)
           << ") len 0x" << std::hex << mem_next-mem_cur
           << " @ index " << mem_index << " -> numa node " << numa_node << " return " << res ;
        ss << " err: " << errno << "-" << strerror(errno) << std::endl;
        hualloc_log(ss.str());
    }
}

std::string getCpuListOfNumaNode(int numa_noe)
{
    std::set<int> cpu_set;
    std::string cpu_list;
    for (auto & item : hualloc_cpu_index_to_numa_node)
        if (item.second == numa_noe)
            cpu_set.insert(item.first);
    cpu_list += "[";
    for (auto cpu_index : cpu_set)
        cpu_list += std::to_string(cpu_index) + ",";
    if (!cpu_list.empty()) {
        cpu_list.pop_back();
    }
    cpu_list += "]";
    return cpu_list;
}

void huallocSetNumaInfo(
    size_t max_numa_node_,
    std::vector<cpu_set_t> & numa_nodes_cpu_mask_,
    bool hualloc_enable_mbind_,
    int mbind_mode,
    void (*logger)(std::string)
)
{
    hualloc_logger = logger;
    if (max_numa_node_ <= 0 || numa_nodes_cpu_mask_.size() != max_numa_node_+1)
        return;
    hualloc_enable_mbind = hualloc_enable_mbind_;
    hualloc_numa_node_count = max_numa_node_+1;
    hualloc_mbind_mode = mbind_mode;

    std::stringstream ss;
    ss << "hualloc numa info: max_numa_node: " << max_numa_node_ << ", numa_nodes_cpu_mask.size(): " << numa_nodes_cpu_mask_.size() << std::endl;
    hualloc_log(ss.str());
    for (int i = 0; i < numa_nodes_cpu_mask_.size(); ++i)
    {
        cpu_set_t cpu_mask = numa_nodes_cpu_mask_[i];
        for (int cpu_index = 0; cpu_index < CPU_SETSIZE; ++cpu_index)
        {
            if (CPU_ISSET(cpu_index, &cpu_mask))
            {
                hualloc_cpu_index_to_numa_node[cpu_index] = i;
            }
        }
    }

    cpu_set_t progress_cpu_mask;
    std::set<int> progress_used_numa_nodes;
    CPU_ZERO(&progress_cpu_mask);
    if (sched_getaffinity(0, sizeof(cpu_set_t), &progress_cpu_mask) == -1) {
        hualloc_log("sched_getaffinity fail");
        return;
    }
    for (int cpu_index = 0; cpu_index < CPU_SETSIZE; ++cpu_index)
    {
        if (CPU_ISSET(cpu_index, &progress_cpu_mask))
        {
            progress_used_numa_nodes.insert(hualloc_cpu_index_to_numa_node[cpu_index]);
        }
    }
    hualloc_used_numa_node_count = progress_used_numa_nodes.size();
    if (hualloc_used_numa_node_count <= 0 || hualloc_used_numa_node_count > hualloc_numa_node_count)
    {
        std::stringstream ss;
        ss << "hualloc numa info: hualloc_used_numa_node_count is " << hualloc_used_numa_node_count << ", hualloc_numa_node_count is "
           << hualloc_numa_node_count << ". Won't set hualloc_use_numa_info\n";
        hualloc_log(ss.str());
        return;
    }
    int mem_index = 0;
    for (int hualloc_used_numa_node : progress_used_numa_nodes)
    {
        std::string cpu_list = getCpuListOfNumaNode(hualloc_used_numa_node);
        std::stringstream ss;
        ss << "hualloc numa info: numa node(" << hualloc_used_numa_node << ") -> mem_index(" << mem_index << ") -> cpu list: "
           << cpu_list.c_str() << std::endl;
        hualloc_log(ss.str());

        hualloc_used_numa_nodes_to_mem_index[hualloc_used_numa_node] = mem_index;
        ++mem_index;
    }

    hualloc_use_numa_info = true;
}

int get_thread_numa_mem_index()
{
    int cpu = sched_getcpu();

    int numa_node_index = hualloc_cpu_index_to_numa_node[cpu];
    return hualloc_used_numa_nodes_to_mem_index[numa_node_index];
}

ui64 GetTotalLargeAlloc()
{
    return TotalLargeAlloc.load();
}

ui64 GetTotalLargeFree()
{
    return TotalLargeFree.load();
}

ui64 GetTotalSegmentAlloc()
{
    return TotalSegmentAlloc.load();
}

ui64 GetTotalSegmentFree()
{
    return TotalSegmentFree.load();
}

ui64 GetTotalGiantAlloc()
{
    return TotalGiantAlloc.load();
}

ui64 GetTotalGiantFree()
{
    return TotalGiantFree.load();
}
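The reclaim loop above wakes up every HUALLOC_CLAIM_INTERVAL seconds (default 3) and only reclaims once the cached bytes exceed twice the budget passed in through args. A minimal sketch of driving it directly, under the assumption that the declarations above are visible from hu_alloc.h; in this tree the thread is normally started by HuAllocator<false>::InitHuAlloc instead:

#include <pthread.h>
#include <cstdlib>

// Hypothetical standalone driver for the reclaim loop shown above.
// The budget must outlive the thread, because ReclaimThread keeps
// dereferencing the pointer it receives on every iteration.
static yint reclaim_budget = 512ll << 20;      // keep roughly 512 MiB cached

int start_reclaim_example()
{
    setenv("HUALLOC_CLAIM_INTERVAL", "5", 1);  // optional: poll every 5 s instead of 3 s

    hu_check_init_w();                         // make sure the allocator is initialized first

    pthread_t tid;
    return pthread_create(&tid, nullptr, ReclaimThread, &reclaim_budget);
}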
(File diff suppressed because it is too large.)
@@ -138,6 +138,7 @@
#include <common/phdr_cache.h>
#include <common/scope_guard.h>
#include <Common/ChineseTokenExtractor.h>
#include <Common/HuAllocator.h>

#include <CloudServices/CnchServerClientPool.h>

@@ -227,7 +228,6 @@ namespace DB::ErrorCodes
int mainEntryClickHouseServer(int argc, char ** argv)
{
    DB::Server app;

    if (jemallocOptionEnabled("opt.background_thread"))
    {
        LOG_ERROR(&app.logger(),

@@ -534,6 +534,12 @@ void checkForUsersNotInMainConfig(
#endif
}

void huallocLogPrint(std::string s)
{
    static Poco::Logger * logger = &Poco::Logger::get("HuallocDebug");
    LOG_INFO(logger, s);
}

int Server::main(const std::vector<std::string> & /*args*/)
{
    Poco::Logger * log = &logger();

@@ -1008,6 +1014,37 @@ int Server::main(const std::vector<std::string> & /*args*/)
    }
    BrpcApplication::getInstance().reloadConfig(*config);

#if USE_HUALLOC
    if (config->getBool("hualloc_numa_aware", false))
    {
        size_t max_numa_node = SystemUtils::getMaxNumaNode();
        std::vector<cpu_set_t> numa_nodes_cpu_mask = SystemUtils::getNumaNodesCpuMask();
        bool hualloc_enable_mbind = config->getBool("hualloc_enable_mbind", false);
        int mbind_mode = config->getInt("hualloc_mbind_mode", 1);

        /*
         * mbind mode
         #define MPOL_DEFAULT 0
         #define MPOL_PREFERRED 1
         #define MPOL_BIND 2
         #define MPOL_INTERLEAVE 3
         #define MPOL_LOCAL 4
         #define MPOL_MAX 5
        */
        huallocSetNumaInfo(
            max_numa_node,
            numa_nodes_cpu_mask,
            hualloc_enable_mbind,
            mbind_mode,
            huallocLogPrint
        );
    }

    double default_hualloc_cache_ratio = config->getDouble("hualloc_cache_ratio", 0.25);
    LOG_INFO(log, "HuAlloc cache memory size:{}",
        formatReadableSizeWithBinarySuffix(max_server_memory_usage * default_hualloc_cache_ratio));
    HuAllocator<false>::InitHuAlloc(max_server_memory_usage * default_hualloc_cache_ratio);
#endif
    total_memory_tracker.setHardLimit(max_server_memory_usage);
    total_memory_tracker.setDescription("(total)");
    total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
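A small worked example of the sizing above, with a hypothetical 64 GiB max_server_memory_usage and the default hualloc_cache_ratio of 0.25. It only illustrates the formula in this hunk; InitHuAlloc later halves the value to get the reclaim watermark:

#include <cstdint>
#include <cassert>

void hualloc_cache_sizing_example()
{
    const uint64_t max_server_memory_usage = 64ull << 30;  // hypothetical 64 GiB
    const double hualloc_cache_ratio = 0.25;               // default used by the config read above

    const auto cache_budget = static_cast<uint64_t>(max_server_memory_usage * hualloc_cache_ratio);
    const uint64_t reclaim_watermark = cache_budget / 2;   // what ReclaimThread actually targets

    assert(cache_budget == 16ull << 30);      // 16 GiB passed to InitHuAlloc
    assert(reclaim_watermark == 8ull << 30);  // 8 GiB kept cached before reclaiming starts
}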
@@ -329,11 +329,19 @@ list (APPEND DBMS_COMMON_LIBRARIES ch_contrib::abseil_swiss_tables)
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
    add_library (dbms STATIC ${dbms_headers} ${dbms_sources})
    target_link_libraries (dbms PRIVATE jemalloc libdivide ${DBMS_COMMON_LIBRARIES})
    if (USE_HUALLOC)
        target_link_libraries (dbms PRIVATE hualloc)
    endif()

    set (all_modules dbms)
else()
    add_library (dbms SHARED ${dbms_headers} ${dbms_sources})
    target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES})
    target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide)
    if (USE_HUALLOC)
        target_link_libraries(clickhouse_interpreters PRIVATE hualloc)
    endif()

    list (APPEND all_modules dbms)
    # force all split libs to be linked
    if (OS_DARWIN)
@@ -0,0 +1,51 @@
#pragma once

#include <Columns/IColumn.h>
#include <Common/PODArray.h>


namespace DB
{

/** Allows to access internal array of fixed-size column without cast to concrete type.
  * We will inherit ColumnVector and ColumnFixedString from this class instead of IColumn.
  * Assumes data layout of ColumnVector, ColumnFixedString and PODArray.
  *
  * Why is it needed?
  *
  * There are some algorithms that specialize on the size of the data type but don't care about the concrete type.
  * The same specialization may work for UInt64, Int64, Float64, FixedString(8), if it only does byte moving and hashing.
  * To avoid code bloat and compile time increase, we can use a single template instantiation for these cases
  * and just static_cast a pointer to some single column type (e. g. ColumnUInt64), assuming that all types have identical memory layout.
  *
  * But this static_cast (downcast to an unrelated type) is illegal according to the C++ standard and UBSan warns about it.
  * To allow functional tests to work under UBSan we have to separate some base class that will present the memory layout in an explicit way,
  * and we will do static_cast to this class.
  */
class ColumnFixedSizeHelper : public IColumn
{
public:
    template <size_t ELEMENT_SIZE>
    const char * getRawDataBegin() const
    {
        tryToFlushZeroCopyBuffer();
#if USE_HUALLOC
        return reinterpret_cast<const PODArrayBase<ELEMENT_SIZE, 4096, HuAllocator<false>, 15, 16> *>(reinterpret_cast<const char *>(this) + sizeof(*this))->raw_data();
#else
        return reinterpret_cast<const PODArrayBase<ELEMENT_SIZE, 4096, Allocator<false>, 15, 16> *>(reinterpret_cast<const char *>(this) + sizeof(*this))->raw_data();
#endif
    }

    template <size_t ELEMENT_SIZE>
    void insertRawData(const char * ptr)
    {
        tryToFlushZeroCopyBuffer();
#if USE_HUALLOC
        return reinterpret_cast<PODArrayBase<ELEMENT_SIZE, 4096, HuAllocator<false>, 15, 16> *>(reinterpret_cast<char *>(this) + sizeof(*this))->push_back_raw(ptr);
#else
        return reinterpret_cast<PODArrayBase<ELEMENT_SIZE, 4096, Allocator<false>, 15, 16> *>(reinterpret_cast<char *>(this) + sizeof(*this))->push_back_raw(ptr);
#endif
    }
};

}
@@ -29,14 +29,22 @@ public:
    const char * getRawDataBegin() const
    {
        tryToFlushZeroCopyBuffer();
#if USE_HUALLOC
        return reinterpret_cast<const PODArrayBase<ELEMENT_SIZE, 4096, HuAllocator<false>, 15, 16> *>(reinterpret_cast<const char *>(this) + sizeof(*this))->raw_data();
#else
        return reinterpret_cast<const PODArrayBase<ELEMENT_SIZE, 4096, Allocator<false>, 15, 16> *>(reinterpret_cast<const char *>(this) + sizeof(*this))->raw_data();
#endif
    }

    template <size_t ELEMENT_SIZE>
    void insertRawData(const char * ptr)
    {
        tryToFlushZeroCopyBuffer();
#if USE_HUALLOC
        return reinterpret_cast<PODArrayBase<ELEMENT_SIZE, 4096, HuAllocator<false>, 15, 16> *>(reinterpret_cast<char *>(this) + sizeof(*this))->push_back_raw(ptr);
#else
        return reinterpret_cast<PODArrayBase<ELEMENT_SIZE, 4096, Allocator<false>, 15, 16> *>(reinterpret_cast<char *>(this) + sizeof(*this))->push_back_raw(ptr);
#endif
    }
};

@@ -8,3 +8,6 @@ class Allocator;

template <typename Base, size_t N = 64, size_t Alignment = 1>
class AllocatorWithStackMemory;

template <bool clear_memory>
class HuAllocator;
@@ -11,6 +11,7 @@
#include <Common/memcpySmall.h>
#include <Common/ProfileEvents.h>
#include <Common/Allocator.h>
#include <Common/HuAllocator.h>


namespace ProfileEvents

@@ -38,7 +39,11 @@ private:
    static constexpr size_t pad_right = 15;

    /// Contiguous MemoryChunk of memory and pointer to free space inside it. Member of single-linked list.
#if USE_HUALLOC
    struct alignas(16) MemoryChunk : private HuAllocator<false> /// empty base optimization
#else
    struct alignas(16) MemoryChunk : private Allocator<false> /// empty base optimization
#endif
    {
        char * begin;
        char * pos;

@@ -51,7 +56,11 @@ private:
            ProfileEvents::increment(ProfileEvents::ArenaAllocChunks);
            ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_);

#if USE_HUALLOC
            begin = reinterpret_cast<char *>(HuAllocator<false>::alloc(size_));
#else
            begin = reinterpret_cast<char *>(Allocator<false>::alloc(size_));
#endif
            pos = begin;
            end = begin + size_ - pad_right;
            prev = prev_;

@@ -66,8 +75,11 @@ private:
            /// memory would stay poisoned forever. If the allocator supports
            /// asan, it will correctly poison the memory by itself.
            ASAN_UNPOISON_MEMORY_REGION(begin, size());

#if USE_HUALLOC
            HuAllocator<false>::free(begin, size());
#else
            Allocator<false>::free(begin, size());
#endif

            if (prev)
                delete prev;
@@ -83,7 +83,11 @@ protected:


/// Switches to ordinary Allocator after REAL_ALLOCATION_TRESHOLD bytes to avoid fragmentation and trash in Arena.
#if USE_HUALLOC
template <size_t REAL_ALLOCATION_TRESHOLD = 4096, typename TRealAllocator = HuAllocator<false>, typename TArenaAllocator = ArenaAllocator, size_t alignment = 0>
#else
template <size_t REAL_ALLOCATION_TRESHOLD = 4096, typename TRealAllocator = Allocator<false>, typename TArenaAllocator = ArenaAllocator, size_t alignment = 0>
#endif
class MixedArenaAllocator : private TRealAllocator
{
public:

@@ -122,9 +126,13 @@ protected:
};


#if USE_HUALLOC
template <size_t alignment, size_t REAL_ALLOCATION_TRESHOLD = 4096>
using MixedAlignedArenaAllocator = MixedArenaAllocator<REAL_ALLOCATION_TRESHOLD, HuAllocator<false>, AlignedArenaAllocator<alignment>, alignment>;
#else
template <size_t alignment, size_t REAL_ALLOCATION_TRESHOLD = 4096>
using MixedAlignedArenaAllocator = MixedArenaAllocator<REAL_ALLOCATION_TRESHOLD, Allocator<false>, AlignedArenaAllocator<alignment>, alignment>;

#endif

template <size_t N = 64, typename Base = ArenaAllocator>
class ArenaAllocatorWithStackMemory : public Base
@@ -21,7 +21,11 @@ namespace DB
  * When allocating, we take the head of the list of free blocks,
  * or, if the list is empty - allocate a new block using Arena.
  */
#if USE_HUALLOC
class ArenaWithFreeLists : private HuAllocator<false>, private boost::noncopyable
#else
class ArenaWithFreeLists : private Allocator<false>, private boost::noncopyable
#endif
{
private:
    /// If the block is free, then the pointer to the next free block is stored at its beginning, or nullptr, if there are no more free blocks.

@@ -58,8 +62,13 @@ public:

    char * alloc(const size_t size)
    {
#if USE_HUALLOC
        if (size > max_fixed_block_size)
            return static_cast<char *>(HuAllocator<false>::alloc(size));
#else
        if (size > max_fixed_block_size)
            return static_cast<char *>(Allocator<false>::alloc(size));
#endif

        /// find list of required size
        const auto list_idx = findFreeListIndex(size);

@@ -90,8 +99,13 @@ public:

    void free(char * ptr, const size_t size)
    {
#if USE_HUALLOC
        if (size > max_fixed_block_size)
            return HuAllocator<false>::free(ptr, size);
#else
        if (size > max_fixed_block_size)
            return Allocator<false>::free(ptr, size);
#endif

        /// find list of required size
        const auto list_idx = findFreeListIndex(size);
@@ -1,6 +1,8 @@
#pragma once

#include <Common/config.h>
#include <Common/Allocator.h>
#include <Common/HuAllocator.h>


/**

@@ -8,7 +10,11 @@
 * table, so it makes sense to pre-fault the pages so that page faults don't
 * interrupt the resize loop. Set the allocator parameter accordingly.
 */
#if USE_HUALLOC
using HashTableAllocator = HuAllocator<true>;
#else
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
#endif

template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;
@@ -0,0 +1,6 @@
#include "HuAllocator.h"

#if USE_HUALLOC
template class HuAllocator<false>;
template class HuAllocator<true>;
#endif
@@ -0,0 +1,200 @@
#pragma once
#include <string.h>

#ifdef NDEBUG
#define ALLOCATOR_ASLR 0
#else
#define ALLOCATOR_ASLR 1
#endif

#include <pcg_random.hpp>
#include <Common/thread_local_rng.h>

#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif

#include <cstdlib>
#include <algorithm>
#include <sys/mman.h>

#include <Core/Defines.h>
#include <common/getPageSize.h>

#include <Common/CurrentMemoryTracker.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>

#include <common/errnoToString.h>
#include <Poco/Logger.h>
#include <common/logger_useful.h>
#include <Common/config.h>

#if USE_HUALLOC
#include <hualloc/hu_alloc.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int LOGICAL_ERROR;
}

}
static constexpr size_t HUMALLOC_MIN_ALIGNMENT = 8;

template <bool clear_memory_>
class HuAllocator
{
public:

    /// Allocate memory range.
    void * alloc(size_t size, size_t alignment = 0)
    {
        checkSize(size);
        CurrentMemoryTracker::alloc(size);
        void * ptr = allocNoTrack(size, alignment);
        return ptr;
    }

    /// Free memory range.
    void free(void * buf, size_t size)
    {
        try
        {
            checkSize(size);
            freeNoTrack(buf);
            CurrentMemoryTracker::free(size);
        }
        catch (...)
        {
            DB::tryLogCurrentException("HugeAllocator::free");
            throw;
        }
    }

    /** Enlarge memory range.
      * Data from old range is moved to the beginning of new range.
      * Address of memory range could change.
      */
    void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
    {
        checkSize(new_size);

        if (old_size == new_size)
        {
            /// nothing to do.
            /// BTW, it's not possible to change alignment while doing realloc.
        }
        else if (alignment <= HUMALLOC_MIN_ALIGNMENT)
        {
            /// Resize malloc'd memory region with no special alignment requirement.
            CurrentMemoryTracker::free(old_size);
            CurrentMemoryTracker::alloc(new_size);
            void * new_buf = hu_realloc(buf, new_size);
            if (nullptr == new_buf)
            {
                DB::throwFromErrno(
                    fmt::format("HugeAllocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
            }

            buf = new_buf;

            if constexpr (clear_memory)
                if (new_size > old_size)
                    memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
        }
        else
        {
            /// Big allocs that require a copy. MemoryTracker is called inside 'alloc', 'free' methods.
            void * new_buf = alloc(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            free(buf, old_size);
            buf = new_buf;
        }

        return buf;
    }

    static void InitHuAlloc(size_t cached)
    {
        hu_check_init_w();
        pthread_t tid;
        /// Must stay alive after return: ReclaimThread keeps reading the budget through this pointer.
        static size_t use_cache = cached / 2;
        if (use_cache <= 0)
            use_cache = 1024 * (1ull << 20); /// If not set properly use 1G as default
        pthread_create(&tid, nullptr, ReclaimThread, &use_cache);
    }

protected:
    static constexpr size_t getStackThreshold()
    {
        return 0;
    }

    static constexpr bool clear_memory = clear_memory_;

private:

    void * allocNoTrack(size_t size, size_t alignment)
    {
        void * buf;
        if (alignment <= HUMALLOC_MIN_ALIGNMENT)
        {
            if constexpr (clear_memory)
                buf = hu_calloc(size, 1);
            else
                buf = hu_alloc_w(size);

            if (nullptr == buf)
                DB::throwFromErrno(fmt::format("HugeAllocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
        }
        else
        {
            buf = hu_alloc_aligned(size, alignment);

            if (!buf)
                DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
                    DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, errno);

            if constexpr (clear_memory)
                memset(buf, 0, size);
        }

        return buf;
    }

    void freeNoTrack(void * buf)
    {
        hu_free_w(buf);
    }

    void checkSize(size_t size)
    {
        /// More obvious exception in case of possible overflow (instead of just "Cannot mmap").
        if (size >= 0x8000000000000000ULL)
            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to HugeAllocator. It indicates an error.", size);
    }
};

/** When using AllocatorWithStackMemory, located on the stack,
  * GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
  */
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
#endif

/// Prevent implicit template instantiation of HugeAllocator

extern template class HuAllocator<false>;
extern template class HuAllocator<true>;

#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
#endif
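A minimal usage sketch of the class above, assuming a build with USE_HUALLOC enabled. It mirrors how Allocator<false> is used elsewhere in the tree; the old and new sizes must be passed exactly, since the allocator reports them to CurrentMemoryTracker:

#if USE_HUALLOC
#include <Common/HuAllocator.h>
#include <cstring>

void huallocator_usage_example()
{
    HuAllocator<false> allocator;              // 'false' = do not zero-fill new ranges

    void * buf = allocator.alloc(1024);        // tracked via CurrentMemoryTracker
    std::memset(buf, 0xAB, 1024);

    buf = allocator.realloc(buf, 1024, 4096);  // both the old and the new size are required
    allocator.free(buf, 4096);                 // size must match the last alloc/realloc
}
#endif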
@@ -5,7 +5,17 @@ namespace DB

/// Used for left padding of PODArray when empty
const char empty_pod_array[empty_pod_array_size]{};
#if USE_HUALLOC
template class PODArray<UInt8, 4096, HuAllocator<false>, 15, 16>;
template class PODArray<UInt16, 4096, HuAllocator<false>, 15, 16>;
template class PODArray<UInt32, 4096, HuAllocator<false>, 15, 16>;
template class PODArray<UInt64, 4096, HuAllocator<false>, 15, 16>;

template class PODArray<Int8, 4096, HuAllocator<false>, 15, 16>;
template class PODArray<Int16, 4096, HuAllocator<false>, 15, 16>;
template class PODArray<Int32, 4096, HuAllocator<false>, 15, 16>;
template class PODArray<Int64, 4096, HuAllocator<false>, 15, 16>;
#else
template class PODArray<UInt8, 4096, Allocator<false>, 15, 16>;
template class PODArray<UInt16, 4096, Allocator<false>, 15, 16>;
template class PODArray<UInt32, 4096, Allocator<false>, 15, 16>;

@@ -15,5 +25,6 @@ template class PODArray<Int8, 4096, Allocator<false>, 15, 16>;
template class PODArray<Int16, 4096, Allocator<false>, 15, 16>;
template class PODArray<Int32, 4096, Allocator<false>, 15, 16>;
template class PODArray<Int64, 4096, Allocator<false>, 15, 16>;
#endif

}
@@ -32,6 +32,7 @@
#include <common/strong_typedef.h>

#include <Common/Allocator.h>
#include <Common/HuAllocator.h>
#include <Common/Exception.h>
#include <Common/BitHelpers.h>
#include <Common/memcpySmall.h>

@@ -843,7 +844,17 @@ void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_> & lhs, P
#pragma GCC diagnostic pop

/// Prevent implicit template instantiation of PODArray for common numeric types
#if USE_HUALLOC
extern template class PODArray<UInt8, 4096, HuAllocator<false>, 15, 16>;
extern template class PODArray<UInt16, 4096, HuAllocator<false>, 15, 16>;
extern template class PODArray<UInt32, 4096, HuAllocator<false>, 15, 16>;
extern template class PODArray<UInt64, 4096, HuAllocator<false>, 15, 16>;

extern template class PODArray<Int8, 4096, HuAllocator<false>, 15, 16>;
extern template class PODArray<Int16, 4096, HuAllocator<false>, 15, 16>;
extern template class PODArray<Int32, 4096, HuAllocator<false>, 15, 16>;
extern template class PODArray<Int64, 4096, HuAllocator<false>, 15, 16>;
#else
extern template class PODArray<UInt8, 4096, Allocator<false>, 15, 16>;
extern template class PODArray<UInt16, 4096, Allocator<false>, 15, 16>;
extern template class PODArray<UInt32, 4096, Allocator<false>, 15, 16>;

@@ -853,5 +864,5 @@ extern template class PODArray<Int8, 4096, Allocator<false>, 15, 16>;
extern template class PODArray<Int16, 4096, Allocator<false>, 15, 16>;
extern template class PODArray<Int32, 4096, Allocator<false>, 15, 16>;
extern template class PODArray<Int64, 4096, Allocator<false>, 15, 16>;

#endif
}
@@ -6,6 +6,7 @@

#include <common/types.h>
#include <Common/Allocator_fwd.h>
#include <Common/config.h>

namespace DB
{

@@ -15,6 +16,25 @@ inline constexpr size_t integerRoundUp(size_t value, size_t dividend)
    return ((value + dividend - 1) / dividend) * dividend;
}

#if USE_HUALLOC
template <typename T, size_t initial_bytes = 4096,
    typename TAllocator = HuAllocator<false>, size_t pad_right_ = 0,
    size_t pad_left_ = 0>
class PODArray;

template <typename T, size_t initial_bytes = 4096, typename TAllocator = HuAllocator<false>>
using PaddedPODArray = PODArray<T, initial_bytes, TAllocator, 15, 16>;

/** A helper for declaring PODArray that uses inline memory.
  * The initial size is set to use all the inline bytes, since using less would
  * only add some extra allocation calls.
  */
template <typename T, size_t inline_bytes,
    size_t rounded_bytes = integerRoundUp(inline_bytes, sizeof(T))>
using PODArrayWithStackMemory = PODArray<T, rounded_bytes,
    AllocatorWithStackMemory<HuAllocator<false>, rounded_bytes, alignof(T)>>;

#else
template <typename T, size_t initial_bytes = 4096,
    typename TAllocator = Allocator<false>, size_t pad_right_ = 0,
    size_t pad_left_ = 0>

@@ -32,5 +52,7 @@ template <typename T, size_t inline_bytes,
    size_t rounded_bytes = integerRoundUp(inline_bytes, sizeof(T))>
using PODArrayWithStackMemory = PODArray<T, rounded_bytes,
    AllocatorWithStackMemory<Allocator<false>, rounded_bytes, alignof(T)>>;
#endif


}
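With the forward declarations above, PaddedPODArray<T> picks up HuAllocator<false> as its default allocator whenever USE_HUALLOC is set; call sites do not change. A brief sketch under that assumption:

#include <cstdint>
#include <Common/PODArray.h>

void padded_pod_array_example()
{
    /// Same spelling in both build modes; the default TAllocator behind it is
    /// HuAllocator<false> when USE_HUALLOC is on and Allocator<false> otherwise.
    DB::PaddedPODArray<uint64_t> values;
    values.reserve(1024);
    for (uint64_t i = 0; i < 1024; ++i)
        values.push_back(i);
}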
@@ -12,6 +12,10 @@ constexpr auto linux_numa_cpu_file_online = "/sys/devices/system/node/online";
constexpr auto linux_numa_cpu_file_possible = "/sys/devices/system/node/possible";
size_t max_numa_node = 0;

std::mutex numa_nodes_cpu_mask_mutex;
bool numa_nodes_cpu_mask_initialized = false;
std::vector<cpu_set_t> numa_nodes_cpu_mask;

size_t buffer_to_number(const std::string & buffer)
{
    try

@@ -50,4 +54,97 @@ __attribute__((constructor)) static void init_max_numa_node()

    try_read_max_numa_nude(linux_numa_cpu_file_possible);
}

std::vector<size_t> parse_cpu_list(const std::string & cpu_list_str)
{
    std::unique_ptr<DB::UInt16> lb_cache = nullptr;
    DB::Int32 digit_cache = -1;
    std::vector<size_t> cpu_list;
    for (auto it = cpu_list_str.cbegin();; it++)
    {
        if (it == cpu_list_str.cend() || *it == ',')
        {
            if (digit_cache < 0)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid format of cpu_list: {}", cpu_list_str);
            if (!lb_cache)
                cpu_list.emplace_back(digit_cache);
            else
            {
                auto start = *lb_cache;
                lb_cache.reset();

                for (int i = start; i <= digit_cache; i++)
                {
                    cpu_list.emplace_back(i);
                }
            }
            if (it == cpu_list_str.cend())
                break;
            digit_cache = -1;
        }
        else if (*it >= '0' && *it <= '9')
        {
            digit_cache = digit_cache > 0 ? digit_cache * 10 + (*it - 48) : (*it - 48);
        }
        else if (std::isspace(*it))
        {

        }
        else if (*it == '-')
        {
            if (digit_cache < 0 || lb_cache)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid format of cpu_list: {}", cpu_list_str);
            lb_cache = std::make_unique<DB::UInt16>(digit_cache);
            digit_cache = -1;
        }
        else
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid format of cpu_list: {}", cpu_list_str);
    }
    return cpu_list;
}

void init_numa_nodes_cpu_mask()
{
    numa_nodes_cpu_mask.resize(max_numa_node+1);

    for (size_t numa_node = 0; numa_node < numa_nodes_cpu_mask.size(); ++numa_node)
    {
        CPU_ZERO(&numa_nodes_cpu_mask[numa_node]);

        std::string cpu_list_path = fmt::format("/sys/devices/system/node/node{}/cpulist", numa_node);
        if (!std::filesystem::exists(cpu_list_path))
            continue;
        std::ifstream fstream(cpu_list_path);
        std::stringstream buffer;
        buffer << fstream.rdbuf();
        if (buffer.str().empty())
            continue;

        try
        {
            auto cpu_list = parse_cpu_list(buffer.str());
            for (auto cpu_index : cpu_list)
                CPU_SET(cpu_index, &numa_nodes_cpu_mask[numa_node]);
        }
        catch (std::exception &)
        {
        }
    }
}

std::vector<cpu_set_t> SystemUtils::getNumaNodesCpuMask()
{
#if defined(__linux__)
    if (numa_nodes_cpu_mask_initialized)
        return numa_nodes_cpu_mask;
    std::unique_lock lock(numa_nodes_cpu_mask_mutex);
    if (numa_nodes_cpu_mask_initialized)
        return numa_nodes_cpu_mask;
    init_numa_nodes_cpu_mask();
    numa_nodes_cpu_mask_initialized = true;
    return numa_nodes_cpu_mask;
#else
    return {};
#endif
}

}
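parse_cpu_list accepts the same syntax as the sysfs cpulist files it is fed from: comma-separated entries, each either a single CPU index or an inclusive a-b range. A small usage sketch; the include path is an assumption:

#include <cassert>
#include <string>
#include <vector>
#include <Common/SystemUtils.h>  // assumed location of the parse_cpu_list declaration

namespace DB
{
void parse_cpu_list_example()
{
    /// "0-3,8-11" -> {0, 1, 2, 3, 8, 9, 10, 11}
    std::vector<size_t> cpus = parse_cpu_list("0-3,8-11");
    assert(cpus.size() == 8);
    assert(cpus.front() == 0 && cpus.back() == 11);

    /// Single CPUs and ranges can be mixed: "2,4-5" -> {2, 4, 5}
    assert(parse_cpu_list("2,4-5").size() == 3);
}
}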
@@ -62,6 +62,8 @@ struct CpuUsageInfo

size_t buffer_to_number(const std::string & buffer);

void init_numa_nodes_cpu_mask();

class SystemUtils
{
public:

@@ -160,6 +162,8 @@ public:
        return 0;
    }

    static std::vector<cpu_set_t> getNumaNodesCpuMask();

    static void getCpuUsageInfo(const std::unordered_set<size_t> & cpu_nodes, std::vector<CpuUsageInfo> & cpu_usage_info_vec)
    {
#if defined(__linux__)

@@ -194,4 +198,7 @@ public:
#endif
    }
};

std::vector<size_t> parse_cpu_list(const std::string & cpu_list_str);

}
@@ -33,3 +33,4 @@
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON
#cmakedefine01 USE_NLP
#cmakedefine01 USE_HUALLOC

@@ -19,3 +19,4 @@
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON
#cmakedefine01 USE_NLP
#cmakedefine01 USE_HUALLOC
@@ -49,6 +49,7 @@
#include <common/logger_useful.h>
#include <fmt/format.h>
#include <common/errnoToString.h>
#include <Common/HuAllocator.h>

#if !defined(ARCADIA_BUILD)
#    include "config_core.h"

@@ -730,17 +731,37 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
    Int64 amount = total_memory_tracker.get();
    Int64 peak = total_memory_tracker.getPeak();
    Int64 new_amount = data.resident;
    [[maybe_unused]] Int64 free_memory_in_allocator_arenas = 0;

#if USE_HUALLOC
    /// With hualloc, cached memory should be treated as free memory. For safety, keep 0.2 of it as a buffer for concurrent allocations,
    /// which assumes the allocation size stays below cached_memory * 1.2.
    Int64 hualloc_cache = (SegmentCached() + LargeCached()) * 0.8;
    new_amount -= hualloc_cache;
    Int64 difference = new_amount - amount;
    /// Log only if difference is high. This is for convenience. The threshold is arbitrary.
    // if (difference >= 1048576 || difference <= -1048576)
    LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"),
        "MemoryTracking: was {}, peak {}, free memory in arenas {}, hard limit will set to {}, RSS: {}, difference: {}, hualloc cache:{}",
        ReadableSize(amount),
        ReadableSize(peak),
        ReadableSize(free_memory_in_allocator_arenas),
        ReadableSize(new_amount),
        ReadableSize(new_amount + hualloc_cache),
        ReadableSize(difference),
        ReadableSize(hualloc_cache));
#else
    Int64 difference = new_amount - amount;

    /// Log only if difference is high. This is for convenience. The threshold is arbitrary.
    if (difference >= 1048576 || difference <= -1048576)
        LOG_TRACE(&Poco::Logger::get("AsynchronousMetrics"),
            "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}",
            ReadableSize(amount),
            ReadableSize(peak),
            ReadableSize(new_amount),
            ReadableSize(difference));
    LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"),
        "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}",
        ReadableSize(amount),
        ReadableSize(peak),
        ReadableSize(new_amount),
        ReadableSize(difference));

#endif

    total_memory_tracker.set(new_amount);
    CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount);
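A worked example of the adjustment above with hypothetical round numbers, to make the accounting concrete: pages hualloc keeps cached still count toward RSS, so 80% of the cached bytes are subtracted before the value is handed to total_memory_tracker:

#include <cstdint>
#include <cassert>

void hualloc_rss_adjustment_example()
{
    const int64_t rss = 40ll << 30;      // hypothetical resident set size: 40 GiB
    const int64_t cached = 10ll << 30;   // hypothetical LargeCached() + SegmentCached(): 10 GiB

    const auto hualloc_cache = static_cast<int64_t>(cached * 0.8);  // 8 GiB treated as free
    const int64_t tracked = rss - hualloc_cache;                    // 32 GiB reported as MemoryTracking

    assert(hualloc_cache == 8ll << 30);
    assert(tracked == 32ll << 30);
}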
@@ -56,6 +56,7 @@ const char * auto_config_build[]
    "TZDATA_VERSION", "@TZDATA_VERSION@",
    "USE_KRB5", "@USE_KRB5@",
    "USE_BYTEDANCE_RDKAFKA", "@USE_BYTEDANCE_RDKAFKA@",
    "USE_HUALLOC", "@USE_HUALLOC@",

    nullptr, nullptr
};
@@ -0,0 +1,91 @@
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <Storages/System/StorageSystemHuAllocStats.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Pipe.h>
#include <Core/NamesAndTypes.h>
#include <Common/Exception.h>
#include <common/logger_useful.h>
#include <Common/formatReadable.h>
#include <fmt/core.h>

#include "config.h"

#if USE_HUALLOC
#    include <hualloc/hu_alloc.h>
#endif


namespace DB
{
StorageSystemHuAllocStats::StorageSystemHuAllocStats(const StorageID & table_id_)
    : IStorage(table_id_)
{
    StorageInMemoryMetadata storage_metadata;
    ColumnsDescription desc;
    auto columns = getNamesAndTypes();
    for (const auto & col : columns)
    {
        ColumnDescription col_desc(col.name, col.type);
        desc.add(col_desc);
    }
    storage_metadata.setColumns(desc);
    setInMemoryMetadata(storage_metadata);
}

NamesAndTypesList StorageSystemHuAllocStats::getNamesAndTypes()
{
    return {
        { "GiantAlloc", std::make_shared<DataTypeUInt64>() },
        { "LargeReclaim", std::make_shared<DataTypeUInt64>() },
        { "SegmentReclaim", std::make_shared<DataTypeUInt64>() },
        { "LargeCached", std::make_shared<DataTypeString>() },
        { "SegmentCached", std::make_shared<DataTypeString>() },
        { "LargeAllocate", std::make_shared<DataTypeUInt64>() },
        { "LargeFree", std::make_shared<DataTypeUInt64>() },
        { "SegmentAllocate", std::make_shared<DataTypeUInt64>() },
        { "SegmentFree", std::make_shared<DataTypeUInt64>() },
        { "GiantAllocate", std::make_shared<DataTypeUInt64>() },
        { "GiantFree", std::make_shared<DataTypeUInt64>() },
    };
}

Pipe StorageSystemHuAllocStats::read(
    const Names & column_names,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo &,
    ContextPtr /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    const size_t /*max_block_size*/,
    const unsigned /*num_streams*/)
{
    storage_snapshot->check(column_names);

    auto header = storage_snapshot->getMetadataForQuery()->getSampleBlockWithVirtuals(getVirtuals());
    MutableColumns res_columns = header.cloneEmptyColumns();

#if USE_HUALLOC
    size_t col_num = 0;
    res_columns.at(col_num++)->insert(HugeAlloc());
    res_columns.at(col_num++)->insert(LargeReclaimed());
    res_columns.at(col_num++)->insert(SegmentReclaimed());
    res_columns.at(col_num++)->insert(formatReadableSizeWithBinarySuffix(LargeCached()));
    res_columns.at(col_num++)->insert(formatReadableSizeWithBinarySuffix(SegmentCached()));
    res_columns.at(col_num++)->insert(GetTotalLargeAlloc());
    res_columns.at(col_num++)->insert(GetTotalLargeFree());
    res_columns.at(col_num++)->insert(GetTotalSegmentAlloc());
    res_columns.at(col_num++)->insert(GetTotalSegmentFree());
    res_columns.at(col_num++)->insert(GetTotalGiantAlloc());
    res_columns.at(col_num++)->insert(GetTotalGiantFree());
#else
    LOG_INFO(&Poco::Logger::get("StorageSystemHuAllocStats"), "HuAlloc is not enabled");
#endif // USE_HUALLOC

    UInt64 num_rows = res_columns.at(0)->size();
    Chunk chunk(std::move(res_columns), num_rows);

    return Pipe(std::make_shared<SourceFromSingleChunk>(std::move(header), std::move(chunk)));
}

}
@@ -0,0 +1,35 @@
#pragma once

#include <Storages/IStorage.h>


namespace DB
{

class Context;

class StorageSystemHuAllocStats final : public shared_ptr_helper<StorageSystemHuAllocStats>, public IStorage
{
    friend struct shared_ptr_helper<StorageSystemHuAllocStats>;
public:
    explicit StorageSystemHuAllocStats(const StorageID & table_id_);

    std::string getName() const override { return "SystemHuAllocStats"; }

    static NamesAndTypesList getNamesAndTypes();

    Pipe read(
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    bool isSystemStorage() const override { return true; }

    // bool supportsTransactions() const override { return true; }
};

}
@@ -19,6 +19,8 @@
 * All Bytedance's Modifications are Copyright (2023) Bytedance Ltd. and/or its affiliates.
 */


#include <Storages/System/StorageSystemHuAllocStats.h>
#include <Databases/IDatabase.h>
#include <Storages/System/attachSystemTables.h>
#include <Storages/System/attachSystemTablesImpl.h>

@@ -277,6 +279,7 @@ void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper)
    attach<StorageSystemPersistentBGJobStatus>(system_database, "persistent_bg_job_status");
    attach<StorageSystemGlobalGCManager>(system_database, "global_gc_manager");
    attach<StorageSystemLockMap>(system_database, "lock_map");
    attach<StorageSystemHuAllocStats>(system_database, "hualloc_stats");

    attach<StorageSystemWorkers>(system_database, "workers");
    attach<StorageSystemWorkerGroups>(system_database, "worker_groups");
@@ -9,6 +9,11 @@ endif()
if (TARGET ch_contrib::ulid)
    set(USE_ULID 1)
endif()

if (TARGET ch_rust::blake3)
    set(USE_BLAKE3 1)
endif()

if (TARGET hualloc AND ENABLE_HUALLOC)
    set(USE_HUALLOC 1)
endif()