mirror of https://github.com/ByConity/ByConity
356 lines
12 KiB
C++
356 lines
12 KiB
C++
#include <CloudServices/CnchBGThreadPartitionSelector.h>
|
|
|
|
#include <Catalog/Catalog.h>
|
|
#include <Columns/IColumn.h>
|
|
#include <Interpreters/Context.h>
|
|
#include <Interpreters/Context_fwd.h>
|
|
#include <Interpreters/executeQuery.h>
|
|
#include <Interpreters/InterpreterSelectQuery.h>
|
|
#include <Interpreters/StorageID.h>
|
|
#include <Storages/StorageCnchMergeTree.h>
|
|
|
|
#include <fmt/format.h>
|
|
|
|
namespace DB
|
|
{
|
|
/// Digest query over system.server_part_log, restricted to the last 7 days (event_date >= today() - 7)
/// and to tables whose name does not end in "CHTMP". Produces one row per (uuid, partition_id) with:
///   insert_parts      - number of 'InsertPart' events,
///   last_insert_time  - latest event_time among 'InsertPart' events,
///   merge_parts       - sum of num_source_parts over 'MergeParts' events,
///   remove_real_parts - number of 'RemovePart' events with rows > 0.
/// Groups without any insert are dropped by the HAVING clause.
const char * SQL_LOAD_PARTITION_DIGEST_INFO = R"(
SELECT uuid, partition_id,
    countIf(event_type='InsertPart') AS insert_parts,
    maxIf(event_time, event_type='InsertPart') as last_insert_time,
    sumIf(num_source_parts, event_type='MergeParts') AS merge_parts,
    countIf(event_type='RemovePart' and rows>0) AS remove_real_parts
FROM system.server_part_log
WHERE table NOT LIKE '%CHTMP' AND event_date >= today() - 7
GROUP BY uuid, partition_id
HAVING insert_parts > 0;
)";
|
|
|
|
/// Builds the selector and seeds `container` with one Estimator per (table uuid, partition) read from
/// system.server_part_log via SQL_LOAD_PARTITION_DIGEST_INFO.
///
/// Loading is best-effort: nothing is rethrown from here. `load_success` is set to false when
///  - system.server_part_log does not exist,
///  - a result column does not have the expected column type, or
///  - any exception is thrown while running or reading the query.
CnchBGThreadPartitionSelector::CnchBGThreadPartitionSelector(ContextMutablePtr global_context_)
    : WithMutableContext(global_context_), log(getLogger("PartitionSelector"))
{
    try
    {
        auto system_db = DatabaseCatalog::instance().getDatabase("system", getContext());
        if (!system_db->isTableExist("server_part_log", getContext()))
        {
            LOG_WARNING(log, "Table system.server_part_log does not exist!");
            load_success = false;
            return;
        }

        /// Run the digest query on a private copy of the context.
        auto tmp_context = Context::createCopy(getContext());
        auto block_io = executeQuery(SQL_LOAD_PARTITION_DIGEST_INFO, tmp_context, true);
        auto input_block = block_io.getInputStream();

        while (true)
        {
            auto res = input_block->read();
            if (!res)
                break;

            auto * col_uuid = checkAndGetColumn<ColumnUUID>(*res.getByName("uuid").column);
            auto * col_partition = checkAndGetColumn<ColumnString>(*res.getByName("partition_id").column);
            auto * col_insert = checkAndGetColumn<ColumnUInt64>(*res.getByName("insert_parts").column);
            auto * col_insert_time = checkAndGetColumn<ColumnUInt32>(*res.getByName("last_insert_time").column);
            auto * col_merge = checkAndGetColumn<ColumnUInt64>(*res.getByName("merge_parts").column);
            auto * col_remove = checkAndGetColumn<ColumnUInt64>(*res.getByName("remove_real_parts").column);

            /// Every column is dereferenced in the loop below, so each one must be validated:
            /// a null pointer here would be a crash that catch (...) cannot intercept.
            if (!col_uuid || !col_partition || !col_insert || !col_insert_time || !col_merge || !col_remove)
            {
                LOG_WARNING(log, "Failed to load server_part_log digest information from system.server_part_log.");
                load_success = false;
                return;
            }

            /// A block without rows stops loading but is not recorded as a load failure.
            if (col_uuid->size() == 0)
            {
                LOG_WARNING(log, "Failed to load server_part_log digest information from system.server_part_log.");
                return;
            }

            auto size = col_uuid->size();
            for (size_t i = 0; i < size; ++i)
            {
                UUID uuid = UUID(col_uuid->getElement(i));
                String partition = col_partition->getDataAt(i).toString();
                UInt64 insert = col_insert->get64(i);
                UInt64 insert_time = col_insert_time->get64(i);
                UInt64 merge = col_merge->get64(i);
                UInt64 remove = col_remove->get64(i);
                container[uuid][partition] = std::make_shared<Estimator>(partition, insert, merge, remove, insert_time);
            }
            LOG_INFO(log, "Successfully loaded {} server_part_log digest entries.", size);
        }
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__, "Failed to load server_part_log digest info.");
        load_success = false;
    }
}
|
|
|
|
bool CnchBGThreadPartitionSelector::needRoundRobinPick(const StoragePtr & storage, Type type, size_t & out_n_suggestion)
|
|
{
|
|
if (auto * cnch_table = dynamic_cast<StorageCnchMergeTree *>(storage.get()))
|
|
{
|
|
auto storage_settings = (*cnch_table).getSettings();
|
|
size_t interval{0};
|
|
if (type == MergeType)
|
|
{
|
|
interval = storage_settings->cnch_merge_round_robin_partitions_interval.value;
|
|
}
|
|
else if (type == GCType)
|
|
{
|
|
interval = storage_settings->cnch_gc_round_robin_partitions_interval.value;
|
|
out_n_suggestion = storage_settings->cnch_gc_round_robin_partitions_number;
|
|
}
|
|
|
|
if (!load_success)
|
|
{
|
|
LOG_TRACE(log, "Will use round-robin as load_success is false");
|
|
return true;
|
|
}
|
|
|
|
size_t last_time_sec{0};
|
|
{
|
|
std::lock_guard lock(round_robin_states_mutex);
|
|
last_time_sec = round_robin_states[type][storage->getStorageUUID()].last_time_sec;
|
|
}
|
|
|
|
return static_cast<size_t>(time(nullptr)) - last_time_sec > interval;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/// Try to select at most $n partitions from $storage for GC/Merge($type).
|
|
Strings CnchBGThreadPartitionSelector::doRoundRobinPick(const StoragePtr & storage, Type type, size_t n)
|
|
{
|
|
auto all_partitions = getContext()->getCnchCatalog()->getPartitionIDs(storage, nullptr);
|
|
auto size = all_partitions.size();
|
|
if (size == 0)
|
|
return {};
|
|
|
|
StringSet res;
|
|
auto storage_id = storage->getStorageID();
|
|
|
|
size_t start_index{0};
|
|
{
|
|
std::lock_guard lock(round_robin_states_mutex);
|
|
start_index = round_robin_states[type][storage_id.uuid].index;
|
|
round_robin_states[type][storage_id.uuid].index += (size <= n) ? 0 : n;
|
|
round_robin_states[type][storage_id.uuid].last_time_sec = static_cast<size_t>(time(nullptr));
|
|
}
|
|
|
|
if (size <= n)
|
|
{
|
|
LOG_TRACE(log, "{} Return all partitions for {}: {}",
|
|
storage_id.getNameForLogs(),
|
|
type == MergeType ? "Merge" : "GC",
|
|
fmt::format("{}", fmt::join(all_partitions, ",")));
|
|
return all_partitions;
|
|
}
|
|
|
|
for (size_t i = 0; i < n; ++i)
|
|
{
|
|
res.insert(all_partitions[start_index++ % size]);
|
|
}
|
|
LOG_TRACE(log, "{} Round-robin partitions for {}: {}",
|
|
storage_id.getNameForLogs(),
|
|
type == MergeType ? "Merge" : "GC",
|
|
fmt::format("{}", fmt::join(res, ",")));
|
|
return {res.begin(), res.end()};
|
|
}
|
|
|
|
/// Picks partitions of `storage` for the merge thread.
///
/// Round-robin (doRoundRobinPick) is used when `n` is 0 (with a pick size of 1), when
/// needRoundRobinPick() says so, or when no known partition is eligible for merge. When fewer than
/// `n` partitions are known, all known partitions are returned together with a round-robin pick.
/// Otherwise, among partitions eligible for merge:
///  - if there are at most `n`, all of them are returned;
///  - else `n` are returned: the first k by most recent last_insert_time ("realtime"), the
///    remaining n - k by lowest merge_speed ("slow"), where k = n when `only_realtime_partitions`
///    is set and (n + 1) / 2 otherwise.
Strings CnchBGThreadPartitionSelector::selectForMerge(const StoragePtr & storage, size_t n, bool only_realtime_partitions)
{
    auto storage_id = storage->getStorageID();

    if (n == 0)
    {
        LOG_TRACE(log, "{} Use round-robin as required partition size is 0", storage_id.getNameForLogs());
        return doRoundRobinPick(storage, MergeType, 1);
    }

    if (needRoundRobinPick(storage, MergeType, n))
    {
        LOG_TRACE(log, "{} Use round-robin as needRoundRobinPick() returns true", storage_id.getNameForLogs());
        return doRoundRobinPick(storage, MergeType, n);
    }

    auto uuid = storage_id.uuid;

    /// Take a snapshot of this table's estimators so the lock is held only for the copy.
    const auto known = [&]
    {
        std::lock_guard lock(mutex);
        return container[uuid];
    }();

    /// Sometimes there might not be enough data in system.server_part_log, eg: server start without binding K8s PVC.
    /// Then we need to select more partitions from Catalog (by round-robin strategy).
    if (n > known.size())
    {
        LOG_TRACE(log, "{} Select all known partitions and do a round-robin selection as known_partitions.size {} < required_size {}",
            storage_id.getNameForLogs(),
            known.size(),
            n);

        StringSet merged;
        for (const auto & [_, estimator] : known)
            merged.insert(estimator->partition);

        auto from_round_robin = doRoundRobinPick(storage, MergeType, n);
        for (const auto & partition_id : from_round_robin)
            merged.insert(partition_id);

        return {merged.begin(), merged.end()};
    }

    std::vector<EstimatorPtr> candidates{};
    for (const auto & [_, estimator] : known)
    {
        if (estimator->eligibleForMerge())
            candidates.push_back(estimator);
    }

    /// Nothing eligible: fall back to round-robin.
    if (candidates.empty())
    {
        LOG_TRACE(log, "{} Use round-robin as no available candidate partitions", storage_id.getNameForLogs());
        return doRoundRobinPick(storage, MergeType, n);
    }

    /// Not more candidates than requested: take them all, no ranking needed.
    const auto candidate_count = candidates.size();
    if (candidate_count <= n)
    {
        Strings all;
        all.reserve(candidate_count);
        for (const auto & candidate : candidates)
            all.push_back(candidate->partition);
        LOG_TRACE(log, "{} MergeMutateThread: select all candidate partitions: {}", storage_id.getNameForLogs(), fmt::format("{}", fmt::join(all, ",")));

        return all;
    }

    /// From here on candidates.size() > n >= 1, so every iterator offset below is inside the range.
    const size_t k = only_realtime_partitions ? n : (n + 1) / 2;

    /// Move the k most recently inserted partitions to the front.
    std::nth_element(
        candidates.begin(),
        candidates.begin() + k,
        candidates.end(),
        [](const auto & lhs, const auto & rhs) { return lhs->last_insert_time > rhs->last_insert_time; }
    );

    /// Fill positions [k, n) with the partitions of lowest merge_speed among the rest.
    if (!only_realtime_partitions && k < n)
    {
        std::nth_element(
            candidates.begin() + k,
            candidates.begin() + n,
            candidates.end(),
            [](const auto & lhs, const auto & rhs) { return lhs->merge_speed < rhs->merge_speed; }
        );
    }

    Strings res{};
    for (size_t i = 0; i < n; ++i)
    {
        const auto & candidate = candidates[i];
        auto type = i < k ? "realtime" : "slow";
        LOG_TRACE(log, "{} MergeMutateThread: select {} partition {}", storage_id.getNameForLogs(), type, candidate->partition);
        res.push_back(candidate->partition);
    }

    return res;
}
|
|
|
|
/// Picks partitions of `storage` for the GC thread.
///
/// The pick size starts at 10 and may be overwritten by needRoundRobinPick() with the table's
/// cnch_gc_round_robin_partitions_number setting. Round-robin (doRoundRobinPick) is used when
/// needRoundRobinPick() says so, when no estimator is known for the table, or when no known
/// partition is eligible for GC. Otherwise the eligible partitions with the lowest gc_speed are
/// returned, at most `n` of them (a single one if `n` is 0).
Strings CnchBGThreadPartitionSelector::selectForGC(const StoragePtr & storage)
{
    size_t n = 10;
    if (needRoundRobinPick(storage, GCType, n))
        return doRoundRobinPick(storage, GCType, n);

    auto storage_id = storage->getStorageID();
    auto uuid = storage_id.uuid;
    std::vector<EstimatorPtr> candidates{};

    /// Copy this table's estimators so the lock is not held while ranking.
    std::unique_lock lock(mutex);
    auto estimators = container[uuid];
    lock.unlock();

    if (estimators.empty())
    {
        LOG_TRACE(log, "{} GCThread: do round-robin as estimators is empty.", storage_id.getNameForLogs());
        return doRoundRobinPick(storage, GCType, n);
    }
    LOG_TRACE(log, "{} GCThread: estimators size - {}", storage_id.getNameForLogs(), estimators.size());

    for (const auto & [_, estimator] : estimators)
    {
        if (estimator->eligibleForGC())
            candidates.push_back(estimator);
    }

    if (candidates.empty())
    {
        LOG_TRACE(log, "{} GCThread: do round-robin as candidates is empty.", storage_id.getNameForLogs());
        return doRoundRobinPick(storage, GCType, n);
    }
    LOG_TRACE(log, "{} GCThread: candidates size - {}", storage_id.getNameForLogs(), candidates.size());

    Strings res{};
    size_t i = 0;
    /// Never reserve more than can actually be returned.
    res.reserve(candidates.size() < n ? candidates.size() : n);

    /// std::nth_element requires the nth iterator to lie within [first, last]; begin() + n is past
    /// end() when there are fewer than n candidates. In that case every candidate is returned
    /// anyway, so the partial ordering is only needed when there are more than n of them.
    if (candidates.size() > n)
    {
        std::nth_element(
            candidates.begin(),
            candidates.begin() + n,
            candidates.end(),
            [](const auto & lhs, const auto & rhs) { return lhs->gc_speed < rhs->gc_speed; }
        );
    }

    for (const auto & candidate : candidates)
    {
        LOG_TRACE(log, "{} GCThread: select partition {}", storage_id.getNameForLogs(), candidate->partition);
        res.push_back(candidate->partition);
        if (++i >= n)
            break;
    }

    return res;
}
|
|
|
|
/// Records `num` inserted parts at time `ts` for (uuid, partition).
/// Forwards to the existing estimator's insert(), or creates a new estimator seeded with
/// `num` inserted parts and last insert time `ts` if the partition is not tracked yet.
void CnchBGThreadPartitionSelector::addInsertParts(UUID uuid, const String & partition, size_t num, time_t ts)
{
    std::lock_guard lock(mutex);

    auto & table_estimators = container[uuid];
    auto found = table_estimators.find(partition);
    if (found == table_estimators.end())
        table_estimators[partition] = std::make_shared<Estimator>(partition, num, 0, 0, ts);
    else
        found->second->insert(num, ts);
}
|
|
|
|
/// Records `num` merged source parts for (uuid, partition).
/// Forwards to the existing estimator's merge(); `ts` is only used when a new estimator has to be
/// created for a partition that is not tracked yet.
void CnchBGThreadPartitionSelector::addMergeParts(UUID uuid, const String & partition, size_t num, time_t ts)
{
    std::lock_guard lock(mutex);

    auto & per_partition = container[uuid];
    if (auto pos = per_partition.find(partition); pos != per_partition.end())
    {
        pos->second->merge(num);
        return;
    }

    per_partition[partition] = std::make_shared<Estimator>(partition, 0, num, 0, ts);
}
|
|
|
|
/// Records `num` removed parts for (uuid, partition).
/// Forwards to the existing estimator's remove(); `ts` is only used when a new estimator has to be
/// created for a partition that is not tracked yet.
void CnchBGThreadPartitionSelector::addRemoveParts(UUID uuid, const String & partition, size_t num, time_t ts)
{
    std::lock_guard lock(mutex);

    auto & estimators_of_table = container[uuid];
    const auto entry = estimators_of_table.find(partition);
    const bool already_tracked = entry != estimators_of_table.end();

    if (already_tracked)
        entry->second->remove(num);
    else
        estimators_of_table[partition] = std::make_shared<Estimator>(partition, 0, 0, num, ts);
}
|
|
|
|
/// Calls markPostponeForMerge() on the estimator of (uuid, partition), if that partition is tracked.
/// Does nothing for an untracked partition.
void CnchBGThreadPartitionSelector::postponeMerge(UUID uuid, const String & partition)
{
    std::lock_guard lock(mutex);

    auto & table_estimators = container[uuid];
    const auto found = table_estimators.find(partition);
    if (found == table_estimators.end())
        return;

    found->second->markPostponeForMerge();
}
|
|
|
|
}
|