mirror of https://github.com/ByConity/ByConity
169 lines
6.4 KiB
C++
169 lines
6.4 KiB
C++
/*
 * Copyright (2022) Bytedance Ltd. and/or its affiliates
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#pragma once
|
|
|
|
#include <ResourceManagement/CommonData.h>
|
|
#include <Common/RWLock.h>
|
|
#include <Common/Configurations.h>
|
|
#include <CloudServices/RpcLeaderClientBase.h>
|
|
#include <Interpreters/Context.h>
|
|
#include <Interpreters/Context_fwd.h>
|
|
#include <Poco/Util/AbstractConfiguration.h>
|
|
|
|
namespace DB
|
|
{
|
|
/// Forward declaration of the RPC stub type so that this header does not
/// need the definition of the stub class itself.
namespace Protos
{
    class ResourceManagerService_Stub;
}
|
|
|
|
/// Error codes referenced by this header. They are only declared here
/// (`extern`); the definitions are expected to live elsewhere.
/// All six are consulted by ResourceManagerClient::callToLeaderWrapper when
/// deciding whether a failed RPC may be retried.
namespace ErrorCodes
{
    extern const int BRPC_CONNECT_ERROR;
    extern const int BRPC_EXCEPTION;
    extern const int BRPC_HOST_DOWN;
    extern const int BRPC_TIMEOUT;
    extern const int NO_SUCH_SERVICE;
    extern const int RESOURCE_MANAGER_NO_LEADER_ELECTED;
}
|
|
|
|
namespace ResourceManagement
|
|
{
|
|
struct WorkerNode;
|
|
|
|
String fetchRMAddressByPSM(ContextPtr context);
|
|
String fetchRMAddressFromKeeper(ContextPtr context);
|
|
|
|
class ResourceManagerClient : public WithContext, public RpcLeaderClientBase
|
|
{
|
|
friend class ResourceReporterTask;
|
|
public:
|
|
static String getName() { return "ResourceManagerClient"; }
|
|
|
|
ResourceManagerClient(ContextPtr global_context_);
|
|
~ResourceManagerClient() override;
|
|
|
|
void getVirtualWarehouse(const std::string & name, VirtualWarehouseData & vw_data);
|
|
void createVirtualWarehouse(const std::string & vw_name, const VirtualWarehouseSettings & vw_settings, bool if_not_exists);
|
|
void updateVirtualWarehouse(const std::string & vw_name, const VirtualWarehouseAlterSettings & vw_alter_settings);
|
|
void dropVirtualWarehouse(const std::string & vw_name, const bool if_exists);
|
|
void getAllVirtualWarehouses(std::vector<VirtualWarehouseData> & vw_data_list);
|
|
|
|
void createWorkerGroup(const String & group_id, bool if_not_exists, const String & vw_name, const WorkerGroupData & group_data);
|
|
void dropWorkerGroup(const String & group_id, bool if_exists);
|
|
std::vector<WorkerGroupData> getAllWorkerGroups(bool with_metrics = false);
|
|
|
|
void getAllWorkers(std::vector<WorkerNodeResourceData> & data);
|
|
void getWorkerGroups(const std::string & vw_name, std::vector<WorkerGroupData> & groups_data);
|
|
bool reportResourceUsage(const WorkerNodeResourceData & data);
|
|
|
|
void registerWorker(const WorkerNodeResourceData & data);
|
|
void removeWorker(const String & worker_id, const String & vw_name, const String & group_id);
|
|
|
|
WorkerGroupData pickWorkerGroup(const String & vw_name, VWScheduleAlgo vw_schedule_algo, const ResourceRequirement & requirement);
|
|
HostWithPorts pickWorker(const String & vw_name, VWScheduleAlgo vw_schedule_algo, const ResourceRequirement & requirement);
|
|
|
|
AggQueryQueueMap syncQueueDetails(VWQueryQueueMap vw_query_queue_map
|
|
, std::vector<String> * deleted_vw_list);
|
|
|
|
private:
|
|
using Stub = Protos::ResourceManagerService_Stub;
|
|
mutable RWLock leader_mutex = RWLockImpl::create();
|
|
std::unique_ptr<Stub> stub;
|
|
|
|
String fetchRMAddress() const;
|
|
|
|
RWLockImpl::LockHolder getReadLock() const
|
|
{
|
|
return leader_mutex->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
|
|
}
|
|
|
|
RWLockImpl::LockHolder getWriteLock() const
|
|
{
|
|
return leader_mutex->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);
|
|
}
|
|
|
|
/** Overloaded function, where process_response is only executed
|
|
* after the response has been determined to be from a leader
|
|
*
|
|
*/
|
|
template <typename RMResponse, typename RpcFunc, typename RespHandler>
|
|
void callToLeaderWrapper(RMResponse & response, RpcFunc & rpc_func, RespHandler & process_response)
|
|
{
|
|
auto is_leader = callToLeaderWrapper(response, rpc_func);
|
|
if (is_leader)
|
|
process_response(response);
|
|
}
|
|
|
|
/** Helper function that ensures that RPC calls are sent to the leader node.
|
|
* Updates stub based on RM node's response or via ByteJournal
|
|
* rpc_func should not contain any response processing
|
|
* Depends on is_leader field and leader_host_port of RPC response
|
|
*/
|
|
template <typename RMResponse, typename RpcFunc>
|
|
bool callToLeaderWrapper(RMResponse & response, RpcFunc & rpc_func)
|
|
{
|
|
auto max_retry_count = getContext()->getRootConfig().resource_manager.max_retry_times.value;
|
|
size_t retry_count = 0;
|
|
do
|
|
{
|
|
try
|
|
{
|
|
{
|
|
auto lock = getReadLock();
|
|
rpc_func(stub);
|
|
}
|
|
|
|
/// Finish the process with true flag if the response is from the leader and no exception thrown.
|
|
if (response.has_is_leader() && response.is_leader())
|
|
return true;
|
|
}
|
|
catch (const Exception & e)
|
|
{
|
|
if (!(e.code() == ErrorCodes::BRPC_HOST_DOWN || e.code() == ErrorCodes::BRPC_CONNECT_ERROR
|
|
|| e.code() == ErrorCodes::NO_SUCH_SERVICE || e.code() == ErrorCodes::RESOURCE_MANAGER_NO_LEADER_ELECTED
|
|
|| e.code() == ErrorCodes::BRPC_TIMEOUT || e.code() == ErrorCodes::BRPC_EXCEPTION)
|
|
|| retry_count == max_retry_count)
|
|
throw;
|
|
|
|
tryLogDebugCurrentException(__PRETTY_FUNCTION__);
|
|
}
|
|
|
|
auto lock = getWriteLock();
|
|
auto new_leader = fetchRMAddress();
|
|
if (new_leader.empty())
|
|
{
|
|
LOG_ERROR(log, "There is no active elected RM leader");
|
|
throw Exception("No active RM leader", ErrorCodes::RESOURCE_MANAGER_NO_LEADER_ELECTED);
|
|
}
|
|
/// Update leader address and retry in case of any exception.
|
|
else
|
|
{
|
|
LOG_DEBUG(log, "Updating RM Leader to " + new_leader);
|
|
stub = std::make_unique<Stub>(&updateChannel(new_leader));
|
|
}
|
|
} while (retry_count++ < max_retry_count);
|
|
|
|
return false;
|
|
}
|
|
};
|
|
|
|
}
|
|
|
|
using ResourceManagerClient = ResourceManagement::ResourceManagerClient;
|
|
using ResourceManagerClientPtr = std::shared_ptr<ResourceManagerClient>;
|
|
}
|