Daily cherry-pick from internal (#1099)

* Merge branch 'support_json_type' into 'cnch-ce-merge'

feat(clickhousech@m-13539135):support json type

See merge request dp/ClickHouse!15288
# Conflicts:
#	src/Analyzers/QueryRewriter.cpp
#	src/Catalog/Catalog.cpp
#	src/Catalog/Catalog.h
#	src/CloudServices/CnchServerServiceImpl.cpp
#	src/CloudServices/CnchWorkerResource.cpp
#	src/CloudServices/CnchWorkerServiceImpl.cpp
#	src/Common/config.h.in
#	src/DataStreams/ReadDictInputStream.cpp
#	src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
#	src/Interpreters/DatabaseCatalog.cpp
#	src/Interpreters/TreeRewriter.cpp
#	src/MergeTreeCommon/MergeTreeMetaBase.h
#	src/Protos/DataModelHelpers.cpp
#	src/Protos/cnch_worker_rpc.proto
#	src/QueryPlan/PlanSegmentSourceStep.cpp
#	src/QueryPlan/TableScanStep.cpp
#	src/Storages/BitEngine/BitEngineHelper.cpp
#	src/Storages/BitEngineEncodePartitionHelper.cpp
#	src/Storages/MergeTree/MergeTreeSequentialSource.cpp
#	src/Storages/StorageCnchMergeTree.cpp
#	src/Storages/StorageCnchMergeTree.h
#	src/WorkerTasks/MergeTreeDataMerger.cpp
#	tests/queries/0_ce_problematic_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql
This commit is contained in:
Dao Minh Thuc 2024-01-26 10:38:39 +08:00 committed by GitHub
parent cc4b38d2c5
commit de5bd273f9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
412 changed files with 9968 additions and 1515 deletions

View File

@ -0,0 +1,38 @@
#pragma once
#include <magic_enum.hpp>
#include <fmt/format.h>
template <class T> concept is_enum = std::is_enum_v<T>;
namespace detail
{
template <is_enum E, class F, size_t ...I>
constexpr void static_for(F && f, std::index_sequence<I...>)
{
(std::forward<F>(f)(std::integral_constant<E, magic_enum::enum_value<E>(I)>()) , ...);
}
}
/**
* Iterate over enum values in compile-time (compile-time switch/case, loop unrolling).
*
* @example static_for<E>([](auto enum_value) { return template_func<enum_value>(); }
* ^ enum_value can be used as a template parameter
*/
template <is_enum E, class F>
constexpr void static_for(F && f)
{
constexpr size_t count = magic_enum::enum_count<E>();
detail::static_for<E>(std::forward<F>(f), std::make_index_sequence<count>());
}
/// Enable printing enum values as strings via fmt + magic_enum
template <is_enum T>
struct fmt::formatter<T> : fmt::formatter<std::string_view>
{
constexpr auto format(T value, auto& format_context)
{
return formatter<string_view>::format(magic_enum::enum_name(value), format_context);
}
};

View File

@ -328,6 +328,10 @@ if (USE_SIMDJSON)
add_subdirectory (simdjson-cmake) add_subdirectory (simdjson-cmake)
endif() endif()
if (USE_RAPIDJSON)
add_subdirectory (rapidjson-cmake)
endif()
if (USE_BREAKPAD) if (USE_BREAKPAD)
add_subdirectory(breakpad-cmake) add_subdirectory(breakpad-cmake)
endif() endif()

View File

@ -0,0 +1,4 @@
set(RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include")
add_library(rapidjson INTERFACE)
target_include_directories(rapidjson BEFORE INTERFACE "${RAPIDJSON_INCLUDE_DIR}")

View File

@ -655,7 +655,7 @@ void ExprAnalyzerVisitor::processSubqueryArgsWithCoercion(ASTPtr & lhs_ast, ASTP
{ {
DataTypePtr super_type = nullptr; DataTypePtr super_type = nullptr;
if (enable_implicit_type_conversion) if (enable_implicit_type_conversion)
super_type = getLeastSupertype({lhs_type, rhs_type}, allow_extended_conversion); super_type = getLeastSupertype(DataTypes{lhs_type, rhs_type}, allow_extended_conversion);
if (!super_type) if (!super_type)
throw Exception("Incompatible types for IN prediacte", ErrorCodes::TYPE_MISMATCH); throw Exception("Incompatible types for IN prediacte", ErrorCodes::TYPE_MISMATCH);
if (!lhs_type->equals(*super_type)) if (!lhs_type->equals(*super_type))

View File

@ -869,7 +869,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing(ASTTableJoin & table_join, Scope
{ {
try try
{ {
output_type = getLeastSupertype({left_type, right_type}, allow_extended_conversion); output_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion);
} }
catch (DB::Exception & ex) catch (DB::Exception & ex)
{ {
@ -961,7 +961,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing(ASTTableJoin & table_join, Scope
{ {
try try
{ {
output_type = getLeastSupertype({left_type, right_type}, allow_extended_conversion); output_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion);
} }
catch (DB::Exception & ex) catch (DB::Exception & ex)
{ {
@ -1159,7 +1159,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinOn(ASTTableJoin & table_join, ScopePtr
{ {
try try
{ {
super_type = getLeastSupertype({left_type, right_type}, allow_extended_conversion); super_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion);
} }
catch (DB::Exception & ex) catch (DB::Exception & ex)
{ {

View File

@ -44,6 +44,7 @@
#include <Parsers/ASTSelectQuery.h> #include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h> #include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Storages/StorageSnapshot.h>
namespace DB namespace DB
{ {
@ -433,7 +434,10 @@ namespace
} }
StoragePtr storage = joined_tables.getLeftTableStorage(); StoragePtr storage = joined_tables.getLeftTableStorage();
rewrite_context.result.emplace(source_columns, storage, storage ? storage->getInMemoryMetadataPtr() : nullptr); StorageSnapshotPtr storage_snapshot = nullptr;
if (storage)
storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
rewrite_context.result.emplace(source_columns, storage, storage_snapshot);
auto & result = *(rewrite_context.result); auto & result = *(rewrite_context.result);
if (tables_with_columns.size() > 1) if (tables_with_columns.size() > 1)
@ -486,8 +490,15 @@ namespace
for (const auto & col : result.analyzed_join->columnsFromJoinedTable()) for (const auto & col : result.analyzed_join->columnsFromJoinedTable())
all_source_columns_set.insert(col.name); all_source_columns_set.insert(col.name);
} }
normalizeNameAndAliases(node, result.aliases, all_source_columns_set, settings, context, result.storage, normalizeNameAndAliases(
result.metadata_snapshot, graphviz_index); node,
result.aliases,
all_source_columns_set,
settings,
context,
result.storage,
result.storage_snapshot ? result.storage_snapshot->metadata : nullptr,
graphviz_index);
} }
// 5. Call `TreeOptimizer` since some optimizations will change the query result // 5. Call `TreeOptimizer` since some optimizations will change the query result

View File

@ -613,6 +613,14 @@ if (USE_LIBPQXX)
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${LIBPQXX_INCLUDE_DIR}) dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${LIBPQXX_INCLUDE_DIR})
endif() endif()
if (USE_SIMDJSON)
dbms_target_link_libraries(PUBLIC simdjson)
endif()
if (USE_RAPIDJSON)
dbms_target_link_libraries(PUBLIC rapidjson)
endif()
if(USE_CPP_JIEBA) if(USE_CPP_JIEBA)
dbms_target_link_libraries(PUBLIC ${CPP_JIEBA_LIBRARY}) dbms_target_link_libraries(PUBLIC ${CPP_JIEBA_LIBRARY})
endif() endif()
@ -621,7 +629,6 @@ if (USE_TSQUERY)
dbms_target_link_libraries(PUBLIC ${TSQUERY_LIBRARY}) dbms_target_link_libraries(PUBLIC ${TSQUERY_LIBRARY})
endif() endif()
if (TARGET ch_contrib::ulid) if (TARGET ch_contrib::ulid)
dbms_target_link_libraries (PUBLIC ch_contrib::ulid) dbms_target_link_libraries (PUBLIC ch_contrib::ulid)
endif() endif()
@ -639,6 +646,7 @@ if (USE_NLP)
endif() endif()
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${PARALLEL_HASHMAP_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${PARALLEL_HASHMAP_INCLUDE_DIR})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${MAGIC_ENUM_INCLUDE_DIR})
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")

View File

@ -13,7 +13,13 @@
* limitations under the License. * limitations under the License.
*/ */
#include <algorithm>
#include <iterator> #include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include <Catalog/Catalog.h> #include <Catalog/Catalog.h>
#include <Catalog/CatalogFactory.h> #include <Catalog/CatalogFactory.h>
#include <Catalog/DataModelPartWrapper.h> #include <Catalog/DataModelPartWrapper.h>
@ -33,6 +39,11 @@
#include <Common/RpcClientPool.h> #include <Common/RpcClientPool.h>
#include <Common/serverLocality.h> #include <Common/serverLocality.h>
#include <Common/ScanWaitFreeMap.h> #include <Common/ScanWaitFreeMap.h>
#include <Catalog/MetastoreCommon.h>
#include <DataTypes/ObjectUtils.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/StorageSnapshot.h>
#include <Core/Types.h> #include <Core/Types.h>
// #include <Access/MaskingPolicyDataModel.h> // #include <Access/MaskingPolicyDataModel.h>
// #include <Access/MaskingPolicyCommon.h> // #include <Access/MaskingPolicyCommon.h>
@ -1208,6 +1219,22 @@ namespace Catalog
ProfileEvents::SetWorkerGroupForTableFailed); ProfileEvents::SetWorkerGroupForTableFailed);
} }
void Catalog::initStorageObjectSchema(StoragePtr & res)
{
// Load dynamic object column schema
if (res && hasDynamicSubcolumns(res->getInMemoryMetadata().getColumns()))
{
auto cnch_table = std::dynamic_pointer_cast<StorageCnchMergeTree>(res);
if (cnch_table)
{
auto assembled_schema = tryGetTableObjectAssembledSchema(res->getStorageUUID());
auto partial_schemas = tryGetTableObjectPartialSchemas(res->getStorageUUID());
cnch_table->resetObjectSchemas(assembled_schema, partial_schemas);
}
}
}
StoragePtr Catalog::getTable(const Context & query_context, const String & database, const String & name, const TxnTimestamp & ts) StoragePtr Catalog::getTable(const Context & query_context, const String & database, const String & name, const TxnTimestamp & ts)
{ {
StoragePtr res = nullptr; StoragePtr res = nullptr;
@ -1245,6 +1272,7 @@ namespace Catalog
res = createTableFromDataModel(query_context, *table); res = createTableFromDataModel(query_context, *table);
initStorageObjectSchema(res);
/// TODO: (zuochuang.zema, guanzhe.andy) handle TimeTravel /// TODO: (zuochuang.zema, guanzhe.andy) handle TimeTravel
if (auto * cnch_merge_tree = dynamic_cast<StorageCnchMergeTree *>(res.get())) if (auto * cnch_merge_tree = dynamic_cast<StorageCnchMergeTree *>(res.get()))
{ {
@ -1313,6 +1341,9 @@ namespace Catalog
if (!table) if (!table)
return; return;
res = createTableFromDataModel(query_context, *table); res = createTableFromDataModel(query_context, *table);
initStorageObjectSchema(res);
/// Try insert the storage into cache. /// Try insert the storage into cache.
if (res && cache_manager) if (res && cache_manager)
{ {
@ -6241,6 +6272,208 @@ namespace Catalog
d.set_definition(create_query); d.set_definition(create_query);
} }
void Catalog::appendObjectPartialSchema(
const StoragePtr & table, const TxnTimestamp & txn_id, const MutableMergeTreeDataPartsCNCHVector & parts)
{
//txn partial schema
//multi column
auto cnch_table = std::dynamic_pointer_cast<StorageCnchMergeTree>(table);
if (!cnch_table)
return;
auto subcolumns_limit = cnch_table->getSettings()->json_subcolumns_threshold;
//check schema compatibility and merge part schema
auto partial_schema = DB::getConcreteObjectColumns(
parts.begin(), parts.end(), table->getInMemoryMetadata().columns, [](const auto & part) { return part->getColumns(); });
// compare with existed schema , check if it need to insert
// Attention: this comparison will scan existed partial schema from meta store, it may cost too many meta store resource.
// if it cause meta store performance fallback, just remove this comparison
auto assembled_schema = tryGetTableObjectAssembledSchema(table->getStorageUUID());
auto existed_partial_schemas = tryGetTableObjectPartialSchemas(table->getStorageUUID());
std::vector<TxnTimestamp> existed_partial_schema_txnids(existed_partial_schemas.size());
std::for_each(
existed_partial_schemas.begin(),
existed_partial_schemas.end(),
[&existed_partial_schema_txnids](const auto & existed_partial_schema) {
existed_partial_schema_txnids.emplace_back(existed_partial_schema.first);
});
auto committed_partial_schema_txnids = filterUncommittedObjectPartialSchemas(existed_partial_schema_txnids);
std::vector<ObjectPartialSchema> committed_partial_schema_list(committed_partial_schema_txnids.size() + 2);
std::for_each(
committed_partial_schema_txnids.begin(),
committed_partial_schema_txnids.end(),
[&committed_partial_schema_list, &existed_partial_schemas](const auto & txn_id) {
committed_partial_schema_list.emplace_back(existed_partial_schemas[txn_id]);
});
committed_partial_schema_list.emplace_back(assembled_schema);
auto existed_assembled_schema = DB::getConcreteObjectColumns(
committed_partial_schema_list.begin(),
committed_partial_schema_list.end(),
cnch_table->getInMemoryMetadata().getColumns(),
[](const auto & partial_schema_) { return partial_schema_; });
committed_partial_schema_list.emplace_back(partial_schema);
auto new_assembled_schema = DB::getConcreteObjectColumns(
committed_partial_schema_list.begin(),
committed_partial_schema_list.end(),
cnch_table->getInMemoryMetadata().getColumns(),
[](const auto & partial_schema_) { return partial_schema_; });
if (new_assembled_schema != existed_assembled_schema)
{
DB::limitObjectSubcolumns(new_assembled_schema, subcolumns_limit);
meta_proxy->appendObjectPartialSchema(
name_space, UUIDHelpers::UUIDToString(table->getStorageUUID()), txn_id.toUInt64(), partial_schema.toString());
cnch_table->appendObjectPartialSchema(txn_id, partial_schema);
LOG_DEBUG(
log,
"Append dynamic object partial schema [TxnTimestamp:{}, Partial Schema:{}]",
txn_id.toString(),
partial_schema.toString());
}
}
ObjectAssembledSchema Catalog::tryGetTableObjectAssembledSchema(const UUID & table_uuid) const
{
auto serialized_assembled_schema = meta_proxy->getObjectAssembledSchema(name_space, UUIDHelpers::UUIDToString(table_uuid));
if (serialized_assembled_schema.empty())
return ColumnsDescription();
return ColumnsDescription::parse(serialized_assembled_schema);
}
ObjectPartialSchemas Catalog::tryGetTableObjectPartialSchemas(const UUID & table_uuid, const int & limit_size) const
{
auto serialized_partial_schemas
= meta_proxy->scanObjectPartialSchemas(name_space, UUIDHelpers::UUIDToString(table_uuid), limit_size);
ObjectPartialSchemas partial_schemas;
partial_schemas.reserve(serialized_partial_schemas.size());
std::for_each(
serialized_partial_schemas.begin(),
serialized_partial_schemas.end(),
[&partial_schemas](std::pair<String, String> serialized_partial_schema) {
partial_schemas.emplace(
std::stoll(serialized_partial_schema.first),
ColumnsDescription::parse(serialized_partial_schema.second));
});
return partial_schemas;
}
bool Catalog::resetObjectAssembledSchemaAndPurgePartialSchemas(
const UUID & table_uuid,
const ObjectAssembledSchema & old_assembled_schema,
const ObjectAssembledSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids)
{
return meta_proxy->resetObjectAssembledSchemaAndPurgePartialSchemas(
name_space,
UUIDHelpers::UUIDToString(table_uuid),
old_assembled_schema.empty() ? "" : old_assembled_schema.toString(),
new_assembled_schema.toString(),
partial_schema_txnids);
}
std::vector<TxnTimestamp> Catalog::filterUncommittedObjectPartialSchemas(std::vector<TxnTimestamp> & unfiltered_partial_schema_txnids)
{
std::vector<TxnTimestamp> committed_partial_schema_txnids;
std::unordered_map<TxnTimestamp, TxnTimestamp, TxnTimestampHasher> unfiltered_partial_schema_txnid_map;
unfiltered_partial_schema_txnid_map.reserve(unfiltered_partial_schema_txnids.size());
std::for_each(
unfiltered_partial_schema_txnids.begin(),
unfiltered_partial_schema_txnids.end(),
[&unfiltered_partial_schema_txnid_map](const auto & txn_id) { unfiltered_partial_schema_txnid_map[txn_id] = txn_id; });
// query partial schema status in meta store
auto partial_schema_statuses = batchGetObjectPartialSchemaStatuses(unfiltered_partial_schema_txnids);
std::for_each(
partial_schema_statuses.begin(),
partial_schema_statuses.end(),
[&committed_partial_schema_txnids, &unfiltered_partial_schema_txnid_map](const auto & partial_schema_status_pair) {
if (partial_schema_status_pair.second == ObjectPartialSchemaStatus::Finished)
{
committed_partial_schema_txnids.emplace_back(partial_schema_status_pair.first);
unfiltered_partial_schema_txnid_map.erase(partial_schema_status_pair.first);
}
});
// query remaining partial schemas by its co-responding txn record status
unfiltered_partial_schema_txnids.clear();
std::transform(
unfiltered_partial_schema_txnid_map.begin(),
unfiltered_partial_schema_txnid_map.end(),
std::back_inserter(unfiltered_partial_schema_txnids),
[](const auto & txn_id_pair) { return txn_id_pair.first; });
auto txn_record_statuses = getTransactionRecords(unfiltered_partial_schema_txnids, 10000);
std::for_each(
txn_record_statuses.begin(), txn_record_statuses.end(), [&committed_partial_schema_txnids](TransactionRecord txn_record) {
auto txn_id = txn_record.txnID();
auto status = txn_record.status();
if (status == CnchTransactionStatus::Finished)
committed_partial_schema_txnids.emplace_back(txn_id);
});
return committed_partial_schema_txnids;
}
ObjectPartialSchemaStatuses
Catalog::batchGetObjectPartialSchemaStatuses(const std::vector<TxnTimestamp> & txn_ids, const int & batch_size)
{
ObjectPartialSchemaStatuses partial_schema_statuses;
size_t total_txn_size = txn_ids.size();
partial_schema_statuses.reserve(total_txn_size);
auto fetch_records_in_batch = [&](size_t begin, size_t end) {
auto statuses_in_metastore = meta_proxy->batchGetObjectPartialSchemaStatuses(
name_space, std::vector<TxnTimestamp>(txn_ids.begin() + begin, txn_ids.begin() + end));
for (const auto & serialized_partial_schema_status : statuses_in_metastore)
{
auto txn_id = serialized_partial_schema_status.first;
auto status = ObjectSchemas::deserializeObjectPartialSchemaStatus(serialized_partial_schema_status.second);
partial_schema_statuses.emplace(txn_id, status);
}
};
if (batch_size > 0)
{
size_t batch_count{0};
while (batch_count + batch_size < total_txn_size)
{
fetch_records_in_batch(batch_count, batch_count + batch_size);
batch_count += batch_size;
}
fetch_records_in_batch(batch_count, total_txn_size);
}
else
fetch_records_in_batch(0, total_txn_size);
return partial_schema_statuses;
}
void Catalog::batchDeleteObjectPartialSchemaStatus(const std::vector<TxnTimestamp> &txn_ids)
{
meta_proxy->batchDeletePartialSchemaStatus(name_space, txn_ids);
}
void Catalog::commitObjectPartialSchema(const TxnTimestamp &txn_id)
{
meta_proxy->updateObjectPartialSchemaStatus(name_space, txn_id, ObjectPartialSchemaStatus::Finished);
}
void Catalog::abortObjectPartialSchema(const TxnTimestamp & txn_id)
{
meta_proxy->updateObjectPartialSchemaStatus(name_space, txn_id, ObjectPartialSchemaStatus::Aborted);
}
std::unordered_map<String, std::shared_ptr<PartitionMetrics>> std::unordered_map<String, std::shared_ptr<PartitionMetrics>>
Catalog::loadPartitionMetricsSnapshotFromMetastore(const String & table_uuid) Catalog::loadPartitionMetricsSnapshotFromMetastore(const String & table_uuid)
{ {

View File

@ -16,6 +16,7 @@
#pragma once #pragma once
#include <map> #include <map>
#include <optional>
#include <set> #include <set>
#include <Catalog/CatalogUtils.h> #include <Catalog/CatalogUtils.h>
#include <Catalog/DataModelPartWrapper.h> #include <Catalog/DataModelPartWrapper.h>
@ -34,12 +35,15 @@
#include <Transaction/TransactionCommon.h> #include <Transaction/TransactionCommon.h>
#include <Transaction/TxnTimestamp.h> #include <Transaction/TxnTimestamp.h>
#include <cppkafka/cppkafka.h> #include <cppkafka/cppkafka.h>
#include "common/types.h"
#include <Common/Config/MetastoreConfig.h>
#include <Common/Configurations.h> #include <Common/Configurations.h>
#include <Common/DNSResolver.h> #include <Common/DNSResolver.h>
#include <Common/HostWithPorts.h> #include <Common/HostWithPorts.h>
#include <Common/Config/MetastoreConfig.h>
#include <common/getFQDNOrHostName.h> #include <common/getFQDNOrHostName.h>
#include "Catalog/IMetastore.h" #include "Storages/IStorage_fwd.h"
#include <Storages/StorageSnapshot.h>
#include <Catalog/IMetastore.h>
// #include <Access/MaskingPolicyDataModel.h> // #include <Access/MaskingPolicyDataModel.h>
namespace DB::ErrorCodes namespace DB::ErrorCodes
@ -755,6 +759,25 @@ public:
DeleteBitmapMetaPtrVector listDetachedDeleteBitmaps(const MergeTreeMetaBase & storage, const AttachFilter & filter); DeleteBitmapMetaPtrVector listDetachedDeleteBitmaps(const MergeTreeMetaBase & storage, const AttachFilter & filter);
// Append partial object column schema in Txn
void
appendObjectPartialSchema(const StoragePtr & table, const TxnTimestamp & txn_id, const MutableMergeTreeDataPartsCNCHVector & parts);
ObjectAssembledSchema tryGetTableObjectAssembledSchema(const UUID & table_uuid) const;
std::vector<TxnTimestamp> filterUncommittedObjectPartialSchemas(std::vector<TxnTimestamp> & unfiltered_partial_schema_txnids);
// @param limit_size -1 means no limit , read all partial schemas as possible
ObjectPartialSchemas tryGetTableObjectPartialSchemas(const UUID & table_uuid, const int & limit_size = -1) const;
bool resetObjectAssembledSchemaAndPurgePartialSchemas(
const UUID & table_uuid,
const ObjectAssembledSchema & old_assembled_schema,
const ObjectAssembledSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids);
ObjectPartialSchemaStatuses batchGetObjectPartialSchemaStatuses(const std::vector<TxnTimestamp> & txn_ids, const int & batch_size = 10000);
void batchDeleteObjectPartialSchemaStatus(const std::vector<TxnTimestamp> & txn_ids);
void commitObjectPartialSchema(const TxnTimestamp & txn_id);
void abortObjectPartialSchema(const TxnTimestamp & txn_id);
void initStorageObjectSchema(StoragePtr & res);
// Access Entities // Access Entities
std::optional<AccessEntityModel> tryGetAccessEntity(EntityType type, const String & name); std::optional<AccessEntityModel> tryGetAccessEntity(EntityType type, const String & name);
std::vector<AccessEntityModel> getAllAccessEntities(EntityType type); std::vector<AccessEntityModel> getAllAccessEntities(EntityType type);
@ -762,6 +785,7 @@ public:
void dropAccessEntity(EntityType type, const UUID & uuid, const String & name); void dropAccessEntity(EntityType type, const UUID & uuid, const String & name);
void putAccessEntity(EntityType type, AccessEntityModel & new_access_entity, AccessEntityModel & old_access_entity, bool replace_if_exists = true); void putAccessEntity(EntityType type, AccessEntityModel & new_access_entity, AccessEntityModel & old_access_entity, bool replace_if_exists = true);
private: private:
Poco::Logger * log = &Poco::Logger::get("Catalog"); Poco::Logger * log = &Poco::Logger::get("Catalog");
Context & context; Context & context;

View File

@ -26,10 +26,12 @@
#include <DaemonManager/BGJobStatusInCatalog.h> #include <DaemonManager/BGJobStatusInCatalog.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <Protos/DataModelHelpers.h> #include <Protos/DataModelHelpers.h>
#include "common/types.h" #include <common/types.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include "Catalog/MetastoreByteKVImpl.h" #include <Catalog/MetastoreByteKVImpl.h>
#include "Interpreters/executeQuery.h" #include <Interpreters/executeQuery.h>
#include <Storages/MergeTree/IMetastore.h>
#include <Storages/StorageSnapshot.h>
namespace DB::ErrorCodes namespace DB::ErrorCodes
{ {
@ -1585,6 +1587,11 @@ void MetastoreProxy::setBGJobStatus(const String & name_space, const String & uu
dedupWorkerBGJobStatusKey(name_space, uuid), dedupWorkerBGJobStatusKey(name_space, uuid),
String{BGJobStatusInCatalog::serializeToChar(status)} String{BGJobStatusInCatalog::serializeToChar(status)}
); );
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
metastore_ptr->put(
objectSchemaAssembleBGJobStatusKey(name_space, uuid),
String{BGJobStatusInCatalog::serializeToChar(status)}
);
else else
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR); throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
} }
@ -1604,6 +1611,8 @@ std::optional<CnchBGThreadStatus> MetastoreProxy::getBGJobStatus(const String &
metastore_ptr->get(mmysqlBGJobStatusKey(name_space, uuid), status_store_data); metastore_ptr->get(mmysqlBGJobStatusKey(name_space, uuid), status_store_data);
else if (type == CnchBGThreadType::DedupWorker) else if (type == CnchBGThreadType::DedupWorker)
metastore_ptr->get(dedupWorkerBGJobStatusKey(name_space, uuid), status_store_data); metastore_ptr->get(dedupWorkerBGJobStatusKey(name_space, uuid), status_store_data);
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
metastore_ptr->get(objectSchemaAssembleBGJobStatusKey(name_space, uuid), status_store_data);
else else
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR); throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
@ -1638,6 +1647,8 @@ std::unordered_map<UUID, CnchBGThreadStatus> MetastoreProxy::getBGJobStatuses(co
return metastore_ptr->getByPrefix(allMmysqlBGJobStatusKeyPrefix(name_space)); return metastore_ptr->getByPrefix(allMmysqlBGJobStatusKeyPrefix(name_space));
else if (type == CnchBGThreadType::DedupWorker) else if (type == CnchBGThreadType::DedupWorker)
return metastore_ptr->getByPrefix(allDedupWorkerBGJobStatusKeyPrefix(name_space)); return metastore_ptr->getByPrefix(allDedupWorkerBGJobStatusKeyPrefix(name_space));
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
return metastore_ptr->getByPrefix(allObjectSchemaAssembleBGJobStatusKeyPrefix(name_space));
else else
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR); throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
}; };
@ -1679,6 +1690,9 @@ void MetastoreProxy::dropBGJobStatus(const String & name_space, const String & u
case CnchBGThreadType::DedupWorker: case CnchBGThreadType::DedupWorker:
metastore_ptr->drop(dedupWorkerBGJobStatusKey(name_space, uuid)); metastore_ptr->drop(dedupWorkerBGJobStatusKey(name_space, uuid));
break; break;
case CnchBGThreadType::ObjectSchemaAssemble:
metastore_ptr->drop(objectSchemaAssembleBGJobStatusKey(name_space, uuid));
break;
default: default:
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR); throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
} }
@ -2710,6 +2724,139 @@ IMetaStore::IteratorPtr MetastoreProxy::getItemsInTrash(const String & name_spac
return metastore_ptr->getByPrefix(trashItemsPrefix(name_space, table_uuid), limit); return metastore_ptr->getByPrefix(trashItemsPrefix(name_space, table_uuid), limit);
} }
String MetastoreProxy::extractTxnIDFromPartialSchemaKey(const String &partial_schema_key)
{
auto pos = partial_schema_key.find_last_of('_');
return partial_schema_key.substr(pos + 1, String::npos);
}
void MetastoreProxy::appendObjectPartialSchema(
const String & name_space, const String & table_uuid, const UInt64 & txn_id, const SerializedObjectSchema & partial_schema)
{
BatchCommitRequest batch_write;
batch_write.AddPut(SinglePutRequest(partialSchemaKey(name_space, table_uuid, txn_id), partial_schema));
batch_write.AddPut(SinglePutRequest(
partialSchemaStatusKey(name_space, txn_id),
ObjectSchemas::serializeObjectPartialSchemaStatus(ObjectPartialSchemaStatus::Running)));
BatchCommitResponse store_response;
metastore_ptr->batchWrite(batch_write, store_response);
}
SerializedObjectSchema MetastoreProxy::getObjectPartialSchema(const String &name_space, const String &table_uuid, const UInt64 &txn_id)
{
SerializedObjectSchema partial_schema;
metastore_ptr->get(partialSchemaKey(name_space, table_uuid, txn_id), partial_schema);
if (partial_schema.empty())
return "";
return partial_schema;
}
SerializedObjectSchemas MetastoreProxy::scanObjectPartialSchemas(const String &name_space, const String &table_uuid, const UInt64 &limit_size)
{
auto scan_prefix = partialSchemaPrefix(name_space, table_uuid);
UInt64 scan_limit = limit_size <= 0 ? 0 : limit_size;
auto scan_iterator = metastore_ptr->getByPrefix(scan_prefix, scan_limit);
SerializedObjectSchemas serialized_object_schemas;
while (scan_iterator->next())
{
auto key = scan_iterator->key();
serialized_object_schemas.emplace(extractTxnIDFromPartialSchemaKey(key), scan_iterator->value());
}
return serialized_object_schemas;
}
SerializedObjectSchema MetastoreProxy::getObjectAssembledSchema(const String &name_space, const String &table_uuid)
{
SerializedObjectSchema assembled_schema;
metastore_ptr->get(assembledSchemaKey(name_space, table_uuid), assembled_schema);
if (assembled_schema.empty())
return "";
return assembled_schema;
}
bool MetastoreProxy::resetObjectAssembledSchemaAndPurgePartialSchemas(
const String & name_space,
const String & table_uuid,
const SerializedObjectSchema & old_assembled_schema,
const SerializedObjectSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids)
{
Poco::Logger * log = &Poco::Logger::get(__func__);
BatchCommitRequest batch_write;
bool if_not_exists = false;
if (old_assembled_schema.empty())
if_not_exists = true;
auto update_request = SinglePutRequest(assembledSchemaKey(name_space, table_uuid), new_assembled_schema, old_assembled_schema);
update_request.if_not_exists = if_not_exists;
batch_write.AddPut(update_request);
for (const auto & txn_id : partial_schema_txnids)
batch_write.AddDelete(partialSchemaKey(name_space, table_uuid, txn_id.toUInt64()));
BatchCommitResponse store_response;
try
{
metastore_ptr->batchWrite(batch_write, store_response);
return true;
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::METASTORE_COMMIT_CAS_FAILURE)
{
LOG_WARNING(
log,
fmt::format(
"Object schema refresh CAS put fail with old schema:{} and new schema:{}", old_assembled_schema, new_assembled_schema));
return false;
}
else
throw e;
}
}
SerializedObjectSchemaStatuses MetastoreProxy::batchGetObjectPartialSchemaStatuses(const String &name_space, const std::vector<TxnTimestamp> &txn_ids)
{
Strings keys;
for (const auto & txn_id : txn_ids)
keys.emplace_back(partialSchemaStatusKey(name_space, txn_id.toUInt64()));
auto serialized_statuses_in_metastore = metastore_ptr->multiGet(keys);
SerializedObjectSchemaStatuses serialized_statuses;
serialized_statuses.reserve(serialized_statuses_in_metastore.size());
for (size_t i = 0; i < serialized_statuses_in_metastore.size(); i++)
{
auto txn_id = txn_ids[i];
auto status = serialized_statuses_in_metastore[i].first;
if (status.empty())
serialized_statuses.emplace(txn_id, ObjectSchemas::serializeObjectPartialSchemaStatus(ObjectPartialSchemaStatus::Finished));
else
serialized_statuses.emplace(txn_id, status);
}
return serialized_statuses;
}
void MetastoreProxy::batchDeletePartialSchemaStatus(const String &name_space, const std::vector<TxnTimestamp> &txn_ids)
{
BatchCommitRequest batch_delete;
for (const auto & txn_id : txn_ids)
batch_delete.AddDelete(partialSchemaStatusKey(name_space, txn_id.toUInt64()));
BatchCommitResponse delete_result;
metastore_ptr->batchWrite(batch_delete, delete_result);
}
void MetastoreProxy::updateObjectPartialSchemaStatus(const String &name_space, const TxnTimestamp &txn_id, const ObjectPartialSchemaStatus & status)
{
metastore_ptr->put(partialSchemaStatusKey(name_space, txn_id), ObjectSchemas::serializeObjectPartialSchemaStatus(status));
}
IMetaStore::IteratorPtr MetastoreProxy::getAllDeleteBitmaps(const String & name_space, const String & table_uuid) IMetaStore::IteratorPtr MetastoreProxy::getAllDeleteBitmaps(const String & name_space, const String & table_uuid)
{ {
return metastore_ptr->getByPrefix(deleteBitmapPrefix(name_space, table_uuid)); return metastore_ptr->getByPrefix(deleteBitmapPrefix(name_space, table_uuid));

View File

@ -22,6 +22,7 @@
#include <Storages/MergeTree/DeleteBitmapMeta.h> #include <Storages/MergeTree/DeleteBitmapMeta.h>
// #include <Transaction/ICnchTransaction.h> // #include <Transaction/ICnchTransaction.h>
#include <sstream> #include <sstream>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <Catalog/IMetastore.h> #include <Catalog/IMetastore.h>
#include <CloudServices/CnchBGThreadCommon.h> #include <CloudServices/CnchBGThreadCommon.h>
@ -36,6 +37,7 @@
#include <cppkafka/cppkafka.h> #include <cppkafka/cppkafka.h>
#include <google/protobuf/repeated_field.h> #include <google/protobuf/repeated_field.h>
#include <common/types.h> #include <common/types.h>
#include <Storages/StorageSnapshot.h>
#include <Access/IAccessEntity.h> #include <Access/IAccessEntity.h>
#include <Parsers/formatTenantDatabaseName.h> #include <Parsers/formatTenantDatabaseName.h>
#include <Interpreters/SQLBinding/SQLBinding.h> #include <Interpreters/SQLBinding/SQLBinding.h>
@ -95,6 +97,7 @@ namespace DB::Catalog
#define PARTGC_BG_JOB_STATUS "PARTGC_BGJS_" #define PARTGC_BG_JOB_STATUS "PARTGC_BGJS_"
#define CONSUMER_BG_JOB_STATUS "CONSUMER_BGJS_" #define CONSUMER_BG_JOB_STATUS "CONSUMER_BGJS_"
#define DEDUPWORKER_BG_JOB_STATUS "DEDUPWORKER_BGJS_" #define DEDUPWORKER_BG_JOB_STATUS "DEDUPWORKER_BGJS_"
#define OBJECT_SCHEMA_ASSEMBLE_BG_JOB_STATUS "OBJECT_SCHEMA_ASSEMBLE_BGJS_"
#define PREALLOCATE_VW "PVW_" #define PREALLOCATE_VW "PVW_"
#define DICTIONARY_STORE_PREFIX "DIC_" #define DICTIONARY_STORE_PREFIX "DIC_"
#define RESOURCE_GROUP_PREFIX "RG_" #define RESOURCE_GROUP_PREFIX "RG_"
@ -113,6 +116,9 @@ namespace DB::Catalog
#define MATERIALIZEDMYSQL_PREFIX "MMYSQL_" #define MATERIALIZEDMYSQL_PREFIX "MMYSQL_"
#define MATERIALIZEDMYSQL_BG_JOB_STATUS "MATERIALIZEDMYSQL_BGJS_" #define MATERIALIZEDMYSQL_BG_JOB_STATUS "MATERIALIZEDMYSQL_BGJS_"
#define DETACHED_DELETE_BITMAP_PREFIX "DDLB_" #define DETACHED_DELETE_BITMAP_PREFIX "DDLB_"
#define OBJECT_PARTIAL_SCHEMA_PREFIX "PS_"
#define OBJECT_ASSEMBLED_SCHEMA_PREFIX "AS_"
#define OBJECT_PARTIAL_SCHEMA_STATUS_PREFIX "PSS_"
#define PARTITION_PARTS_METRICS_SNAPSHOT_PREFIX "PPS_" #define PARTITION_PARTS_METRICS_SNAPSHOT_PREFIX "PPS_"
#define TABLE_TRASHITEMS_METRICS_SNAPSHOT_PREFIX "TTS_" #define TABLE_TRASHITEMS_METRICS_SNAPSHOT_PREFIX "TTS_"
#define DICTIONARY_BUCKET_UPDATE_TIME_PREFIX "DBUT_" #define DICTIONARY_BUCKET_UPDATE_TIME_PREFIX "DBUT_"
@ -143,6 +149,10 @@ static EntityMetastorePrefix getEntityMetastorePrefix(EntityType type)
} }
} }
using SerializedObjectSchemas = std::unordered_map<String, String>;
using SerializedObjectSchemaStatuses = std::unordered_map<TxnTimestamp, String, TxnTimestampHasher>;
using SerializedObjectSchema = String;
static std::shared_ptr<MetastoreFDBImpl> getFDBInstance(const String & cluster_config_path) static std::shared_ptr<MetastoreFDBImpl> getFDBInstance(const String & cluster_config_path)
{ {
/// Notice: A single process can only have fdb instance /// Notice: A single process can only have fdb instance
@ -592,6 +602,16 @@ public:
return allDedupWorkerBGJobStatusKeyPrefix(name_space) + uuid; return allDedupWorkerBGJobStatusKeyPrefix(name_space) + uuid;
} }
static std::string allObjectSchemaAssembleBGJobStatusKeyPrefix(const std::string & name_space)
{
return escapeString(name_space) + '_' + OBJECT_SCHEMA_ASSEMBLE_BG_JOB_STATUS;
}
static std::string objectSchemaAssembleBGJobStatusKey(const std::string & name_space, const std::string & uuid)
{
return allObjectSchemaAssembleBGJobStatusKeyPrefix(name_space) + uuid;
}
static UUID parseUUIDFromBGJobStatusKey(const std::string & key); static UUID parseUUIDFromBGJobStatusKey(const std::string & key);
static std::string preallocateVW(const std::string & name_space, const std::string & uuid) static std::string preallocateVW(const std::string & name_space, const std::string & uuid)
@ -765,6 +785,26 @@ public:
return escapeString(name_space) + "_" + DATA_ITEM_TRASH_PREFIX + uuid + "_"; return escapeString(name_space) + "_" + DATA_ITEM_TRASH_PREFIX + uuid + "_";
} }
static String partialSchemaPrefix(const String & name_space, const String & table_uuid)
{
return escapeString(name_space) + "_" + OBJECT_PARTIAL_SCHEMA_PREFIX + table_uuid + "_";
}
static String partialSchemaKey(const String & name_space, const String & table_uuid, const UInt64 & txn_id)
{
return escapeString(name_space) + "_" + OBJECT_PARTIAL_SCHEMA_PREFIX + table_uuid + "_" + toString(txn_id);
}
static String assembledSchemaKey(const String & name_space, const String & table_uuid)
{
return escapeString(name_space) + "_" + OBJECT_ASSEMBLED_SCHEMA_PREFIX + table_uuid;
}
static String partialSchemaStatusKey(const String & name_space, const UInt64 & txn_id)
{
return escapeString(name_space) + "-" + OBJECT_PARTIAL_SCHEMA_STATUS_PREFIX + "_" + toString(txn_id);
}
static String partitionPartsMetricsSnapshotPrefix(const String & name_space, const String & table_uuid, const String & partition_id) static String partitionPartsMetricsSnapshotPrefix(const String & name_space, const String & table_uuid, const String & partition_id)
{ {
return escapeString(name_space) + "_" + PARTITION_PARTS_METRICS_SNAPSHOT_PREFIX + table_uuid + "_" + partition_id; return escapeString(name_space) + "_" + PARTITION_PARTS_METRICS_SNAPSHOT_PREFIX + table_uuid + "_" + partition_id;
@ -1105,6 +1145,25 @@ public:
* @param limit Limit the results, disabled by passing 0. * @param limit Limit the results, disabled by passing 0.
*/ */
IMetaStore::IteratorPtr getItemsInTrash(const String & name_space, const String & table_uuid, const size_t & limit); IMetaStore::IteratorPtr getItemsInTrash(const String & name_space, const String & table_uuid, const size_t & limit);
//Object column schema related API
static String extractTxnIDFromPartialSchemaKey(const String & partial_schema_key);
void appendObjectPartialSchema(
const String & name_space, const String & table_uuid, const UInt64 & txn_id, const SerializedObjectSchema & partial_schema);
SerializedObjectSchema getObjectPartialSchema(const String & name_space, const String & table_uuid, const UInt64 & txn_id);
SerializedObjectSchemas scanObjectPartialSchemas(const String & name_space, const String & table_uuid, const UInt64 & limit_size);
SerializedObjectSchema getObjectAssembledSchema(const String & name_space, const String & table_uuid);
bool resetObjectAssembledSchemaAndPurgePartialSchemas(
const String & name_space,
const String & table_uuid,
const SerializedObjectSchema & old_assembled_schema,
const SerializedObjectSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids);
SerializedObjectSchemaStatuses batchGetObjectPartialSchemaStatuses(const String & name_space, const std::vector<TxnTimestamp> & txn_ids);
void batchDeletePartialSchemaStatus(const String & name_space, const std::vector<TxnTimestamp> & txn_ids);
void updateObjectPartialSchemaStatus(const String &name_space, const TxnTimestamp & txn_id, const ObjectPartialSchemaStatus & status);
IMetaStore::IteratorPtr getAllDeleteBitmaps(const String & name_space, const String & table_uuid); IMetaStore::IteratorPtr getAllDeleteBitmaps(const String & name_space, const String & table_uuid);
/** /**

View File

@ -35,9 +35,10 @@ namespace CnchBGThread
DedupWorker = 5, DedupWorker = 5,
Clustering = 6, Clustering = 6,
MaterializedMySQL = 7, MaterializedMySQL = 7,
ObjectSchemaAssemble = 8,
ServerMinType = PartGC, ServerMinType = PartGC,
ServerMaxType = MaterializedMySQL, ServerMaxType = ObjectSchemaAssemble,
GlobalGC = 20, /// reserve several entries GlobalGC = 20, /// reserve several entries
TxnGC = 21, TxnGC = 21,
@ -96,6 +97,8 @@ constexpr auto toString(CnchBGThreadType type)
return "TxnGCThread"; return "TxnGCThread";
case CnchBGThreadType::ResourceReport: case CnchBGThreadType::ResourceReport:
return "ResourceReport"; return "ResourceReport";
case CnchBGThreadType::ObjectSchemaAssemble:
return "ObjectSchemaAssembleThread";
case CnchBGThreadType::MemoryBuffer: case CnchBGThreadType::MemoryBuffer:
return "MemoryBuffer"; return "MemoryBuffer";
case CnchBGThreadType::MaterializedMySQL: case CnchBGThreadType::MaterializedMySQL:

View File

@ -23,6 +23,8 @@
#include <CloudServices/CnchPartGCThread.h> #include <CloudServices/CnchPartGCThread.h>
#include <CloudServices/DedupWorkerManager.h> #include <CloudServices/DedupWorkerManager.h>
#include <CloudServices/ReclusteringManagerThread.h> #include <CloudServices/ReclusteringManagerThread.h>
#include <CloudServices/CnchObjectColumnSchemaAssembleThread.h>
#include <Databases/MySQL/MaterializedMySQLSyncThreadManager.h> #include <Databases/MySQL/MaterializedMySQLSyncThreadManager.h>
#include <regex> #include <regex>
@ -71,6 +73,10 @@ CnchBGThreadPtr CnchBGThreadsMap::createThread(const StorageID & storage_id)
{ {
return std::make_shared<ReclusteringManagerThread>(getContext(), storage_id); return std::make_shared<ReclusteringManagerThread>(getContext(), storage_id);
} }
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
{
return std::make_shared<CnchObjectColumnSchemaAssembleThread>(getContext(), storage_id);
}
else if (type == CnchBGThreadType::MaterializedMySQL) else if (type == CnchBGThreadType::MaterializedMySQL)
{ {
return std::make_shared<MaterializedMySQLSyncThreadManager>(getContext(), storage_id); return std::make_shared<MaterializedMySQLSyncThreadManager>(getContext(), storage_id);

View File

@ -0,0 +1,138 @@
#include <algorithm>
#include <string>
#include <CloudServices/CnchObjectColumnSchemaAssembleThread.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/Context.h>
#include <Storages/CnchStorageCache.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/PartCacheManager.h>
#include <Storages/StorageCnchMergeTree.h>
#include <Storages/StorageSnapshot.h>
#include <Transaction/TxnTimestamp.h>
#include <fmt/format.h>
namespace DB
{
CnchObjectColumnSchemaAssembleThread::CnchObjectColumnSchemaAssembleThread(ContextPtr context_, const StorageID & id_)
: ICnchBGThread(context_, CnchBGThreadType::ObjectSchemaAssemble, id_)
{
}
void CnchObjectColumnSchemaAssembleThread::runImpl()
{
try
{
auto storage = getStorageFromCatalog();
auto table_uuid = storage->getStorageUUID();
auto & table = checkAndGetCnchTable(storage);
auto storage_settings = table.getSettings();
auto database_name = storage->getDatabaseName();
auto table_name = storage->getTableName();
if (storage->is_dropped)
{
LOG_DEBUG(log, "Table was dropped, wait for removing...");
scheduled_task->scheduleAfter(10 * 1000);
return;
}
if (storage->supportsDynamicSubcolumns() && hasDynamicSubcolumns(storage->getInMemoryMetadata().getColumns()))
{
LOG_INFO(log, "{}.{} Start assemble partial schemas", database_name, table_name);
auto catalog = getContext()->getCnchCatalog();
auto [current_topology_version, current_topology] = getContext()->getCnchTopologyMaster()->getCurrentTopologyVersion();
// Step 1:scan object partial schema and drop uncommitted partial schemas
auto old_assembled_schema = catalog->tryGetTableObjectAssembledSchema(table_uuid);
auto partial_schemas
= catalog->tryGetTableObjectPartialSchemas(table_uuid, storage_settings->json_partial_schema_assemble_batch_size);
LOG_DEBUG(log, "{}.{} Before assemble. Assembled schema :{}", database_name, table_name, old_assembled_schema.toString());
if (partial_schemas.empty())
{
LOG_INFO(log, "{}.{} no need to refresh dynamic object column schema.", database_name, table_name);
scheduled_task->scheduleAfter(50 * 1000);
return;
}
std::vector<TxnTimestamp> unfiltered_partial_schema_txnids;
unfiltered_partial_schema_txnids.reserve(partial_schemas.size());
for (const auto & [txn_id, partial_schema] : partial_schemas)
{
LOG_DEBUG(
log,
"{}.{} Before assemble. Partial schema :[{}->{}]",
database_name,
table_name,
txn_id.toString(),
partial_schema.toString());
unfiltered_partial_schema_txnids.emplace_back(txn_id);
}
auto committed_partial_schema_txnids = catalog->filterUncommittedObjectPartialSchemas(unfiltered_partial_schema_txnids);
// Step 2:assemble partial schema to assembled schema
std::vector<String> partial_schema_txn_ids_for_print;
std::vector<ColumnsDescription> schemas_ready_to_assemble;
partial_schema_txn_ids_for_print.reserve(committed_partial_schema_txnids.size());
schemas_ready_to_assemble.reserve(committed_partial_schema_txnids.size() + 1);
for (auto & txn_id : committed_partial_schema_txnids)
{
auto partial_schema = partial_schemas[txn_id];
schemas_ready_to_assemble.emplace_back(partial_schema);
partial_schema_txn_ids_for_print.emplace_back(txn_id.toString());
}
schemas_ready_to_assemble.emplace_back(old_assembled_schema);
auto new_assembled_schema = DB::getConcreteObjectColumns(
schemas_ready_to_assemble.begin(),
schemas_ready_to_assemble.end(),
storage->getInMemoryMetadata().getColumns(),
[](const auto & schema) { return schema; });
// Step 3:update assembled schema and delete partial schema in meta store
// TODO:@lianwenlong consider purge fail and check lease
auto cas_put_result = catalog->resetObjectAssembledSchemaAndPurgePartialSchemas(
table_uuid, old_assembled_schema, new_assembled_schema, committed_partial_schema_txnids);
LOG_DEBUG(
log,
"{}.{} After assemble.Assembled schema :{} and deleted txn ids:{}, result:{}",
database_name,
table_name,
new_assembled_schema.toString(),
fmt::join(partial_schema_txn_ids_for_print, ","),
std::to_string(cas_put_result));
if (cas_put_result)
{
// Step 4:update assembled schema and delete partial schema in storage cache
if (auto cache_manager = getContext()->getPartCacheManager())
{
if (auto storage_in_cache = cache_manager->getStorageFromCache(table_uuid, current_topology_version))
{
auto & table_in_cache = checkAndGetCnchTable(storage_in_cache);
table_in_cache.refreshAssembledSchema(new_assembled_schema, committed_partial_schema_txnids);
}
}
// Step5: clean partial schema status in meta store including
catalog->batchDeleteObjectPartialSchemaStatus(committed_partial_schema_txnids);
// Step6: @TODO:lianwenlong rollback aborted partial schema from meta store and storage cache
}
LOG_INFO(
log, "{}.{} Finish assemble partial schemas with result:{}", database_name, table_name, std::to_string(cas_put_result));
}
}
catch (...)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
}
scheduled_task->scheduleAfter(50 * 1000);
}
} //namespace DB end

View File

@ -0,0 +1,16 @@
#pragma once
#include <CloudServices/ICnchBGThread.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
class CnchObjectColumnSchemaAssembleThread : public ICnchBGThread
{
public:
CnchObjectColumnSchemaAssembleThread(ContextPtr context_, const StorageID & id_);
private:
void runImpl() override;
};
}//namespace DB end

View File

@ -27,9 +27,10 @@
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <Common/CurrentThread.h> #include <Common/CurrentThread.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include "Interpreters/Context_fwd.h" #include <DataTypes/ObjectUtils.h>
#include "Storages/Hive/HiveFile/IHiveFile.h" #include <Interpreters/Context_fwd.h>
#include "Storages/Hive/StorageCnchHive.h" #include <Storages/Hive/HiveFile/IHiveFile.h>
#include <Storages/Hive/StorageCnchHive.h>
#include <Storages/RemoteFile/StorageCnchHDFS.h> #include <Storages/RemoteFile/StorageCnchHDFS.h>
#include <Storages/RemoteFile/StorageCnchS3.h> #include <Storages/RemoteFile/StorageCnchS3.h>
@ -63,6 +64,7 @@ AssignedResource::AssignedResource(AssignedResource && resource)
part_names = resource.part_names; // don't call move here part_names = resource.part_names; // don't call move here
resource.sent_create_query = true; resource.sent_create_query = true;
object_columns = resource.object_columns;
} }
void AssignedResource::addDataParts(const ServerDataPartsVector & parts) void AssignedResource::addDataParts(const ServerDataPartsVector & parts)
@ -470,6 +472,7 @@ void CnchServerResource::allocateResource(
worker_resource.create_table_query = resource.create_table_query; worker_resource.create_table_query = resource.create_table_query;
worker_resource.worker_table_name = resource.worker_table_name; worker_resource.worker_table_name = resource.worker_table_name;
worker_resource.bucket_numbers = assigned_bucket_numbers; worker_resource.bucket_numbers = assigned_bucket_numbers;
worker_resource.object_columns = resource.object_columns;
} }
} }
} }

View File

@ -17,6 +17,8 @@
#include <optional> #include <optional>
#include <Catalog/DataModelPartWrapper_fwd.h> #include <Catalog/DataModelPartWrapper_fwd.h>
#include <CloudServices/CnchWorkerClient.h> #include <CloudServices/CnchWorkerClient.h>
#include <Common/HostWithPorts.h>
#include <Storages/ColumnsDescription.h>
#include <Core/Types.h> #include <Core/Types.h>
#include <Interpreters/StorageID.h> #include <Interpreters/StorageID.h>
#include <Interpreters/WorkerGroupHandle.h> #include <Interpreters/WorkerGroupHandle.h>
@ -86,6 +88,8 @@ struct AssignedResource
std::unordered_set<String> part_names; std::unordered_set<String> part_names;
ColumnsDescription object_columns;
void addDataParts(const ServerDataPartsVector & parts); void addDataParts(const ServerDataPartsVector & parts);
void addDataParts(const FileDataPartsCNCHVector & parts); void addDataParts(const FileDataPartsCNCHVector & parts);
void addDataParts(const HiveFiles & parts); void addDataParts(const HiveFiles & parts);
@ -141,6 +145,13 @@ public:
assigned_resource.bucket_numbers = required_bucket_numbers; assigned_resource.bucket_numbers = required_bucket_numbers;
} }
void setResourceReplicated(const UUID & storage_id, bool replicated)
{
std::lock_guard lock(mutex);
auto & assigned_resource = assigned_table_resource.at(storage_id);
assigned_resource.replicated = replicated;
}
/// Send resource to worker /// Send resource to worker
void sendResource(const ContextPtr & context, const HostWithPorts & worker); void sendResource(const ContextPtr & context, const HostWithPorts & worker);
/// allocate and send resource to worker_group /// allocate and send resource to worker_group
@ -152,6 +163,14 @@ public:
void sendResources(const ContextPtr & context, WorkerAction act); void sendResources(const ContextPtr & context, WorkerAction act);
void cleanResource(); void cleanResource();
void addDynamicObjectSchema(const UUID & storage_id, const ColumnsDescription & object_columns_)
{
std::lock_guard lock(mutex);
auto & assigned_resource = assigned_table_resource.at(storage_id);
assigned_resource.object_columns = object_columns_;
}
void setSendMutations(bool send_mutations_) { send_mutations = send_mutations_; } void setSendMutations(bool send_mutations_) { send_mutations = send_mutations_; }
private: private:

View File

@ -26,12 +26,13 @@
#include <Transaction/TransactionCommon.h> #include <Transaction/TransactionCommon.h>
#include <Transaction/TransactionCoordinatorRcCnch.h> #include <Transaction/TransactionCoordinatorRcCnch.h>
#include <Transaction/TxnTimestamp.h> #include <Transaction/TxnTimestamp.h>
#include "Common/tests/gtest_global_context.h" #include <Common/RWLock.h>
#include <Common/tests/gtest_global_context.h>
#include <Statistics/AutoStatisticsHelper.h> #include <Statistics/AutoStatisticsHelper.h>
#include <Statistics/AutoStatisticsRpcUtils.h> #include <Statistics/AutoStatisticsRpcUtils.h>
#include <Statistics/AutoStatisticsManager.h> #include <Statistics/AutoStatisticsManager.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Access/AccessControlManager.h> #include <DataTypes/ObjectUtils.h>
#include <Interpreters/DatabaseCatalog.h> #include <Interpreters/DatabaseCatalog.h>
#include <CloudServices/CnchMergeMutateThread.h> #include <CloudServices/CnchMergeMutateThread.h>
#include <CloudServices/CnchDataWriter.h> #include <CloudServices/CnchDataWriter.h>

View File

@ -22,8 +22,9 @@
#include <Protos/RPCHelpers.h> #include <Protos/RPCHelpers.h>
#include <Protos/cnch_worker_rpc.pb.h> #include <Protos/cnch_worker_rpc.pb.h>
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
#include "Storages/Hive/HiveFile/IHiveFile.h" #include <DataTypes/ObjectUtils.h>
#include "Storages/Hive/StorageCnchHive.h" #include <Storages/Hive/HiveFile/IHiveFile.h>
#include <Storages/Hive/StorageCnchHive.h>
#include <Storages/StorageCnchMergeTree.h> #include <Storages/StorageCnchMergeTree.h>
#include <Transaction/ICnchTransaction.h> #include <Transaction/ICnchTransaction.h>
#include <WorkerTasks/ManipulationList.h> #include <WorkerTasks/ManipulationList.h>
@ -76,6 +77,12 @@ void CnchWorkerClient::submitManipulationTask(
params.mutation_commands->writeText(write_buf); params.mutation_commands->writeText(write_buf);
} }
if (hasDynamicSubcolumns(storage.getInMemoryMetadata().columns))
{
request.set_dynamic_object_column_schema(
storage.getStorageSnapshot(storage.getInMemoryMetadataPtr(), nullptr)->object_columns.toString());
}
stub->submitManipulationTask(&cntl, &request, &response, nullptr); stub->submitManipulationTask(&cntl, &request, &response, nullptr);
assertController(cntl); assertController(cntl);
@ -322,7 +329,10 @@ brpc::CallId CnchWorkerClient::sendResources(
for (const auto & resource : resources_to_send) for (const auto & resource : resources_to_send)
{ {
if (!resource.sent_create_query) if (!resource.sent_create_query)
{
request.add_create_queries(resource.create_table_query); request.add_create_queries(resource.create_table_query);
request.add_dynamic_object_column_schema(resource.object_columns.toString());
}
/// parts /// parts
auto & table_data_parts = *request.mutable_data_parts()->Add(); auto & table_data_parts = *request.mutable_data_parts()->Add();

View File

@ -13,6 +13,7 @@
* limitations under the License. * limitations under the License.
*/ */
#include <memory>
#include <CloudServices/CnchWorkerResource.h> #include <CloudServices/CnchWorkerResource.h>
#include <Core/Names.h> #include <Core/Names.h>
@ -26,6 +27,7 @@
#include <Parsers/ASTForeignKeyDeclaration.h> #include <Parsers/ASTForeignKeyDeclaration.h>
#include <Parsers/ASTUniqueNotEnforcedDeclaration.h> #include <Parsers/ASTUniqueNotEnforcedDeclaration.h>
#include <Poco/Logger.h> #include <Poco/Logger.h>
#include <Storages/StorageCloudMergeTree.h>
#include <Storages/ForeignKeysDescription.h> #include <Storages/ForeignKeysDescription.h>
#include <Storages/UniqueNotEnforcedDescription.h> #include <Storages/UniqueNotEnforcedDescription.h>
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
@ -43,7 +45,7 @@ namespace ErrorCodes
extern const int TABLE_ALREADY_EXISTS; extern const int TABLE_ALREADY_EXISTS;
} }
void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists) void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists, const ColumnsDescription & object_columns)
{ {
LOG_DEBUG(&Poco::Logger::get("WorkerResource"), "start create cloud table {}", create_query); LOG_DEBUG(&Poco::Logger::get("WorkerResource"), "start create cloud table {}", create_query);
const char * begin = create_query.data(); const char * begin = create_query.data();
@ -139,6 +141,9 @@ void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const Str
StoragePtr res = StorageFactory::instance().get(ast_create_query, "", context, context->getGlobalContext(), columns, constraints, foreign_keys, unique_not_enforced, false); StoragePtr res = StorageFactory::instance().get(ast_create_query, "", context, context->getGlobalContext(), columns, constraints, foreign_keys, unique_not_enforced, false);
res->startup(); res->startup();
if (auto cloud_table = std::dynamic_pointer_cast<StorageCloudMergeTree>(res))
cloud_table->resetObjectColumns(object_columns);
{ {
auto lock = getLock(); auto lock = getLock();
cloud_tables.emplace(std::make_pair(tenant_db, table_name), res); cloud_tables.emplace(std::make_pair(tenant_db, table_name), res);

View File

@ -18,6 +18,7 @@
#include <Interpreters/StorageID.h> #include <Interpreters/StorageID.h>
#include <Storages/IStorage_fwd.h> #include <Storages/IStorage_fwd.h>
#include <Databases/IDatabase.h> #include <Databases/IDatabase.h>
#include <Storages/ColumnsDescription.h>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
@ -33,7 +34,7 @@ class CloudTablesBlockSource;
class CnchWorkerResource class CnchWorkerResource
{ {
public: public:
void executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists = false); void executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists = false, const ColumnsDescription & object_columns = {});
StoragePtr getTable(const StorageID & table_id) const; StoragePtr getTable(const StorageID & table_id) const;
DatabasePtr getDatabase(const String & database_name) const; DatabasePtr getDatabase(const String & database_name) const;
bool isCnchTableInWorker(const StorageID & table_id) const; bool isCnchTableInWorker(const StorageID & table_id) const;

View File

@ -48,8 +48,9 @@
#include <brpc/closure_guard.h> #include <brpc/closure_guard.h>
#include <brpc/controller.h> #include <brpc/controller.h>
#include <brpc/stream.h> #include <brpc/stream.h>
#include "Common/Configurations.h" #include <Common/Configurations.h>
#include "Common/Exception.h" #include <Storages/ColumnsDescription.h>
#include <Common/Exception.h>
#if USE_RDKAFKA #if USE_RDKAFKA
# include <Storages/Kafka/KafkaTaskCommand.h> # include <Storages/Kafka/KafkaTaskCommand.h>
@ -184,6 +185,8 @@ void CnchWorkerServiceImpl::submitManipulationTask(
auto * data = dynamic_cast<StorageCloudMergeTree *>(storage.get()); auto * data = dynamic_cast<StorageCloudMergeTree *>(storage.get());
if (!data) if (!data)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} is not CloudMergeTree", storage->getStorageID().getNameForLogs()); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} is not CloudMergeTree", storage->getStorageID().getNameForLogs());
if (request->has_dynamic_object_column_schema())
data->resetObjectColumns(ColumnsDescription::parse(request->dynamic_object_column_schema()));
auto params = ManipulationTaskParams(storage); auto params = ManipulationTaskParams(storage);
params.type = static_cast<ManipulationType>(request->type()); params.type = static_cast<ManipulationType>(request->type());
@ -632,8 +635,14 @@ void CnchWorkerServiceImpl::sendResources(
{ {
/// create a copy of session_context to avoid modify settings in SessionResource /// create a copy of session_context to avoid modify settings in SessionResource
auto context_for_create = Context::createCopy(query_context); auto context_for_create = Context::createCopy(query_context);
for (const auto & create_query : request->create_queries()) for (int i = 0; i < request->create_queries_size(); i++)
worker_resource->executeCreateQuery(context_for_create, create_query, true); {
auto create_query = request->create_queries().at(i);
auto object_columns = request->dynamic_object_column_schema().at(i);
worker_resource->executeCreateQuery(context_for_create, create_query, false, ColumnsDescription::parse(object_columns));
}
LOG_DEBUG(log, "Successfully create {} queries for Session: {}", request->create_queries_size(), request->txn_id()); LOG_DEBUG(log, "Successfully create {} queries for Session: {}", request->create_queries_size(), request->txn_id());
} }

View File

@ -164,16 +164,16 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
/// If there are references to states in final column, we must hold their ownership /// If there are references to states in final column, we must hold their ownership
/// by holding arenas and source. /// by holding arenas and source.
auto callback = [&](auto & subcolumn) auto callback = [&](IColumn & subcolumn)
{ {
if (auto * aggregate_subcolumn = typeid_cast<ColumnAggregateFunction *>(subcolumn.get())) if (auto * aggregate_subcolumn = typeid_cast<ColumnAggregateFunction *>(&subcolumn))
{ {
aggregate_subcolumn->foreign_arenas = concatArenas(column_aggregate_func.foreign_arenas, column_aggregate_func.my_arena); aggregate_subcolumn->foreign_arenas = concatArenas(column_aggregate_func.foreign_arenas, column_aggregate_func.my_arena);
aggregate_subcolumn->src = column_aggregate_func.getPtr(); aggregate_subcolumn->src = column_aggregate_func.getPtr();
} }
}; };
callback(res); callback(*res);
res->forEachSubcolumnRecursively(callback); res->forEachSubcolumnRecursively(callback);
for (auto * val : data) for (auto * val : data)
@ -349,7 +349,7 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
{ {
size_t size = data.size(); size_t size = data.size();
if (size != filter.size()) if (size != filter.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filter.size(), size);
if (size == 0) if (size == 0)
return cloneEmpty(); return cloneEmpty();
@ -371,7 +371,6 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
return res; return res;
} }
ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const
{ {
size_t size = data.size(); size_t size = data.size();

View File

@ -228,6 +228,21 @@ public:
throw Exception("Method hasEqualValues is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method hasEqualValues is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED);
} }
double getRatioOfDefaultRows(double) const override
{
return 0.0;
}
UInt64 getNumberOfDefaultRows() const override
{
return 0;
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for ColumnAggregateFunction");
}
void getPermutation(PermutationSortDirection direction, PermutationSortStability stability, void getPermutation(PermutationSortDirection direction, PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override; size_t limit, int nan_direction_hint, Permutation & res) const override;

View File

@ -629,7 +629,7 @@ ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hin
{ {
size_t col_size = getOffsets().size(); size_t col_size = getOffsets().size();
if (col_size != filt.size()) if (col_size != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filt.size(), col_size);
if (0 == col_size) if (0 == col_size)
return ColumnArray::create(data); return ColumnArray::create(data);
@ -697,7 +697,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
{ {
size_t size = getOffsets().size(); size_t size = getOffsets().size();
if (size != filt.size()) if (size != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filt.size(), size);
if (size == 0) if (size == 0)
return ColumnArray::create(data); return ColumnArray::create(data);
@ -963,6 +963,20 @@ ColumnPtr ColumnArray::compress() const
}); });
} }
double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnArray>(sample_ratio);
}
UInt64 ColumnArray::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnArray>();
}
void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnArray>(indices, from, limit);
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{ {

View File

@ -172,17 +172,17 @@ public:
ColumnPtr compress() const override; ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override void forEachSubcolumn(MutableColumnCallback callback) override
{ {
callback(offsets); callback(offsets);
callback(data); callback(data);
} }
void forEachSubcolumnRecursively(ColumnCallback callback) override void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{ {
callback(offsets); callback(*offsets);
offsets->forEachSubcolumnRecursively(callback); offsets->forEachSubcolumnRecursively(callback);
callback(data); callback(*data);
data->forEachSubcolumnRecursively(callback); data->forEachSubcolumnRecursively(callback);
} }
@ -193,6 +193,10 @@ public:
return false; return false;
} }
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
bool isCollationSupported() const override { return getData().isCollationSupported(); } bool isCollationSupported() const override { return getData().isCollationSupported(); }
size_t ALWAYS_INLINE offsetAt(ssize_t i) const { return getOffsets()[i - 1]; } size_t ALWAYS_INLINE offsetAt(ssize_t i) const { return getOffsets()[i - 1]; }

View File

@ -257,6 +257,21 @@ public:
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -132,6 +132,9 @@ public:
void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); } void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); }
void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); } void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); }
size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); } size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); }
double getRatioOfDefaultRows(double) const override { throwMustBeDecompressed(); }
UInt64 getNumberOfDefaultRows() const override { throwMustBeDecompressed(); }
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); }
protected: protected:
size_t rows; size_t rows;

View File

@ -26,6 +26,7 @@
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/PODArray.h>
namespace DB namespace DB
@ -207,6 +208,7 @@ public:
} }
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -253,7 +255,7 @@ public:
data->getExtremes(min, max); data->getExtremes(min, max);
} }
void forEachSubcolumn(ColumnCallback callback) override void forEachSubcolumn(MutableColumnCallback callback) override
{ {
callback(data); callback(data);
} }
@ -271,6 +273,27 @@ public:
return false; return false;
} }
double getRatioOfDefaultRows(double) const override
{
return data->isDefaultAt(0) ? 1.0 : 0.0;
}
UInt64 getNumberOfDefaultRows() const override
{
return data->isDefaultAt(0) ? s : 0;
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
if (!data->isDefaultAt(0))
{
size_t to = limit && from + limit < size() ? from + limit : size();
indices.reserve(indices.size() + to - from);
for (size_t i = from; i < to; ++i)
indices.push_back(i);
}
}
bool isNullable() const override { return isColumnNullable(*data); } bool isNullable() const override { return isColumnNullable(*data); }
bool onlyNull() const override { return data->isNullAt(0); } bool onlyNull() const override { return data->isNullAt(0); }
bool isNumeric() const override { return data->isNumeric(); } bool isNumeric() const override { return data->isNumeric(); }

View File

@ -175,6 +175,7 @@ public:
bool isDefaultAt(size_t n) const override { return data[n].value == 0; } bool isDefaultAt(size_t n) const override { return data[n].value == 0; }
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override; ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -198,6 +199,21 @@ public:
return false; return false;
} }
double getRatioOfDefaultRows(double sample_ratio) const override
{
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr compress() const override; ColumnPtr compress() const override;

View File

@ -177,6 +177,7 @@ public:
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -214,6 +215,21 @@ public:
return false; return false;
} }
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnFixedString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnFixedString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnFixedString>(indices, from, limit);
}
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; } bool isFixedAndContiguous() const override { return true; }

View File

@ -187,6 +187,14 @@ void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column)
captured_columns.push_back(column); captured_columns.push_back(column);
} }
DataTypePtr ColumnFunction::getResultType() const
{
// if (recursively_convert_result_to_full_column_if_low_cardinality)
// return recursiveRemoveLowCardinality(function->getResultType());
return function->getResultType();
}
ColumnWithTypeAndName ColumnFunction::reduce() const ColumnWithTypeAndName ColumnFunction::reduce() const
{ {
auto args = function->getArgumentTypes().size(); auto args = function->getArgumentTypes().size();
@ -203,4 +211,12 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
return res; return res;
} }
const ColumnFunction * checkAndGetShortCircuitArgument(const ColumnPtr & column)
{
const ColumnFunction * column_function;
if ((column_function = typeid_cast<const ColumnFunction *>(column.get())) && column_function->isShortCircuitArgument())
return column_function;
return nullptr;
}
} }

View File

@ -155,6 +155,25 @@ public:
throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
} }
DataTypePtr getResultType() const;
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
bool isShortCircuitArgument() const { return false; }
private: private:
size_t size_; size_t size_;
FunctionBasePtr function; FunctionBasePtr function;
@ -163,4 +182,6 @@ private:
void appendArgument(const ColumnWithTypeAndName & column); void appendArgument(const ColumnWithTypeAndName & column);
}; };
const ColumnFunction * checkAndGetShortCircuitArgument(const ColumnPtr & column);
} }

View File

@ -323,7 +323,7 @@ public:
return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes();
} }
void forEachSubcolumn(ColumnCallback callback) override void forEachSubcolumn(MutableColumnCallback callback) override
{ {
if (full_state) if (full_state)
{ {
@ -338,22 +338,22 @@ public:
callback(dictionary.getColumnUniquePtr()); callback(dictionary.getColumnUniquePtr());
} }
void forEachSubcolumnRecursively(ColumnCallback callback) override void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{ {
if (isFullState()) if (isFullState())
{ {
callback(nested_column); callback(*nested_column);
nested_column->forEachSubcolumnRecursively(callback); nested_column->forEachSubcolumnRecursively(callback);
} }
callback(idx.getPositionsPtr()); callback(*idx.getPositionsPtr());
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback); idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);
/// Column doesn't own dictionary if it's shared. /// Column doesn't own dictionary if it's shared.
if (!dictionary.isShared()) if (!dictionary.isShared())
{ {
callback(dictionary.getColumnUniquePtr()); callback(*dictionary.getColumnUniquePtr());
dictionary.getColumnUniquePtr()->forEachSubcolumnRecursively(callback); dictionary.getColumnUniquePtr()->forEachSubcolumnRecursively(callback);
} }
} }
@ -371,6 +371,21 @@ public:
return false; return false;
} }
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getIndexes().getRatioOfDefaultRows(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getIndexes().getNumberOfDefaultRows();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndexes().getIndicesOfNonDefaultRows(indices, from, limit);
}
bool valuesHaveFixedSize() const override bool valuesHaveFixedSize() const override
{ {
if (full_state) if (full_state)
@ -482,6 +497,7 @@ public:
const ColumnPtr & getPositions() const { return positions; } const ColumnPtr & getPositions() const { return positions; }
WrappedPtr & getPositionsPtr() { return positions; } WrappedPtr & getPositionsPtr() { return positions; }
const WrappedPtr & getPositionsPtr() const { return positions; }
size_t getPositionAt(size_t row) const; size_t getPositionAt(size_t row) const;
void insertPosition(UInt64 position); void insertPosition(UInt64 position);
void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit); void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit);

View File

@ -383,14 +383,14 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
max = std::move(map_max_value); max = std::move(map_max_value);
} }
void ColumnMap::forEachSubcolumn(ColumnCallback callback) void ColumnMap::forEachSubcolumn(MutableColumnCallback callback)
{ {
nested->forEachSubcolumn(callback); nested->forEachSubcolumn(callback);
} }
void ColumnMap::forEachSubcolumnRecursively(ColumnCallback callback) void ColumnMap::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{ {
callback(nested); callback(*nested);
nested->forEachSubcolumnRecursively(callback); nested->forEachSubcolumnRecursively(callback);
} }
@ -410,6 +410,21 @@ ColumnPtr ColumnMap::compress() const
}); });
} }
double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnMap>(sample_ratio);
}
UInt64 ColumnMap::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnMap>();
}
void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnMap>(indices, from, limit);
}
/** /**
* Generic implementation of get implicit value column based on key value. * Generic implementation of get implicit value column based on key value.
* TODO: specialize this function for Number type and String type. * TODO: specialize this function for Number type and String type.

View File

@ -117,9 +117,12 @@ public:
size_t byteSizeAt(size_t n) const override; size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
void protect() override; void protect() override;
void forEachSubcolumn(ColumnCallback callback) override; void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override; void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override; bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
/** Access embeded columns*/ /** Access embeded columns*/
const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); } const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }

View File

@ -149,17 +149,17 @@ public:
ColumnPtr compress() const override; ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override void forEachSubcolumn(MutableColumnCallback callback) override
{ {
callback(nested_column); callback(nested_column);
callback(null_map); callback(null_map);
} }
void forEachSubcolumnRecursively(ColumnCallback callback) override void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{ {
callback(nested_column); callback(*nested_column);
nested_column->forEachSubcolumnRecursively(callback); nested_column->forEachSubcolumnRecursively(callback);
callback(null_map); callback(*null_map);
null_map->forEachSubcolumnRecursively(callback); null_map->forEachSubcolumnRecursively(callback);
} }
@ -170,6 +170,21 @@ public:
return false; return false;
} }
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnNullable>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnNullable>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);
}
bool isNullable() const override { return true; } bool isNullable() const override { return true; }
bool isFixedAndContiguous() const override { return false; } bool isFixedAndContiguous() const override { return false; }
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); } bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }

1095
src/Columns/ColumnObject.cpp Normal file

File diff suppressed because it is too large Load Diff

271
src/Columns/ColumnObject.h Normal file
View File

@ -0,0 +1,271 @@
#pragma once
#include <Core/Field.h>
#include <Core/Names.h>
#include <Columns/IColumn.h>
#include <Common/PODArray.h>
#include <Common/HashTable/HashMap.h>
#include <DataTypes/Serializations/JSONDataParser.h>
#include <DataTypes/Serializations/SubcolumnsTree.h>
#include <DataTypes/IDataType.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/// Info that represents a scalar or array field in a decomposed view.
/// It allows to recreate field with different number
/// of dimensions or nullability.
struct FieldInfo
{
/// The common type of of all scalars in field.
DataTypePtr scalar_type;
/// Do we have NULL scalar in field.
bool have_nulls;
/// If true then we have scalars with different types in array and
/// we need to convert scalars to the common type.
bool need_convert;
/// Number of dimension in array. 0 if field is scalar.
size_t num_dimensions;
/// If true then this field is an array of variadic dimension field
/// and we need to normalize the dimension
bool need_fold_dimension;
};
FieldInfo getFieldInfo(const Field & field);
/** A column that represents object with dynamic set of subcolumns.
* Subcolumns are identified by paths in document and are stored in
* a trie-like structure. ColumnObject is not suitable for writing into tables
* and it should be converted to Tuple with fixed set of subcolumns before that.
*/
class ColumnObject final : public COWHelper<IColumn, ColumnObject>
{
public:
/** Class that represents one subcolumn.
* It stores values in several parts of column
* and keeps current common type of all parts.
* We add a new column part with a new type, when we insert a field,
* which can't be converted to the current common type.
* After insertion of all values subcolumn should be finalized
* for writing and other operations.
*/
class Subcolumn
{
public:
Subcolumn() = default;
Subcolumn(size_t size_, bool is_nullable_);
Subcolumn(MutableColumnPtr && data_, bool is_nullable_);
size_t size() const;
size_t byteSize() const;
size_t allocatedBytes() const;
void get(size_t n, Field & res) const;
bool isFinalized() const;
const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); }
const DataTypePtr & getLeastCommonTypeBase() const { return least_common_type.getBase(); }
size_t getNumberOfDimensions() const { return least_common_type.getNumberOfDimensions(); }
/// Checks the consistency of column's parts stored in @data.
void checkTypes() const;
/// Inserts a field, which scalars can be arbitrary, but number of
/// dimensions should be consistent with current common type.
void insert(Field field);
void insert(Field field, FieldInfo info);
void insertDefault();
void insertManyDefaults(size_t length);
void insertRangeFrom(const Subcolumn & src, size_t start, size_t length);
void popBack(size_t n);
Subcolumn cut(size_t start, size_t length) const;
/// Converts all column's parts to the common type and
/// creates a single column that stores all values.
void finalize();
/// Returns last inserted field.
Field getLastField() const;
FieldInfo getFieldInfo() const;
/// Recreates subcolumn with default scalar values and keeps sizes of arrays.
/// Used to create columns of type Nested with consistent array sizes.
Subcolumn recreateWithDefaultValues(const FieldInfo & field_info) const;
/// Returns single column if subcolumn in finalizes.
/// Otherwise -- undefined behaviour.
IColumn & getFinalizedColumn();
const IColumn & getFinalizedColumn() const;
const ColumnPtr & getFinalizedColumnPtr() const;
const std::vector<WrappedPtr> & getData() const { return data; }
size_t getNumberOfDefaultsInPrefix() const { return num_of_defaults_in_prefix; }
friend class ColumnObject;
private:
class LeastCommonType
{
public:
LeastCommonType();
explicit LeastCommonType(DataTypePtr type_);
const DataTypePtr & get() const { return type; }
const DataTypePtr & getBase() const { return base_type; }
size_t getNumberOfDimensions() const { return num_dimensions; }
private:
DataTypePtr type;
DataTypePtr base_type;
size_t num_dimensions = 0;
};
void addNewColumnPart(DataTypePtr type);
/// Current least common type of all values inserted to this subcolumn.
LeastCommonType least_common_type;
/// If true then common type type of subcolumn is Nullable
/// and default values are NULLs.
bool is_nullable = false;
/// Parts of column. Parts should be in increasing order in terms of subtypes/supertypes.
/// That means that the least common type for i-th prefix is the type of i-th part
/// and it's the supertype for all type of column from 0 to i-1.
std::vector<WrappedPtr> data;
/// Until we insert any non-default field we don't know further
/// least common type and we count number of defaults in prefix,
/// which will be converted to the default type of final common type.
size_t num_of_defaults_in_prefix = 0;
size_t num_rows = 0;
};
using Subcolumns = SubcolumnsTree<Subcolumn>;
private:
/// If true then all subcolumns are nullable.
const bool is_nullable;
Subcolumns subcolumns;
size_t num_rows;
public:
static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
explicit ColumnObject(bool is_nullable_);
ColumnObject(Subcolumns && subcolumns_, bool is_nullable_);
/// Checks that all subcolumns have consistent sizes.
void checkConsistency() const;
bool hasSubcolumn(const PathInData & key) const;
const Subcolumn & getSubcolumn(const PathInData & key) const;
Subcolumn & getSubcolumn(const PathInData & key);
void incrementNumRows() { ++num_rows; }
/// Adds a subcolumn from existing IColumn.
void addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn);
/// Adds a subcolumn of specific size with default values.
void addSubcolumn(const PathInData & key, size_t new_size);
/// Adds a subcolumn of type Nested of specific size with default values.
/// It cares about consistency of sizes of Nested arrays.
void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size);
/// Finds a subcolumn from the same Nested type as @entry and inserts
/// an array with default values with consistent sizes as in Nested type.
bool tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const;
bool tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const;
const Subcolumns & getSubcolumns() const { return subcolumns; }
Subcolumns & getSubcolumns() { return subcolumns; }
PathsInData getKeys() const;
/// Part of interface
const char * getFamilyName() const override { return "Object"; }
TypeIndex getDataType() const override { return TypeIndex::Object; }
size_t size() const override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumnPtr cloneResized(size_t new_size) const override;
/// Finalizes all subcolumns.
void finalize() override;
bool isFinalized() const override;
/// Order of rows in ColumnObject is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
void getExtremes(Field & min, Field & max) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer) override;
/// All other methods throw exception.
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); }
size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
double getRatioOfDefaultRows(double) const override { throwMustBeConcrete(); }
UInt64 getNumberOfDefaultRows() const override { throwMustBeConcrete(); }
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeConcrete(); }
private:
[[noreturn]] static void throwMustBeConcrete()
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ColumnObject must be converted to ColumnTuple before use");
}
template <typename Func>
MutableColumnPtr applyForSubcolumns(Func && func) const;
/// It's used to get shared sized of Nested to insert correct default values.
const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const;
};
}

View File

@ -299,6 +299,21 @@ public:
void validate() const; void validate() const;
bool isCollationSupported() const override { return true; } bool isCollationSupported() const override { return true; }
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
}; };

View File

@ -214,7 +214,6 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
return res; return res;
} }
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
{ {
size_t size = offsets.size(); size_t size = offsets.size();

View File

@ -310,6 +310,21 @@ public:
return typeid(rhs) == typeid(ColumnString); return typeid(rhs) == typeid(ColumnString);
} }
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnString>(indices, from, limit);
}
Chars & getChars() { return chars; } Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; } const Chars & getChars() const { return chars; }

View File

@ -503,18 +503,18 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const
max = max_tuple; max = max_tuple;
} }
void ColumnTuple::forEachSubcolumn(ColumnCallback callback) void ColumnTuple::forEachSubcolumn(MutableColumnCallback callback)
{ {
for (auto & column : columns) for (auto & column : columns)
callback(column); callback(column);
} }
void ColumnTuple::forEachSubcolumnRecursively(ColumnCallback callback) void ColumnTuple::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{ {
for (auto & column : columns) for (auto & column : columns)
{ {
callback(column); callback(*column);
column->forEachSubcolumnRecursively(callback); column->forEachSubcolumnRecursively(callback);
} }
} }
@ -569,4 +569,19 @@ ColumnPtr ColumnTuple::compress() const
}); });
} }
double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnTuple>(sample_ratio);
}
UInt64 ColumnTuple::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnTuple>();
}
void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
}
} }

View File

@ -115,12 +115,14 @@ public:
size_t byteSizeAt(size_t n) const override; size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
void protect() override; void protect() override;
void forEachSubcolumn(ColumnCallback callback) override; void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override; void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override; bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override; bool isCollationSupported() const override;
ColumnPtr compress() const override; ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
size_t tupleSize() const { return columns.size(); } size_t tupleSize() const { return columns.size(); }
const IColumn & getColumn(size_t idx) const { return *columns[idx]; } const IColumn & getColumn(size_t idx) const { return *columns[idx]; }

View File

@ -37,7 +37,7 @@
#include <common/range.h> #include <common/range.h>
#include <common/unaligned.h> #include <common/unaligned.h>
#include "Columns/ColumnConst.h" #include <Columns/ColumnConst.h>
namespace DB namespace DB
@ -128,7 +128,10 @@ public:
return column_holder->allocatedBytes() + reverse_index.allocatedBytes() return column_holder->allocatedBytes() + reverse_index.allocatedBytes()
+ (nested_null_mask ? nested_null_mask->allocatedBytes() : 0); + (nested_null_mask ? nested_null_mask->allocatedBytes() : 0);
} }
void forEachSubcolumn(IColumn::ColumnCallback callback) override
void forEachSubcolumn(IColumn::ColumnCallback callback) const override { callback(column_holder); }
void forEachSubcolumn(IColumn::MutableColumnCallback callback) override
{ {
callback(column_holder); callback(column_holder);
reverse_index.setColumn(getRawColumnPtr()); reverse_index.setColumn(getRawColumnPtr());
@ -136,6 +139,21 @@ public:
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask); nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
} }
void forEachSubcolumnRecursively(IColumn::RecursiveColumnCallback callback) const override
{
callback(*column_holder);
column_holder->forEachSubcolumnRecursively(callback);
}
void forEachSubcolumnRecursively(IColumn::RecursiveMutableColumnCallback callback) override
{
callback(*column_holder);
column_holder->forEachSubcolumnRecursively(callback);
reverse_index.setColumn(getRawColumnPtr());
if (is_nullable)
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
void forEachSubcolumnRecursively(IColumn::ColumnCallback callback) override void forEachSubcolumnRecursively(IColumn::ColumnCallback callback) override
{ {
callback(column_holder); callback(column_holder);

View File

@ -356,6 +356,21 @@ public:
return typeid(rhs) == typeid(ColumnVector<T>); return typeid(rhs) == typeid(ColumnVector<T>);
} }
double getRatioOfDefaultRows(double sample_ratio) const override
{
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr compress() const override; ColumnPtr compress() const override;
/// Replace elements that match the filter with zeroes. If inverted replaces not matched elements. /// Replace elements that match the filter with zeroes. If inverted replaces not matched elements.

View File

@ -15,12 +15,10 @@ String IColumn::dumpStructure() const
WriteBufferFromOwnString res; WriteBufferFromOwnString res;
res << getFamilyName() << "(size = " << size(); res << getFamilyName() << "(size = " << size();
ColumnCallback callback = [&](ColumnPtr & subcolumn) forEachSubcolumn([&](const auto & subcolumn)
{ {
res << ", " << subcolumn->dumpStructure(); res << ", " << subcolumn->dumpStructure();
}; });
const_cast<IColumn*>(this)->forEachSubcolumn(callback);
res << ")"; res << ")";
return res.str(); return res.str();
@ -31,6 +29,50 @@ void IColumn::insertFrom(const IColumn & src, size_t n)
insert(src[n]); insert(src[n]);
} }
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
{
if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible sizes of offsets ({}), shift ({}) and size of column {}", offsets.size(), shift, size());
auto res = cloneEmpty();
res->reserve(total_rows);
ssize_t current_offset = -1;
for (size_t i = 0; i < offsets.size(); ++i)
{
ssize_t offsets_diff = static_cast<ssize_t>(offsets[i]) - current_offset;
current_offset = offsets[i];
if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1);
res->insertFrom(*this, i + shift);
}
ssize_t offsets_diff = static_cast<ssize_t>(total_rows) - current_offset;
if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1);
return res;
}
void IColumn::forEachSubcolumn(ColumnCallback callback) const
{
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)
{
callback(std::as_const(subcolumn));
});
}
void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
{
std::as_const(*this).forEachSubcolumnRecursively([&callback](const IColumn & subcolumn)
{
callback(const_cast<IColumn &>(subcolumn));
});
}
bool isColumnNullable(const IColumn & column) bool isColumnNullable(const IColumn & column)
{ {
return checkColumn<ColumnNullable>(column); return checkColumn<ColumnNullable>(column);

View File

@ -91,6 +91,10 @@ public:
/// If column is ColumnLowCardinality, transforms is to full column. /// If column is ColumnLowCardinality, transforms is to full column.
virtual Ptr convertToFullColumnIfLowCardinality() const { return getPtr(); } virtual Ptr convertToFullColumnIfLowCardinality() const { return getPtr(); }
/// If column isn't ColumnSparse, return itself.
/// If column is ColumnSparse, transforms it to full column.
[[nodiscard]] virtual Ptr convertToFullColumnIfSparse() const { return getPtr(); }
/// Creates empty column with the same type. /// Creates empty column with the same type.
virtual MutablePtr cloneEmpty() const { return cloneResized(0); } virtual MutablePtr cloneEmpty() const { return cloneResized(0); }
@ -209,6 +213,13 @@ public:
insertFrom(src, position); insertFrom(src, position);
} }
/// Appends one field multiple times. Can be optimized in inherited classes.
virtual void insertMany(const Field & field, size_t length)
{
for (size_t i = 0; i < length; ++i)
insert(field);
}
/// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented). /// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
/// Is used to optimize some computations (in aggregation, for example). /// Is used to optimize some computations (in aggregation, for example).
/// Parameter length could be ignored if column values have fixed size. /// Parameter length could be ignored if column values have fixed size.
@ -414,8 +425,18 @@ public:
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them. /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
/// Shallow: doesn't do recursive calls; don't do call for itself. /// Shallow: doesn't do recursive calls; don't do call for itself.
using ColumnCallback = std::function<void(WrappedPtr&)>;
virtual void forEachSubcolumn(ColumnCallback) {} using ColumnCallback = std::function<void(const WrappedPtr &)>;
virtual void forEachSubcolumn(ColumnCallback) const;
using MutableColumnCallback = std::function<void(WrappedPtr &)>;
virtual void forEachSubcolumn(MutableColumnCallback) {}
using RecursiveColumnCallback = std::function<void(const IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const;
using RecursiveMutableColumnCallback = std::function<void(IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback) {}
/// Similar to forEachSubcolumn but it also do recursive calls. /// Similar to forEachSubcolumn but it also do recursive calls.
virtual void forEachSubcolumnRecursively(ColumnCallback) {} virtual void forEachSubcolumnRecursively(ColumnCallback) {}
@ -427,6 +448,23 @@ public:
throw Exception("Method structureEquals is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method structureEquals is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
} }
/// Returns ratio of values in column, that are equal to default value of column.
/// Checks only @sample_ratio ratio of rows.
[[nodiscard]] virtual double getRatioOfDefaultRows(double sample_ratio = 1.0) const = 0; /// NOLINT
/// Returns number of values in column, that are equal to default value of column.
[[nodiscard]] virtual UInt64 getNumberOfDefaultRows() const = 0;
/// Returns indices of values in column, that not equal to default value of column.
virtual void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const = 0;
/// Returns column with @total_size elements.
/// In result column values from current column are at positions from @offsets.
/// Other values are filled by @default_value.
/// @shift means how much rows to skip from the beginning of current column.
/// Used to create full column from sparse.
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
/// Compress column in memory to some representation that allows to decompress it back. /// Compress column in memory to some representation that allows to decompress it back.
/// Return itself if compression is not applicable for this column type. /// Return itself if compression is not applicable for this column type.
virtual Ptr compress() const virtual Ptr compress() const
@ -442,6 +480,16 @@ public:
return getPtr(); return getPtr();
} }
/// Some columns may require finalization before using of other operations.
virtual void finalize() {}
virtual bool isFinalized() const { return true; }
MutablePtr cloneFinalized() const
{
auto finalized = IColumn::mutate(getPtr());
finalized->finalize();
return finalized;
}
static MutablePtr mutate(Ptr ptr) static MutablePtr mutate(Ptr ptr)
{ {
@ -509,6 +557,8 @@ public:
virtual bool lowCardinality() const { return false; } virtual bool lowCardinality() const { return false; }
[[nodiscard]] virtual bool isSparse() const { return false; }
virtual bool isCollationSupported() const { return false; } virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default; virtual ~IColumn() = default;
@ -545,6 +595,9 @@ protected:
template <typename Derived> template <typename Derived>
double getRatioOfDefaultRowsImpl(double sample_ratio) const; double getRatioOfDefaultRowsImpl(double sample_ratio) const;
template <typename Derived>
UInt64 getNumberOfDefaultRowsImpl() const;
template <typename Derived> template <typename Derived>
void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const; void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const;

View File

@ -180,6 +180,21 @@ public:
return res; return res;
} }
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
void gather(ColumnGathererStream &) override void gather(ColumnGathererStream &) override
{ {
throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);

View File

@ -195,6 +195,16 @@ double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const
return static_cast<double>(res) / num_checked_rows; return static_cast<double>(res) / num_checked_rows;
} }
template <typename Derived>
UInt64 IColumn::getNumberOfDefaultRowsImpl() const
{
UInt64 res = 0;
size_t num_rows = size();
for (size_t i = 0; i < num_rows; ++i)
res += static_cast<const Derived &>(*this).isDefaultAt(i);
return res;
}
template <typename Derived> template <typename Derived>
void IColumn::getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const void IColumn::getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const
{ {

View File

@ -210,6 +210,21 @@ public:
{ {
throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
} }
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getRatioOfDefaultRows' not implemented for ColumnUnique");
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getNumberOfDefaultRows' not implemented for ColumnUnique");
}
void getIndicesOfNonDefaultRows(IColumn::Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getIndicesOfNonDefaultRows' not implemented for ColumnUnique");
}
}; };
using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr; using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr;

View File

@ -48,6 +48,7 @@
M(11, POSITION_OUT_OF_BOUND) \ M(11, POSITION_OUT_OF_BOUND) \
M(12, PARAMETER_OUT_OF_BOUND) \ M(12, PARAMETER_OUT_OF_BOUND) \
M(13, SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH) \ M(13, SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH) \
M(14, TOO_MANY_SUBCOLUMNS_IN_JSON) \
M(15, DUPLICATE_COLUMN) \ M(15, DUPLICATE_COLUMN) \
M(16, NO_SUCH_COLUMN_IN_TABLE) \ M(16, NO_SUCH_COLUMN_IN_TABLE) \
M(17, DELIMITER_IN_STRING_LITERAL_DOESNT_MATCH) \ M(17, DELIMITER_IN_STRING_LITERAL_DOESNT_MATCH) \

View File

@ -149,6 +149,19 @@ void FieldVisitorCompatibleWriteBinary::operator()(const BitMap64 & x, WriteBuff
writeString(buffer.data(), bytes, buf); writeString(buffer.data(), bytes, buf);
} }
void FieldVisitorCompatibleWriteBinary::operator()(const Object & x, WriteBuffer & buf) const
{
const size_t size = x.size();
writeBinary(size, buf);
for (const auto & [key, value] : x)
{
const UInt8 type = value.getType();
writeBinary(type, buf);
writeBinary(key, buf);
Field::dispatch([&buf] (const auto & val) { FieldVisitorCompatibleWriteBinary()(val, buf); }, value);
}
}
void FieldVisitorCompatibleReadBinary::deserialize(UInt64 & value, ReadBuffer & buf) void FieldVisitorCompatibleReadBinary::deserialize(UInt64 & value, ReadBuffer & buf)
{ {
@ -306,4 +319,19 @@ void FieldVisitorCompatibleReadBinary::deserialize(BitMap64 & value, ReadBuffer
value = BitMap64::readSafe(bitmap_buffer.data(), bytes); value = BitMap64::readSafe(bitmap_buffer.data(), bytes);
} }
void FieldVisitorCompatibleReadBinary::deserialize(Object & value, ReadBuffer & buf)
{
size_t size;
readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
UInt8 type;
String key;
readBinary(type, buf);
readBinary(key, buf);
value[key] = Field::dispatch(FieldVisitorCompatibleReadBinary(buf), static_cast<Field::Types::Which>(type));
}
}
} }

View File

@ -35,6 +35,7 @@ public:
void operator()(const DecimalField<Decimal256> & x, WriteBuffer & buf) const; void operator()(const DecimalField<Decimal256> & x, WriteBuffer & buf) const;
void operator()(const AggregateFunctionStateData & x, WriteBuffer & buf) const; void operator()(const AggregateFunctionStateData & x, WriteBuffer & buf) const;
void operator()(const BitMap64 & x, WriteBuffer & buf) const; void operator()(const BitMap64 & x, WriteBuffer & buf) const;
void operator()(const Object & x, WriteBuffer & buf) const;
void operator()(const IPv4 & x, WriteBuffer & buf) const; void operator()(const IPv4 & x, WriteBuffer & buf) const;
void operator()(const IPv6 & x, WriteBuffer & buf) const; void operator()(const IPv6 & x, WriteBuffer & buf) const;
}; };
@ -97,6 +98,7 @@ private:
static void deserialize(Tuple & value, ReadBuffer & buf); static void deserialize(Tuple & value, ReadBuffer & buf);
static void deserialize(Map & value, ReadBuffer & buf); static void deserialize(Map & value, ReadBuffer & buf);
static void deserialize(BitMap64 & value, ReadBuffer & buf); static void deserialize(BitMap64 & value, ReadBuffer & buf);
static void deserialize(Object & value, ReadBuffer & buf);
}; };
} }

View File

@ -77,6 +77,11 @@ public:
throw Exception("Cannot convert Map to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); throw Exception("Cannot convert Map to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);
} }
T operator() (const Object &) const
{
throw Exception("Cannot convert Object to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);
}
T operator() (const UInt64 & x) const { return T(x); } T operator() (const UInt64 & x) const { return T(x); }
T operator() (const Int64 & x) const { return T(x); } T operator() (const Int64 & x) const { return T(x); }
T operator() (const Int128 & x) const { return T(x); } T operator() (const Int128 & x) const { return T(x); }

View File

@ -138,4 +138,21 @@ String FieldVisitorDump::operator() (const BitMap64 & x) const
return wb.str(); return wb.str();
} }
String FieldVisitorDump::operator() (const Object & x) const
{
WriteBufferFromOwnString wb;
wb << "Object_(";
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())
wb << ", ";
wb << "(" << it->first << ", " << applyVisitor(*this, it->second) << ")";
}
wb << ')';
return wb.str();
}
} }

View File

@ -53,6 +53,7 @@ public:
String operator() (const DecimalField<Decimal256> & x) const; String operator() (const DecimalField<Decimal256> & x) const;
String operator() (const AggregateFunctionStateData & x) const; String operator() (const AggregateFunctionStateData & x) const;
String operator() (const BitMap64 & x) const; String operator() (const BitMap64 & x) const;
String operator() (const Object & x) const;
}; };
} }

View File

@ -238,4 +238,17 @@ void FieldVisitorHash::operator() (const BitMap64 & x) const
applyVisitor(*this, Field(*it)); applyVisitor(*this, Field(*it));
} }
void FieldVisitorHash::operator() (const Object & x) const
{
UInt8 type = Field::Types::Object;
hash.update(type);
hash.update(x.size());
for (const auto & [key, value]: x)
{
hash.update(key);
applyVisitor(*this, value);
}
}
} }

View File

@ -59,6 +59,7 @@ public:
void operator() (const DecimalField<Decimal256> & x) const; void operator() (const DecimalField<Decimal256> & x) const;
void operator() (const AggregateFunctionStateData & x) const; void operator() (const AggregateFunctionStateData & x) const;
void operator() (const BitMap64 & x) const; void operator() (const BitMap64 & x) const;
void operator() (const Object & x) const;
}; };
} }

View File

@ -53,6 +53,7 @@ bool FieldVisitorSum::operator() (UUID &) const { throw Exception("Cannot sum UU
bool FieldVisitorSum::operator() (IPv4 &) const { throw Exception("Cannot sum IPv4s", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (IPv4 &) const { throw Exception("Cannot sum IPv4s", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (IPv6 &) const { throw Exception("Cannot sum IPv6s", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (IPv6 &) const { throw Exception("Cannot sum IPv6s", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (BitMap64 &) const { throw Exception("Cannot sum BitMap64", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (BitMap64 &) const { throw Exception("Cannot sum BitMap64", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (Object &) const { throw Exception("Cannot sum Objects", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (AggregateFunctionStateData &) const bool FieldVisitorSum::operator() (AggregateFunctionStateData &) const
{ {

View File

@ -53,6 +53,7 @@ public:
bool operator() (IPv6 &) const; bool operator() (IPv6 &) const;
bool operator() (AggregateFunctionStateData &) const; bool operator() (AggregateFunctionStateData &) const;
bool operator() (BitMap64 &) const; bool operator() (BitMap64 &) const;
bool operator() (Object &) const;
template <typename T> template <typename T>
bool operator() (DecimalField<T> & x) const bool operator() (DecimalField<T> & x) const

View File

@ -160,4 +160,23 @@ String FieldVisitorToString::operator() (const BitMap64 & x) const
return wb.str(); return wb.str();
} }
String FieldVisitorToString::operator() (const Object & x) const
{
WriteBufferFromOwnString wb;
wb << '{';
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())
wb << ", ";
writeDoubleQuoted(it->first, wb);
wb << ": " << applyVisitor(*this, it->second);
}
wb << '}';
return wb.str();
}
} }

View File

@ -53,6 +53,7 @@ public:
String operator() (const DecimalField<Decimal256> & x) const; String operator() (const DecimalField<Decimal256> & x) const;
String operator() (const AggregateFunctionStateData & x) const; String operator() (const AggregateFunctionStateData & x) const;
String operator() (const BitMap64 & x) const; String operator() (const BitMap64 & x) const;
String operator() (const Object & x) const;
}; };
} }

View File

@ -104,4 +104,18 @@ void FieldVisitorWriteBinary::operator() (const BitMap64 & x, WriteBuffer & buf)
writeString(buffer.data(), bytes, buf); writeString(buffer.data(), bytes, buf);
} }
void FieldVisitorWriteBinary::operator() (const Object & x, WriteBuffer & buf) const
{
const size_t size = x.size();
writeBinary(size, buf);
for (const auto & [key, value] : x)
{
const UInt8 type = value.getType();
writeBinary(type, buf);
writeBinary(key, buf);
Field::dispatch([&buf] (const auto & val) { FieldVisitorWriteBinary()(val, buf); }, value);
}
}
} }

View File

@ -52,6 +52,7 @@ public:
void operator() (const DecimalField<Decimal256> & x, WriteBuffer & buf) const; void operator() (const DecimalField<Decimal256> & x, WriteBuffer & buf) const;
void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const; void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const;
void operator() (const BitMap64 & x, WriteBuffer & buf) const; void operator() (const BitMap64 & x, WriteBuffer & buf) const;
void operator() (const Object & x, WriteBuffer & buf) const;
}; };
} }

View File

@ -2,9 +2,6 @@
#include <Common/Exception.h> #include <Common/Exception.h>
#include <common/types.h> #include <common/types.h>
#include <common/defines.h>
#include "ElementTypes.h"
namespace DB namespace DB
{ {
@ -25,26 +22,25 @@ struct DummyJSONParser
class Element class Element
{ {
public: public:
Element() = default; Element() {}
static ElementType type() { return ElementType::NULL_VALUE; } bool isInt64() const { return false; }
static bool isInt64() { return false; } bool isUInt64() const { return false; }
static bool isUInt64() { return false; } bool isDouble() const { return false; }
static bool isDouble() { return false; } bool isString() const { return false; }
static bool isString() { return false; } bool isArray() const { return false; }
static bool isArray() { return false; } bool isObject() const { return false; }
static bool isObject() { return false; } bool isBool() const { return false; }
static bool isBool() { return false; } bool isNull() const { return false; }
static bool isNull() { return false; }
static Int64 getInt64() { return 0; } Int64 getInt64() const { return 0; }
static UInt64 getUInt64() { return 0; } UInt64 getUInt64() const { return 0; }
static double getDouble() { return 0; } double getDouble() const { return 0; }
static bool getBool() { return false; } bool getBool() const { return false; }
static std::string_view getString() { return {}; } std::string_view getString() const { return {}; }
static Array getArray() { return {}; } Array getArray() const { return {}; }
static Object getObject() { return {}; } Object getObject() const { return {}; }
static Element getElement() { return {}; } Element getElement() { return {}; }
}; };
/// References an array in a JSON document. /// References an array in a JSON document.
@ -56,14 +52,14 @@ struct DummyJSONParser
public: public:
Element operator*() const { return {}; } Element operator*() const { return {}; }
Iterator & operator++() { return *this; } Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; } /// NOLINT Iterator operator++(int) { return *this; }
friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; } friend bool operator!=(const Iterator &, const Iterator &) { return false; }
}; };
static Iterator begin() { return {}; } Iterator begin() const { return {}; }
static Iterator end() { return {}; } Iterator end() const { return {}; }
static size_t size() { return 0; } size_t size() const { return 0; }
Element operator[](size_t) const { return {}; } Element operator[](size_t) const { return {}; }
}; };
@ -78,15 +74,15 @@ struct DummyJSONParser
public: public:
KeyValuePair operator*() const { return {}; } KeyValuePair operator*() const { return {}; }
Iterator & operator++() { return *this; } Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; } /// NOLINT Iterator operator++(int) { return *this; }
friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; } friend bool operator!=(const Iterator &, const Iterator &) { return false; }
}; };
static Iterator begin() { return {}; } Iterator begin() const { return {}; }
static Iterator end() { return {}; } Iterator end() const { return {}; }
static size_t size() { return 0; } size_t size() const { return 0; }
bool find(std::string_view, Element &) const { return false; } /// NOLINT bool find(const std::string_view &, Element &) const { return false; }
#if 0 #if 0
/// Optional: Provides access to an object's element by index. /// Optional: Provides access to an object's element by index.
@ -95,7 +91,7 @@ struct DummyJSONParser
}; };
/// Parses a JSON document, returns the reference to its root element if succeeded. /// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(std::string_view, Element &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Functions JSON* are not supported"); } /// NOLINT bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; }
#if 0 #if 0
/// Optional: Allocates memory to parse JSON documents faster. /// Optional: Allocates memory to parse JSON documents faster.

View File

@ -2,6 +2,9 @@
#include <Common/Exception.h> #include <Common/Exception.h>
#include <common/types.h> #include <common/types.h>
#include <common/defines.h>
#include "ElementTypes.h"
namespace DB namespace DB
{ {
@ -22,25 +25,26 @@ struct DummyJSONParser
class Element class Element
{ {
public: public:
Element() {} Element() = default;
bool isInt64() const { return false; } static ElementType type() { return ElementType::NULL_VALUE; }
bool isUInt64() const { return false; } static bool isInt64() { return false; }
bool isDouble() const { return false; } static bool isUInt64() { return false; }
bool isString() const { return false; } static bool isDouble() { return false; }
bool isArray() const { return false; } static bool isString() { return false; }
bool isObject() const { return false; } static bool isArray() { return false; }
bool isBool() const { return false; } static bool isObject() { return false; }
bool isNull() const { return false; } static bool isBool() { return false; }
static bool isNull() { return false; }
Int64 getInt64() const { return 0; } static Int64 getInt64() { return 0; }
UInt64 getUInt64() const { return 0; } static UInt64 getUInt64() { return 0; }
double getDouble() const { return 0; } static double getDouble() { return 0; }
bool getBool() const { return false; } static bool getBool() { return false; }
std::string_view getString() const { return {}; } static std::string_view getString() { return {}; }
Array getArray() const { return {}; } static Array getArray() { return {}; }
Object getObject() const { return {}; } static Object getObject() { return {}; }
Element getElement() { return {}; } static Element getElement() { return {}; }
}; };
/// References an array in a JSON document. /// References an array in a JSON document.
@ -52,14 +56,14 @@ struct DummyJSONParser
public: public:
Element operator*() const { return {}; } Element operator*() const { return {}; }
Iterator & operator++() { return *this; } Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; } Iterator operator++(int) { return *this; } /// NOLINT
friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; } friend bool operator!=(const Iterator &, const Iterator &) { return false; }
}; };
Iterator begin() const { return {}; } static Iterator begin() { return {}; }
Iterator end() const { return {}; } static Iterator end() { return {}; }
size_t size() const { return 0; } static size_t size() { return 0; }
Element operator[](size_t) const { return {}; } Element operator[](size_t) const { return {}; }
}; };
@ -74,15 +78,15 @@ struct DummyJSONParser
public: public:
KeyValuePair operator*() const { return {}; } KeyValuePair operator*() const { return {}; }
Iterator & operator++() { return *this; } Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; } Iterator operator++(int) { return *this; } /// NOLINT
friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; } friend bool operator!=(const Iterator &, const Iterator &) { return false; }
}; };
Iterator begin() const { return {}; } static Iterator begin() { return {}; }
Iterator end() const { return {}; } static Iterator end() { return {}; }
size_t size() const { return 0; } static size_t size() { return 0; }
bool find(const std::string_view &, Element &) const { return false; } bool find(std::string_view, Element &) const { return false; } /// NOLINT
#if 0 #if 0
/// Optional: Provides access to an object's element by index. /// Optional: Provides access to an object's element by index.
@ -91,7 +95,7 @@ struct DummyJSONParser
}; };
/// Parses a JSON document, returns the reference to its root element if succeeded. /// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; } bool parse(std::string_view, Element &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Functions JSON* are not supported"); } /// NOLINT
#if 0 #if 0
/// Optional: Allocates memory to parse JSON documents faster. /// Optional: Allocates memory to parse JSON documents faster.

View File

@ -25,6 +25,8 @@
#cmakedefine01 USE_KRB5 #cmakedefine01 USE_KRB5
#cmakedefine01 USE_JEMALLOC #cmakedefine01 USE_JEMALLOC
#cmakedefine01 USE_BZIP2 #cmakedefine01 USE_BZIP2
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON
#cmakedefine01 USE_JAVA_EXTENSIONS #cmakedefine01 USE_JAVA_EXTENSIONS
#cmakedefine01 USE_TSQUERY #cmakedefine01 USE_TSQUERY
#cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_SIMDJSON

View File

@ -150,6 +150,12 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf)
DB::readBinary(value.toUnderType(), buf); DB::readBinary(value.toUnderType(), buf);
return value; return value;
} }
case Field::Types::Object:
{
Object value;
readBinary(value, buf);
return value;
}
} }
return Field(); return Field();
} }
@ -274,6 +280,40 @@ void writeBinary(const BitMap64 & x, WriteBuffer & buf)
writeString(tmp_buf.data(), bytes, buf); writeString(tmp_buf.data(), bytes, buf);
} }
void readBinary(Object & x, ReadBuffer & buf)
{
size_t size;
readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
UInt8 type;
String key;
readBinary(type, buf);
readBinary(key, buf);
x[key] = getBinaryValue(type, buf);
}
}
void writeBinary(const Object & x, WriteBuffer & buf)
{
const size_t size = x.size();
writeBinary(size, buf);
for (const auto & [key, value] : x)
{
const UInt8 type = value.getType();
writeBinary(type, buf);
writeBinary(key, buf);
Field::dispatch([&buf] (const auto & val) { FieldVisitorWriteBinary()(val, buf); }, value);
}
}
void writeText(const Object & x, WriteBuffer & buf)
{
writeFieldText(Field(x), buf);
}
template <typename T> template <typename T>
void readQuoted(DecimalField<T> & x, ReadBuffer & buf) void readQuoted(DecimalField<T> & x, ReadBuffer & buf)
{ {

View File

@ -23,6 +23,7 @@
#include <cassert> #include <cassert>
#include <vector> #include <vector>
#include <map>
#include <algorithm> #include <algorithm>
#include <type_traits> #include <type_traits>
#include <functional> #include <functional>
@ -84,6 +85,18 @@ DEFINE_FIELD_VECTOR(Tuple);
#undef DEFINE_FIELD_VECTOR #undef DEFINE_FIELD_VECTOR
using FieldMap = std::map<String, Field, std::less<String>, AllocatorWithMemoryTracking<std::pair<const String, Field>>>;
#define DEFINE_FIELD_MAP(X) \
struct X : public FieldMap \
{ \
using FieldMap::FieldMap; \
}
DEFINE_FIELD_MAP(Object);
#undef DEFINE_FIELD_MAP
struct AggregateFunctionStateData struct AggregateFunctionStateData
{ {
String name; /// Name with arguments. String name; /// Name with arguments.
@ -246,6 +259,7 @@ template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<Map> { using Type = Map; }; template <> struct NearestFieldTypeImpl<Map> { using Type = Map; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; }; template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; }; template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
template <> struct NearestFieldTypeImpl<Object> { using Type = Object; };
template <> struct NearestFieldTypeImpl<IPv4> { using Type = IPv4; }; template <> struct NearestFieldTypeImpl<IPv4> { using Type = IPv4; };
template <> struct NearestFieldTypeImpl<IPv6> { using Type = IPv6; }; template <> struct NearestFieldTypeImpl<IPv6> { using Type = IPv6; };
template <> struct NearestFieldTypeImpl<NegativeInfinity> { using Type = NegativeInfinity; }; template <> struct NearestFieldTypeImpl<NegativeInfinity> { using Type = NegativeInfinity; };
@ -306,6 +320,7 @@ public:
(SketchBinary, 29), (SketchBinary, 29),
(IPv4, 30), (IPv4, 30),
(IPv6, 31), (IPv6, 31),
(Object, 32),
// Special types for index analysis // Special types for index analysis
(NegativeInfinity, 254), (NegativeInfinity, 254),
(PositiveInfinity, 255)); (PositiveInfinity, 255));
@ -519,6 +534,7 @@ public:
case Types::Array: return get<Array>() < rhs.get<Array>(); case Types::Array: return get<Array>() < rhs.get<Array>();
case Types::Tuple: return get<Tuple>() < rhs.get<Tuple>(); case Types::Tuple: return get<Tuple>() < rhs.get<Tuple>();
case Types::Map: return get<Map>() < rhs.get<Map>(); case Types::Map: return get<Map>() < rhs.get<Map>();
case Types::Object: return get<Object>() < rhs.get<Object>();
case Types::Decimal32: return get<DecimalField<Decimal32>>() < rhs.get<DecimalField<Decimal32>>(); case Types::Decimal32: return get<DecimalField<Decimal32>>() < rhs.get<DecimalField<Decimal32>>();
case Types::Decimal64: return get<DecimalField<Decimal64>>() < rhs.get<DecimalField<Decimal64>>(); case Types::Decimal64: return get<DecimalField<Decimal64>>() < rhs.get<DecimalField<Decimal64>>();
case Types::Decimal128: return get<DecimalField<Decimal128>>() < rhs.get<DecimalField<Decimal128>>(); case Types::Decimal128: return get<DecimalField<Decimal128>>() < rhs.get<DecimalField<Decimal128>>();
@ -563,6 +579,7 @@ public:
case Types::Array: return get<Array>() <= rhs.get<Array>(); case Types::Array: return get<Array>() <= rhs.get<Array>();
case Types::Tuple: return get<Tuple>() <= rhs.get<Tuple>(); case Types::Tuple: return get<Tuple>() <= rhs.get<Tuple>();
case Types::Map: return get<Map>() <= rhs.get<Map>(); case Types::Map: return get<Map>() <= rhs.get<Map>();
case Types::Object: return get<Object>() <= rhs.get<Object>();
case Types::Decimal32: return get<DecimalField<Decimal32>>() <= rhs.get<DecimalField<Decimal32>>(); case Types::Decimal32: return get<DecimalField<Decimal32>>() <= rhs.get<DecimalField<Decimal32>>();
case Types::Decimal64: return get<DecimalField<Decimal64>>() <= rhs.get<DecimalField<Decimal64>>(); case Types::Decimal64: return get<DecimalField<Decimal64>>() <= rhs.get<DecimalField<Decimal64>>();
case Types::Decimal128: return get<DecimalField<Decimal128>>() <= rhs.get<DecimalField<Decimal128>>(); case Types::Decimal128: return get<DecimalField<Decimal128>>() <= rhs.get<DecimalField<Decimal128>>();
@ -617,6 +634,7 @@ public:
case Types::Decimal256: return get<DecimalField<Decimal256>>() == rhs.get<DecimalField<Decimal256>>(); case Types::Decimal256: return get<DecimalField<Decimal256>>() == rhs.get<DecimalField<Decimal256>>();
case Types::AggregateFunctionState: return get<AggregateFunctionStateData>() == rhs.get<AggregateFunctionStateData>(); case Types::AggregateFunctionState: return get<AggregateFunctionStateData>() == rhs.get<AggregateFunctionStateData>();
case Types::BitMap64: return get<BitMap64>() == rhs.get<BitMap64>(); case Types::BitMap64: return get<BitMap64>() == rhs.get<BitMap64>();
case Types::Object: return get<Object>() == rhs.get<Object>();
} }
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
@ -663,6 +681,7 @@ public:
case Types::Decimal256: return f(field.template get<DecimalField<Decimal256>>()); case Types::Decimal256: return f(field.template get<DecimalField<Decimal256>>());
case Types::AggregateFunctionState: return f(field.template get<AggregateFunctionStateData>()); case Types::AggregateFunctionState: return f(field.template get<AggregateFunctionStateData>());
case Types::BitMap64: return f(field.template get<BitMap64>()); case Types::BitMap64: return f(field.template get<BitMap64>());
case Types::Object: return f(field.template get<Object>());
#if !defined(__clang__) #if !defined(__clang__)
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#endif #endif
@ -726,6 +745,8 @@ public:
return f.template operator()<AggregateFunctionStateData>(); return f.template operator()<AggregateFunctionStateData>();
case Types::BitMap64: case Types::BitMap64:
return f.template operator()<BitMap64>(); return f.template operator()<BitMap64>();
case Types::Object:
return f.template operator()<Object>();
case Types::IPv4: case Types::IPv4:
return f.template operator()<IPv4>(); return f.template operator()<IPv4>();
case Types::IPv6: case Types::IPv6:
@ -885,6 +906,9 @@ private:
case Types::BitMap64: case Types::BitMap64:
destroy<BitMap64>(); destroy<BitMap64>();
break; break;
case Types::Object:
destroy<Object>();
break;
default: default:
break; break;
} }
@ -906,30 +930,31 @@ private:
using Row = std::vector<Field>; using Row = std::vector<Field>;
template <> struct Field::TypeToEnum<Null> { static const Types::Which value = Types::Null; }; template <> struct Field::TypeToEnum<Null> { static constexpr Types::Which value = Types::Null; };
template <> struct Field::TypeToEnum<NegativeInfinity> { static const Types::Which value = Types::NegativeInfinity; }; template <> struct Field::TypeToEnum<NegativeInfinity> { static constexpr Types::Which value = Types::NegativeInfinity; };
template <> struct Field::TypeToEnum<PositiveInfinity> { static const Types::Which value = Types::PositiveInfinity; }; template <> struct Field::TypeToEnum<PositiveInfinity> { static constexpr Types::Which value = Types::PositiveInfinity; };
template <> struct Field::TypeToEnum<UInt64> { static const Types::Which value = Types::UInt64; }; template <> struct Field::TypeToEnum<UInt64> { static constexpr Types::Which value = Types::UInt64; };
template <> struct Field::TypeToEnum<UInt128> { static const Types::Which value = Types::UInt128; }; template <> struct Field::TypeToEnum<UInt128> { static constexpr Types::Which value = Types::UInt128; };
template <> struct Field::TypeToEnum<UInt256> { static const Types::Which value = Types::UInt256; }; template <> struct Field::TypeToEnum<UInt256> { static constexpr Types::Which value = Types::UInt256; };
template <> struct Field::TypeToEnum<Int64> { static const Types::Which value = Types::Int64; }; template <> struct Field::TypeToEnum<Int64> { static constexpr Types::Which value = Types::Int64; };
template <> struct Field::TypeToEnum<Int128> { static const Types::Which value = Types::Int128; }; template <> struct Field::TypeToEnum<Int128> { static constexpr Types::Which value = Types::Int128; };
template <> struct Field::TypeToEnum<Int256> { static const Types::Which value = Types::Int256; }; template <> struct Field::TypeToEnum<Int256> { static constexpr Types::Which value = Types::Int256; };
template <> struct Field::TypeToEnum<UUID> { static const Types::Which value = Types::UUID; }; template <> struct Field::TypeToEnum<UUID> { static constexpr Types::Which value = Types::UUID; };
template <> struct Field::TypeToEnum<IPv4> { static constexpr Types::Which value = Types::IPv4; }; template <> struct Field::TypeToEnum<IPv4> { static constexpr Types::Which value = Types::IPv4; };
template <> struct Field::TypeToEnum<IPv6> { static constexpr Types::Which value = Types::IPv6; }; template <> struct Field::TypeToEnum<IPv6> { static constexpr Types::Which value = Types::IPv6; };
template <> struct Field::TypeToEnum<Float64> { static const Types::Which value = Types::Float64; }; template <> struct Field::TypeToEnum<Float64> { static constexpr Types::Which value = Types::Float64; };
template <> struct Field::TypeToEnum<String> { static const Types::Which value = Types::String; }; template <> struct Field::TypeToEnum<String> { static constexpr Types::Which value = Types::String; };
template <> struct Field::TypeToEnum<Array> { static const Types::Which value = Types::Array; }; template <> struct Field::TypeToEnum<Array> { static constexpr Types::Which value = Types::Array; };
template <> struct Field::TypeToEnum<Tuple> { static const Types::Which value = Types::Tuple; }; template <> struct Field::TypeToEnum<Tuple> { static constexpr Types::Which value = Types::Tuple; };
template <> struct Field::TypeToEnum<Map> { static const Types::Which value = Types::Map; }; template <> struct Field::TypeToEnum<Map> { static constexpr Types::Which value = Types::Map; };
template <> struct Field::TypeToEnum<DecimalField<Decimal32>>{ static const Types::Which value = Types::Decimal32; }; template <> struct Field::TypeToEnum<DecimalField<Decimal32>>{ static constexpr Types::Which value = Types::Decimal32; };
template <> struct Field::TypeToEnum<DecimalField<Decimal64>>{ static const Types::Which value = Types::Decimal64; }; template <> struct Field::TypeToEnum<DecimalField<Decimal64>>{ static constexpr Types::Which value = Types::Decimal64; };
template <> struct Field::TypeToEnum<DecimalField<Decimal128>>{ static const Types::Which value = Types::Decimal128; }; template <> struct Field::TypeToEnum<DecimalField<Decimal128>>{ static constexpr Types::Which value = Types::Decimal128; };
template <> struct Field::TypeToEnum<DecimalField<Decimal256>>{ static const Types::Which value = Types::Decimal256; }; template <> struct Field::TypeToEnum<DecimalField<Decimal256>>{ static constexpr Types::Which value = Types::Decimal256; };
template <> struct Field::TypeToEnum<DecimalField<DateTime64>>{ static const Types::Which value = Types::Decimal64; }; template <> struct Field::TypeToEnum<DecimalField<DateTime64>>{ static constexpr Types::Which value = Types::Decimal64; };
template <> struct Field::TypeToEnum<AggregateFunctionStateData>{ static const Types::Which value = Types::AggregateFunctionState; }; template <> struct Field::TypeToEnum<AggregateFunctionStateData>{ static constexpr Types::Which value = Types::AggregateFunctionState; };
template <> struct Field::TypeToEnum<BitMap64>{ static const Types::Which value = Types::BitMap64; }; template <> struct Field::TypeToEnum<BitMap64>{ static constexpr Types::Which value = Types::BitMap64; };
template <> struct Field::TypeToEnum<Object> { static constexpr Types::Which value = Types::Object; };
template <> struct Field::EnumToType<Field::Types::Null> { using Type = Null; }; template <> struct Field::EnumToType<Field::Types::Null> { using Type = Null; };
template <> struct Field::EnumToType<Field::Types::NegativeInfinity> { using Type = NegativeInfinity; }; template <> struct Field::EnumToType<Field::Types::NegativeInfinity> { using Type = NegativeInfinity; };
@ -948,6 +973,7 @@ template <> struct Field::EnumToType<Field::Types::String> { using Type = Strin
template <> struct Field::EnumToType<Field::Types::Array> { using Type = Array; }; template <> struct Field::EnumToType<Field::Types::Array> { using Type = Array; };
template <> struct Field::EnumToType<Field::Types::Tuple> { using Type = Tuple; }; template <> struct Field::EnumToType<Field::Types::Tuple> { using Type = Tuple; };
template <> struct Field::EnumToType<Field::Types::Map> { using Type = Map; }; template <> struct Field::EnumToType<Field::Types::Map> { using Type = Map; };
template <> struct Field::EnumToType<Field::Types::Object> { using Type = Object; };
template <> struct Field::EnumToType<Field::Types::Decimal32> { using Type = DecimalField<Decimal32>; }; template <> struct Field::EnumToType<Field::Types::Decimal32> { using Type = DecimalField<Decimal32>; };
template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = DecimalField<Decimal64>; }; template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; }; template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; };
@ -1099,34 +1125,39 @@ class WriteBuffer;
/// It is assumed that all elements of the array have the same type. /// It is assumed that all elements of the array have the same type.
void readBinary(Array & x, ReadBuffer & buf); void readBinary(Array & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Array &, ReadBuffer &) { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void readText(Array &, ReadBuffer &) { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Array &, ReadBuffer &) { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void readQuoted(Array &, ReadBuffer &) { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); }
/// It is assumed that all elements of the array have the same type. /// It is assumed that all elements of the array have the same type.
/// Also write size and type into buf. UInt64 and Int64 is written in variadic size form /// Also write size and type into buf. UInt64 and Int64 is written in variadic size form
void writeBinary(const Array & x, WriteBuffer & buf); void writeBinary(const Array & x, WriteBuffer & buf);
void writeText(const Array & x, WriteBuffer & buf); void writeText(const Array & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Array &, WriteBuffer &) { throw Exception("Cannot write Array quoted.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void writeQuoted(const Array &, WriteBuffer &) { throw Exception("Cannot write Array quoted.", ErrorCodes::NOT_IMPLEMENTED); }
void readBinary(Tuple & x, ReadBuffer & buf); void readBinary(Tuple & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Tuple &, ReadBuffer &) { throw Exception("Cannot read Tuple.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void readText(Tuple &, ReadBuffer &) { throw Exception("Cannot read Tuple.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Tuple &, ReadBuffer &) { throw Exception("Cannot read Tuple.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void readQuoted(Tuple &, ReadBuffer &) { throw Exception("Cannot read Tuple.", ErrorCodes::NOT_IMPLEMENTED); }
void writeBinary(const Tuple & x, WriteBuffer & buf); void writeBinary(const Tuple & x, WriteBuffer & buf);
void writeText(const Tuple & x, WriteBuffer & buf); void writeText(const Tuple & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Tuple &, WriteBuffer &) { throw Exception("Cannot write Tuple quoted.", ErrorCodes::NOT_IMPLEMENTED); }
void readBinary(Map & x, ReadBuffer & buf); void readBinary(Map & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void readText(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void readQuoted(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); }
void writeBinary(const Map & x, WriteBuffer & buf); void writeBinary(const Map & x, WriteBuffer & buf);
void writeText(const Map & x, WriteBuffer & buf); void writeText(const Map & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Map &, WriteBuffer &) { throw Exception("Cannot write Map quoted.", ErrorCodes::NOT_IMPLEMENTED); } [[noreturn]] inline void writeQuoted(const Map &, WriteBuffer &) { throw Exception("Cannot write Map quoted.", ErrorCodes::NOT_IMPLEMENTED); }
void readBinary(Object & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Object &, ReadBuffer &) { throw Exception("Cannot read Object.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Object &, ReadBuffer &) { throw Exception("Cannot read Object.", ErrorCodes::NOT_IMPLEMENTED); }
void writeBinary(const Object & x, WriteBuffer & buf);
void writeText(const Object & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Object &, WriteBuffer &) { throw Exception("Cannot write Object quoted.", ErrorCodes::NOT_IMPLEMENTED); }
__attribute__ ((noreturn)) inline void writeText(const AggregateFunctionStateData &, WriteBuffer &) __attribute__ ((noreturn)) inline void writeText(const AggregateFunctionStateData &, WriteBuffer &)
{ {
// This probably doesn't make any sense, but we have to have it for // This probably doesn't make any sense, but we have to have it for
@ -1156,8 +1187,6 @@ void readFieldBinary(Field & field, ReadBuffer & buf);
void writeFieldBinary(const Field & field, WriteBuffer & buf); void writeFieldBinary(const Field & field, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Tuple &, WriteBuffer &) { throw Exception("Cannot write Tuple quoted.", ErrorCodes::NOT_IMPLEMENTED); }
String toString(const Field & x); String toString(const Field & x);
} }

View File

@ -340,7 +340,6 @@ enum PreloadLevelSettings : UInt64
M(Float, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.", 0) \ M(Float, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.", 0) \
M(Bool, allow_suspicious_low_cardinality_types, true, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \ M(Bool, allow_suspicious_low_cardinality_types, true, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \ M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \ M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \ M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \ M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
@ -1253,6 +1252,9 @@ enum PreloadLevelSettings : UInt64
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, describe_extend_object_types, false, "Deduce concrete type of columns of type Object in DESCRIBE query", 0) \
M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \
M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \ M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, true, "Allow experimental window functions", 0) \ M(Bool, allow_experimental_window_functions, true, "Allow experimental window functions", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \ M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \

View File

@ -47,6 +47,7 @@ TYPEID_MAP(Int256)
TYPEID_MAP(Float32) TYPEID_MAP(Float32)
TYPEID_MAP(Float64) TYPEID_MAP(Float64)
TYPEID_MAP(UUID) TYPEID_MAP(UUID)
TYPEID_MAP(IPv4) TYPEID_MAP(IPv4)
TYPEID_MAP(IPv6) TYPEID_MAP(IPv6)

View File

@ -88,6 +88,7 @@ enum class TypeIndex
BitMap64, BitMap64,
Time, Time,
SketchBinary, SketchBinary,
Object,
IPv4, IPv4,
IPv6, IPv6,
}; };
@ -340,6 +341,7 @@ inline constexpr const char * getTypeName(TypeIndex idx)
case TypeIndex::Map: return "Map"; case TypeIndex::Map: return "Map";
case TypeIndex::BitMap64: return "BitMap64"; case TypeIndex::BitMap64: return "BitMap64";
case TypeIndex::SketchBinary: return "SketchBinary"; case TypeIndex::SketchBinary: return "SketchBinary";
case TypeIndex::Object: return "Object";
} }
__builtin_unreachable(); __builtin_unreachable();

View File

@ -16,4 +16,6 @@
#cmakedefine01 USE_NURAFT #cmakedefine01 USE_NURAFT
#cmakedefine01 USE_KRB5 #cmakedefine01 USE_KRB5
#cmakedefine01 USE_JEMALLOC #cmakedefine01 USE_JEMALLOC
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON
#cmakedefine01 USE_NLP #cmakedefine01 USE_NLP

View File

@ -126,6 +126,7 @@ IBGJobStatusPersistentStoreProxy::CacheClearer CatalogBGJobStatusPersistentStore
if (catalog) // catalog is nullptr in unittest if (catalog) // catalog is nullptr in unittest
statuses_cache = catalog->getBGJobStatuses(type); statuses_cache = catalog->getBGJobStatuses(type);
is_cache_prefetched = true; is_cache_prefetched = true;
return CacheClearer{this}; return CacheClearer{this};
} }

View File

@ -53,6 +53,8 @@ bvar::Adder<int> & getExecuteMetric(CnchBGThreadType type)
return g_executeImpl_TxnGC; return g_executeImpl_TxnGC;
case CnchBGThreadType::Clustering: case CnchBGThreadType::Clustering:
return g_executeImpl_Clustering; return g_executeImpl_Clustering;
case CnchBGThreadType::ObjectSchemaAssemble:
return g_executeImpl_ObjectSchemaAssemble;
case CnchBGThreadType::MaterializedMySQL: case CnchBGThreadType::MaterializedMySQL:
return g_executeImpl_MaterializedMySQL; return g_executeImpl_MaterializedMySQL;
default: default:
@ -81,6 +83,8 @@ bvar::Adder<int> & getExecuteErrorMetric(CnchBGThreadType type)
return g_executeImpl_TxnGC_error; return g_executeImpl_TxnGC_error;
case CnchBGThreadType::Clustering: case CnchBGThreadType::Clustering:
return g_executeImpl_Clustering_error; return g_executeImpl_Clustering_error;
case CnchBGThreadType::ObjectSchemaAssemble:
return g_executeImpl_ObjectSchemaAssemble_error;
case CnchBGThreadType::MaterializedMySQL: case CnchBGThreadType::MaterializedMySQL:
return g_executeImpl_MaterializedMySQL_error; return g_executeImpl_MaterializedMySQL_error;
default: default:

View File

@ -1138,6 +1138,7 @@ void registerServerBGThreads(DaemonFactory & factory)
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::Clustering, isCnchMergeTree>>("PART_CLUSTERING"); factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::Clustering, isCnchMergeTree>>("PART_CLUSTERING");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::Consumer, isCnchKafka>>("CONSUMER"); factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::Consumer, isCnchKafka>>("CONSUMER");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::DedupWorker, isCnchUniqueTableAndNeedDedup>>("DEDUP_WORKER"); factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::DedupWorker, isCnchUniqueTableAndNeedDedup>>("DEDUP_WORKER");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::ObjectSchemaAssemble, isCnchMergeTree>>("OBJECT_SCHEMA_ASSEMBLE");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::MaterializedMySQL, isMaterializedMySQL>>("MATERIALIZED_MYSQL"); factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::MaterializedMySQL, isMaterializedMySQL>>("MATERIALIZED_MYSQL");
} }

View File

@ -179,6 +179,7 @@ std::unordered_map<CnchBGThreadType, DaemonJobServerBGThreadPtr> createDaemonJob
{ "CONSUMER", 10000}, { "CONSUMER", 10000},
{ "DEDUP_WORKER", 10000}, { "DEDUP_WORKER", 10000},
{ "PART_CLUSTERING", 10000}, { "PART_CLUSTERING", 10000},
{ "OBJECT_SCHEMA_ASSEMBLE", 10000},
{ "MATERIALIZED_MYSQL", 10000} { "MATERIALIZED_MYSQL", 10000}
}; };

View File

@ -63,6 +63,11 @@ namespace DB::DaemonManager::BRPCMetrics
bvar::Window<bvar::Adder<int>> g_executeImpl_Clustering_error_minute("DaemonManager_Internal", "executeImpl_Clustering_error", & g_executeImpl_Clustering_error, 60); bvar::Window<bvar::Adder<int>> g_executeImpl_Clustering_error_minute("DaemonManager_Internal", "executeImpl_Clustering_error", & g_executeImpl_Clustering_error, 60);
bvar::Window<bvar::Adder<int>> g_executeImpl_Clustering_minute("DaemonManager_Internal", "executeImpl_Clustering", & g_executeImpl_Clustering, 60); bvar::Window<bvar::Adder<int>> g_executeImpl_Clustering_minute("DaemonManager_Internal", "executeImpl_Clustering", & g_executeImpl_Clustering, 60);
bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble_error;
bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble;
bvar::Window<bvar::Adder<int>> g_executeImpl_ObjectSchemaAssemble_error_minute("DaemonManager_Internal", "executeImpl_ObjectSchemaAssemble_error", & g_executeImpl_ObjectSchemaAssemble_error, 60);
bvar::Window<bvar::Adder<int>> g_executeImpl_ObjectSchemaAssemble_minute("DaemonManager_Internal", "executeImpl_ObjectSchemaAssemble", & g_executeImpl_ObjectSchemaAssemble, 60);
bvar::Adder< int > g_executeImpl_MaterializedMySQL_error; bvar::Adder< int > g_executeImpl_MaterializedMySQL_error;
bvar::Adder< int > g_executeImpl_MaterializedMySQL; bvar::Adder< int > g_executeImpl_MaterializedMySQL;
bvar::Window<bvar::Adder<int>> g_executeImpl_MaterializedMySQL_error_minute("DaemonManager_Internal", "executeImpl_MaterializedMySQL_error", & g_executeImpl_MaterializedMySQL_error, 60); bvar::Window<bvar::Adder<int>> g_executeImpl_MaterializedMySQL_error_minute("DaemonManager_Internal", "executeImpl_MaterializedMySQL_error", & g_executeImpl_MaterializedMySQL_error, 60);

View File

@ -37,6 +37,8 @@ namespace DB::DaemonManager::BRPCMetrics
extern bvar::Adder< int > g_executeImpl_TxnGC; extern bvar::Adder< int > g_executeImpl_TxnGC;
extern bvar::Adder< int > g_executeImpl_Clustering_error; extern bvar::Adder< int > g_executeImpl_Clustering_error;
extern bvar::Adder< int > g_executeImpl_Clustering; extern bvar::Adder< int > g_executeImpl_Clustering;
extern bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble_error;
extern bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble;
extern bvar::Adder< int > g_executeImpl_MaterializedMySQL_error; extern bvar::Adder< int > g_executeImpl_MaterializedMySQL_error;
extern bvar::Adder< int > g_executeImpl_MaterializedMySQL; extern bvar::Adder< int > g_executeImpl_MaterializedMySQL;
}/// end namespace }/// end namespace

View File

@ -85,7 +85,7 @@ static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuf
auto full_type = lc_type->getFullLowCardinalityTypePtr(); auto full_type = lc_type->getFullLowCardinalityTypePtr();
auto serialization = full_type->getDefaultSerialization(); auto serialization = full_type->getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr state; ISerialization::SerializeBinaryBulkStatePtr state;
serialization->serializeBinaryBulkStatePrefix(settings, state); serialization->serializeBinaryBulkStatePrefix(*full_column, settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state); serialization->serializeBinaryBulkStateSuffix(settings, state);
return ; return ;
@ -95,7 +95,7 @@ static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuf
auto serialization = type.getDefaultSerialization(); auto serialization = type.getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr state; ISerialization::SerializeBinaryBulkStatePtr state;
serialization->serializeBinaryBulkStatePrefix(settings, state); serialization->serializeBinaryBulkStatePrefix(*full_column, settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state); serialization->serializeBinaryBulkStateSuffix(settings, state);
} }

View File

@ -524,12 +524,13 @@ void RemoteQueryExecutor::sendExternalTables()
{ {
SelectQueryInfo query_info; SelectQueryInfo query_info;
auto metadata_snapshot = cur->getInMemoryMetadataPtr(); auto metadata_snapshot = cur->getInMemoryMetadataPtr();
auto storage_snapshot = cur->getStorageSnapshot(metadata_snapshot, context);
QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage( QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(
context, QueryProcessingStage::Complete, metadata_snapshot, query_info); context, QueryProcessingStage::Complete, storage_snapshot, query_info);
Pipe pipe = cur->read( Pipe pipe = cur->read(
metadata_snapshot->getColumns().getNamesOfPhysical(), metadata_snapshot->getColumns().getNamesOfPhysical(),
metadata_snapshot, query_info, context, storage_snapshot, query_info, context,
read_from_table_stage, DEFAULT_BLOCK_SIZE, 1); read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
if (pipe.empty()) if (pipe.empty())

View File

@ -87,7 +87,7 @@ public:
/// Query is resent to a replica, the query itself can be modified. /// Query is resent to a replica, the query itself can be modified.
std::atomic<bool> resent_query { false }; std::atomic<bool> resent_query { false };
/// Read next block of data. Returns empty block if query is finished. /// Read next block of data. Returns empty block if query is finished.
Block read(); Block read();

View File

@ -1,3 +1,5 @@
add_subdirectory (Serializations)
if (ENABLE_EXAMPLES) if (ENABLE_EXAMPLES)
add_subdirectory(examples) add_subdirectory(examples)
endif () endif ()

View File

@ -95,26 +95,26 @@ ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const I
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr()); return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
} }
SerializationPtr DataTypeArray::getSubcolumnSerialization( // SerializationPtr DataTypeArray::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{ // {
return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0); // return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0);
} // }
SerializationPtr DataTypeArray::getSubcolumnSerializationImpl( // SerializationPtr DataTypeArray::getSubcolumnSerializationImpl(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const
{ // {
if (subcolumn_name == "size" + std::to_string(level)) // if (subcolumn_name == "size" + std::to_string(level))
return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false); // return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false);
SerializationPtr subcolumn; // SerializationPtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get())) // if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1); // subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1);
else // else
subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); // subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
return std::make_shared<SerializationArray>(subcolumn); // return std::make_shared<SerializationArray>(subcolumn);
} // }
SerializationPtr DataTypeArray::doGetDefaultSerialization() const SerializationPtr DataTypeArray::doGetDefaultSerialization() const
{ {

View File

@ -78,8 +78,8 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization( // SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
SerializationPtr doGetDefaultSerialization() const override; SerializationPtr doGetDefaultSerialization() const override;

View File

@ -27,6 +27,8 @@ public:
bool isCategorial() const override { return true; } bool isCategorial() const override { return true; }
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }
bool isComparable() const override { return true; } bool isComparable() const override { return true; }
virtual bool contains(const IDataType & rhs) const = 0;
bool canBeMapKeyType() const override { return true; } bool canBeMapKeyType() const override { return true; }
}; };
@ -78,7 +80,7 @@ public:
/// Example: /// Example:
/// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK /// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK
/// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK /// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK
bool contains(const IDataType & rhs) const; bool contains(const IDataType & rhs) const override;
SerializationPtr doGetDefaultSerialization() const override; SerializationPtr doGetDefaultSerialization() const override;

View File

@ -288,6 +288,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeBitMap64(*this); registerDataTypeBitMap64(*this);
registerDataTypeSketchBinary(*this); registerDataTypeSketchBinary(*this);
registerDataTypeDomainBool(*this); registerDataTypeDomainBool(*this);
registerDataTypeObject(*this);
} }
DataTypeFactory & DataTypeFactory::instance() DataTypeFactory & DataTypeFactory::instance()

View File

@ -113,5 +113,6 @@ void registerDataTypeBitMap64(DataTypeFactory & factory);
void registerDataTypeSet(DataTypeFactory & factory); void registerDataTypeSet(DataTypeFactory & factory);
void registerDataTypeSketchBinary(DataTypeFactory & factory); void registerDataTypeSketchBinary(DataTypeFactory & factory);
void registerDataTypeDomainBool(DataTypeFactory & factory); void registerDataTypeDomainBool(DataTypeFactory & factory);
void registerDataTypeObject(DataTypeFactory & factory);
} }

View File

@ -32,6 +32,27 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
} }
DataTypeMap::DataTypeMap(const DataTypePtr & nested_)
: nested(nested_)
{
const auto * type_array = typeid_cast<const DataTypeArray *>(nested.get());
if (!type_array)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_array->getNestedType().get());
if (!type_tuple)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
if (type_tuple->getElements().size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
key_type = type_tuple->getElement(0);
value_type = type_tuple->getElement(1);
checkKeyType();
}
DataTypeMap::DataTypeMap(const DataTypes & elems_) DataTypeMap::DataTypeMap(const DataTypes & elems_)
{ {
@ -93,11 +114,11 @@ ColumnPtr DataTypeMap::getSubcolumn(const String & subcolumn_name, const IColumn
return nested->getSubcolumn(subcolumn_name, extractNestedColumn(column)); return nested->getSubcolumn(subcolumn_name, extractNestedColumn(column));
} }
SerializationPtr DataTypeMap::getSubcolumnSerialization( // SerializationPtr DataTypeMap::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{ // {
return nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); // return nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
} // }
MutableColumnPtr DataTypeMap::createColumn() const MutableColumnPtr DataTypeMap::createColumn() const
{ {

View File

@ -31,6 +31,7 @@ private:
public: public:
static constexpr bool is_parametric = true; static constexpr bool is_parametric = true;
explicit DataTypeMap(const DataTypePtr & nested_);
DataTypeMap(const DataTypes & elems); DataTypeMap(const DataTypes & elems);
DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_); DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_);
@ -42,8 +43,8 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization( // SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
MutableColumnPtr createColumn() const override; MutableColumnPtr createColumn() const override;
@ -73,7 +74,6 @@ public:
private: private:
void checkKeyType() const; void checkKeyType() const;
bool isMapKVStore() const { return flags & TYPE_MAP_KV_STORE_FLAG;} bool isMapKVStore() const { return flags & TYPE_MAP_KV_STORE_FLAG;}
bool isMapByteStore() const { return flags & TYPE_MAP_BYTE_STORE_FLAG; } bool isMapByteStore() const { return flags & TYPE_MAP_BYTE_STORE_FLAG; }
}; };

View File

@ -81,14 +81,14 @@ ColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, const IC
return nested_data_type->getSubcolumn(subcolumn_name, column_nullable.getNestedColumn()); return nested_data_type->getSubcolumn(subcolumn_name, column_nullable.getNestedColumn());
} }
SerializationPtr DataTypeNullable::getSubcolumnSerialization( // SerializationPtr DataTypeNullable::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{ // {
if (subcolumn_name == "null") // if (subcolumn_name == "null")
return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt8()), subcolumn_name, false); // return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt8()), subcolumn_name, false);
return nested_data_type->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); // return nested_data_type->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
} // }
SerializationPtr DataTypeNullable::doGetDefaultSerialization() const SerializationPtr DataTypeNullable::doGetDefaultSerialization() const
{ {

View File

@ -68,8 +68,8 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization( // SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
const DataTypePtr & getNestedType() const { return nested_data_type; } const DataTypePtr & getNestedType() const { return nested_data_type; }

View File

@ -0,0 +1,82 @@
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationObject.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <IO/Operators.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNEXPECTED_AST_STRUCTURE;
}
DataTypeObject::DataTypeObject(const String & schema_format_, bool is_nullable_)
: schema_format(Poco::toLower(schema_format_))
, is_nullable(is_nullable_)
{
}
bool DataTypeObject::equals(const IDataType & rhs) const
{
if (const auto * object = typeid_cast<const DataTypeObject *>(&rhs))
return schema_format == object->schema_format && is_nullable == object->is_nullable;
return false;
}
SerializationPtr DataTypeObject::doGetDefaultSerialization() const
{
return getObjectSerialization(schema_format);
}
String DataTypeObject::doGetName() const
{
WriteBufferFromOwnString out;
if (is_nullable)
out << "Object(Nullable(" << quote << schema_format << "))";
else
out << "Object(" << quote << schema_format << ")";
return out.str();
}
static DataTypePtr create(const ASTPtr & arguments)
{
if (!arguments || arguments->children.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Object data type family must have one argument - name of schema format");
ASTPtr schema_argument = arguments->children[0];
bool is_nullable = false;
if (const auto * func = schema_argument->as<ASTFunction>())
{
if (func->name != "Nullable" || func->arguments->children.size() != 1)
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
"Expected 'Nullable(<schema_name>)' as parameter for type Object (function: {})", func->name);
schema_argument = func->arguments->children[0];
is_nullable = true;
}
const auto * literal = schema_argument->as<ASTLiteral>();
if (!literal || literal->value.getType() != Field::Types::String)
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
"Object data type family must have a const string as its schema name parameter");
return std::make_shared<DataTypeObject>(literal->value.get<const String &>(), is_nullable);
}
void registerDataTypeObject(DataTypeFactory & factory)
{
factory.registerDataType("Object", create);
factory.registerSimpleDataType("JSON",
[] { return std::make_shared<DataTypeObject>("JSON", false); },
DataTypeFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,48 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <Core/Field.h>
#include <Columns/ColumnObject.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
class DataTypeObject : public IDataType
{
private:
String schema_format;
bool is_nullable;
public:
DataTypeObject(const String & schema_format_, bool is_nullable_);
const char * getFamilyName() const override { return "Object"; }
String doGetName() const override;
TypeIndex getTypeId() const override { return TypeIndex::Object; }
MutableColumnPtr createColumn() const override { return ColumnObject::create(is_nullable); }
Field getDefault() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDefault() is not implemented for data type {}", getName());
}
bool haveSubtypes() const override { return false; }
bool equals(const IDataType & rhs) const override;
bool isParametric() const override { return true; }
bool hasDynamicSubcolumns() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override;
bool hasNullableSubcolumns() const { return is_nullable; }
const String & getSchemaFormat() const { return schema_format; }
};
}

View File

@ -291,25 +291,25 @@ ColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, const IColu
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
} }
SerializationPtr DataTypeTuple::getSubcolumnSerialization( // SerializationPtr DataTypeTuple::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{ // {
auto on_success = [&](size_t pos) // auto on_success = [&](size_t pos)
{ // {
return std::make_shared<SerializationTupleElement>(base_serialization_getter(*elems[pos]), names[pos]); // return std::make_shared<SerializationTupleElement>(base_serialization_getter(*elems[pos]), names[pos]);
}; // };
auto on_continue = [&](size_t pos, const String & next_subcolumn) // auto on_continue = [&](size_t pos, const String & next_subcolumn)
{ // {
auto next_serialization = elems[pos]->getSubcolumnSerialization(next_subcolumn, base_serialization_getter); // auto next_serialization = elems[pos]->getSubcolumnSerialization(next_subcolumn, base_serialization_getter);
return std::make_shared<SerializationTupleElement>(next_serialization, names[pos]); // return std::make_shared<SerializationTupleElement>(next_serialization, names[pos]);
}; // };
if (auto serialization = getSubcolumnEntity(subcolumn_name, on_success, on_continue)) // if (auto serialization = getSubcolumnEntity(subcolumn_name, on_success, on_continue))
return serialization; // return serialization;
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); // throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
} // }
SerializationPtr DataTypeTuple::doGetDefaultSerialization() const SerializationPtr DataTypeTuple::doGetDefaultSerialization() const
@ -320,7 +320,7 @@ SerializationPtr DataTypeTuple::doGetDefaultSerialization() const
{ {
String elem_name = use_explicit_names ? names[i] : toString(i + 1); String elem_name = use_explicit_names ? names[i] : toString(i + 1);
auto serialization = elems[i]->getDefaultSerialization(); auto serialization = elems[i]->getDefaultSerialization();
serializations[i] = std::make_shared<SerializationTupleElement>(serialization, elem_name); serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name);
} }
return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names); return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names);
@ -335,7 +335,7 @@ SerializationPtr DataTypeTuple::getSerialization(const String & column_name, con
String elem_name = use_explicit_names ? names[i] : toString(i + 1); String elem_name = use_explicit_names ? names[i] : toString(i + 1);
auto subcolumn_name = Nested::concatenateName(column_name, elem_name); auto subcolumn_name = Nested::concatenateName(column_name, elem_name);
auto serializaion = elems[i]->getSerialization(subcolumn_name, callback); auto serializaion = elems[i]->getSerialization(subcolumn_name, callback);
serializations[i] = std::make_shared<SerializationTupleElement>(serializaion, elem_name); serializations[i] = std::make_shared<SerializationNamed>(serializaion, elem_name);
} }
return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names); return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names);

View File

@ -57,8 +57,8 @@ public:
SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const override; SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const override;
SerializationPtr getSubcolumnSerialization( // SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; // const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
SerializationPtr doGetDefaultSerialization() const override; SerializationPtr doGetDefaultSerialization() const override;

View File

@ -31,6 +31,7 @@
#include <DataTypes/DataTypeUUID.h> #include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeIPv4andIPv6.h> #include <DataTypes/DataTypeIPv4andIPv6.h>
#include <DataTypes/DataTypeBitMap64.h> #include <DataTypes/DataTypeBitMap64.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/getLeastSupertype.h> #include <DataTypes/getLeastSupertype.h>
#include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeFactory.h>
#include <Common/Exception.h> #include <Common/Exception.h>
@ -152,9 +153,9 @@ DataTypePtr FieldToDataType::operator() (const Array & x) const
element_types.reserve(x.size()); element_types.reserve(x.size());
for (const Field & elem : x) for (const Field & elem : x)
element_types.emplace_back(applyVisitor(FieldToDataType(), elem)); element_types.emplace_back(applyVisitor(FieldToDataType(allow_convertion_to_string), elem));
return std::make_shared<DataTypeArray>(getLeastSupertype(element_types)); return std::make_shared<DataTypeArray>(getLeastSupertype(element_types, allow_convertion_to_string));
} }
@ -167,7 +168,7 @@ DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const
element_types.reserve(tuple.size()); element_types.reserve(tuple.size());
for (const auto & element : tuple) for (const auto & element : tuple)
element_types.push_back(applyVisitor(FieldToDataType(), element)); element_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), element));
return std::make_shared<DataTypeTuple>(element_types); return std::make_shared<DataTypeTuple>(element_types);
} }
@ -185,7 +186,9 @@ DataTypePtr FieldToDataType::operator() (const Map & map) const
value_types.push_back(applyVisitor(FieldToDataType(), elem.second)); value_types.push_back(applyVisitor(FieldToDataType(), elem.second));
} }
return std::make_shared<DataTypeMap>(getLeastSupertype(key_types), getLeastSupertype(value_types)); return std::make_shared<DataTypeMap>(
getLeastSupertype(key_types, allow_convertion_to_string),
getLeastSupertype(value_types, allow_convertion_to_string));
} }
DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) const DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) const
@ -199,4 +202,10 @@ DataTypePtr FieldToDataType::operator() (const BitMap64 &) const
return std::make_shared<DataTypeBitMap64>(); return std::make_shared<DataTypeBitMap64>();
} }
DataTypePtr FieldToDataType::operator() (const Object &) const
{
/// TODO: Do we need different parameters for type Object?
return std::make_shared<DataTypeObject>("json", false);
}
} }

View File

@ -41,6 +41,11 @@ using DataTypePtr = std::shared_ptr<const IDataType>;
class FieldToDataType : public StaticVisitor<DataTypePtr> class FieldToDataType : public StaticVisitor<DataTypePtr>
{ {
public: public:
FieldToDataType(bool allow_convertion_to_string_ = false)
: allow_convertion_to_string(allow_convertion_to_string_)
{
}
DataTypePtr operator() (const Null & x) const; DataTypePtr operator() (const Null & x) const;
DataTypePtr operator() (const NegativeInfinity & x) const; DataTypePtr operator() (const NegativeInfinity & x) const;
DataTypePtr operator() (const PositiveInfinity & x) const; DataTypePtr operator() (const PositiveInfinity & x) const;
@ -64,6 +69,10 @@ public:
DataTypePtr operator() (const DecimalField<Decimal256> & x) const; DataTypePtr operator() (const DecimalField<Decimal256> & x) const;
DataTypePtr operator() (const AggregateFunctionStateData & x) const; DataTypePtr operator() (const AggregateFunctionStateData & x) const;
DataTypePtr operator() (const BitMap64 & x) const; DataTypePtr operator() (const BitMap64 & x) const;
DataTypePtr operator() (const Object & map) const;
private:
bool allow_convertion_to_string;
}; };
} }

View File

@ -33,6 +33,7 @@
#include <DataTypes/DataTypeCustom.h> #include <DataTypes/DataTypeCustom.h>
#include <DataTypes/NestedUtils.h> #include <DataTypes/NestedUtils.h>
#include <DataTypes/Serializations/SerializationTupleElement.h> #include <DataTypes/Serializations/SerializationTupleElement.h>
#include <DataTypes/Serializations/SerializationInfo.h>
#include <Storages/MergeTree/MergeTreeSuffix.h> #include <Storages/MergeTree/MergeTreeSuffix.h>
@ -117,6 +118,50 @@ size_t IDataType::getSizeOfValueInMemory() const
throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR); throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR);
} }
void IDataType::forEachSubcolumn(
const SubcolumnCallback & callback,
const SubstreamData & data)
{
ISerialization::StreamCallback callback_with_data = [&](const auto & subpath)
{
for (size_t i = 0; i < subpath.size(); ++i)
{
size_t prefix_len = i + 1;
if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, prefix_len))
{
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
auto subdata = ISerialization::createFromPath(subpath, prefix_len);
callback(subpath, name, subdata);
}
subpath[i].visited = true;
}
};
ISerialization::EnumerateStreamsSettings settings;
settings.position_independent_encoding = false;
data.serialization->enumerateStreams(settings, callback_with_data, data);
}
template <typename Ptr>
Ptr IDataType::getForSubcolumn(
const String & subcolumn_name,
const SubstreamData & data,
Ptr SubstreamData::*member,
bool throw_if_null) const
{
Ptr res;
forEachSubcolumn([&](const auto &, const auto & name, const auto & subdata)
{
if (name == subcolumn_name)
res = subdata.*member;
}, data);
if (!res && throw_if_null)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
return res;
}
DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
{ {
if (subcolumn_name == MAIN_SUBCOLUMN_NAME) if (subcolumn_name == MAIN_SUBCOLUMN_NAME)
@ -127,11 +172,17 @@ DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
{ {
auto subcolumn_type = tryGetSubcolumnType(subcolumn_name); if (subcolumn_name == MAIN_SUBCOLUMN_NAME)
if (subcolumn_type) return shared_from_this();
return subcolumn_type;
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, true);
}
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); ColumnPtr IDataType::tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
{
auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, false);
} }
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const
@ -141,21 +192,12 @@ ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &
Names IDataType::getSubcolumnNames() const Names IDataType::getSubcolumnNames() const
{ {
NameSet res; Names res;
getDefaultSerialization()->enumerateStreams([&res, this](const ISerialization::SubstreamPath & substream_path) forEachSubcolumn([&](const auto &, const auto & name, const auto &)
{ {
ISerialization::SubstreamPath new_path; res.push_back(name);
/// Iterate over path to try to get intermediate subcolumns for complex nested types. }, SubstreamData(getDefaultSerialization()));
for (const auto & elem : substream_path) return res;
{
new_path.push_back(elem);
auto subcolumn_name = ISerialization::getSubcolumnNameForStream(new_path);
if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name))
res.insert(subcolumn_name);
}
});
return Names(std::make_move_iterator(res.begin()), std::make_move_iterator(res.end()));
} }
void IDataType::insertDefaultInto(IColumn & column) const void IDataType::insertDefaultInto(IColumn & column) const
@ -173,6 +215,14 @@ void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
custom_serialization = std::move(custom_desc_->serialization); custom_serialization = std::move(custom_desc_->serialization);
} }
SerializationInfoPtr IDataType::getSerializationInfo(const IColumn & column) const
{
if (const auto * column_const = checkAndGetColumn<ColumnConst>(&column))
return getSerializationInfo(column_const->getDataColumn());
return std::make_shared<SerializationInfo>(ISerialization::getKind(column), SerializationInfo::Settings{});
}
SerializationPtr IDataType::getDefaultSerialization() const SerializationPtr IDataType::getDefaultSerialization() const
{ {
if (custom_serialization) if (custom_serialization)
@ -181,9 +231,23 @@ SerializationPtr IDataType::getDefaultSerialization() const
return doGetDefaultSerialization(); return doGetDefaultSerialization();
} }
SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const BaseSerializationGetter &) const SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const
{ {
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); auto data = SubstreamData(serialization);
return getForSubcolumn<SerializationPtr>(subcolumn_name, data, &SubstreamData::serialization, true);
}
SerializationPtr IDataType::getSerialization(ISerialization::Kind /*kind*/) const
{
// if (supportsSparseSerialization() && kind == ISerialization::Kind::SPARSE)
// return getSparseSerialization();
return getDefaultSerialization();
}
SerializationPtr IDataType::getSerialization(const SerializationInfo & info) const
{
return getSerialization(info.getKind());
} }
// static // static
@ -191,14 +255,9 @@ SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, con
{ {
if (column.isSubcolumn()) if (column.isSubcolumn())
{ {
/// Wrap to custom serialization deepest subcolumn, which is represented in non-complex type. const auto & type_in_storage = column.getTypeInStorage();
auto base_serialization_getter = [&](const IDataType & subcolumn_type) auto default_serialization = type_in_storage->getDefaultSerialization();
{ return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), default_serialization);
return subcolumn_type.getSerialization(column.name, callback);
};
auto type_in_storage = column.getTypeInStorage();
return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), base_serialization_getter);
} }
return column.type->getSerialization(column.name, callback); return column.type->getSerialization(column.name, callback);

Some files were not shown because too many files have changed in this diff Show More