Daily cherry-pick from internal (#1099)

* Merge branch 'support_json_type' into 'cnch-ce-merge'

feat(clickhousech@m-13539135):support json type

See merge request dp/ClickHouse!15288
# Conflicts:
#	src/Analyzers/QueryRewriter.cpp
#	src/Catalog/Catalog.cpp
#	src/Catalog/Catalog.h
#	src/CloudServices/CnchServerServiceImpl.cpp
#	src/CloudServices/CnchWorkerResource.cpp
#	src/CloudServices/CnchWorkerServiceImpl.cpp
#	src/Common/config.h.in
#	src/DataStreams/ReadDictInputStream.cpp
#	src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
#	src/Interpreters/DatabaseCatalog.cpp
#	src/Interpreters/TreeRewriter.cpp
#	src/MergeTreeCommon/MergeTreeMetaBase.h
#	src/Protos/DataModelHelpers.cpp
#	src/Protos/cnch_worker_rpc.proto
#	src/QueryPlan/PlanSegmentSourceStep.cpp
#	src/QueryPlan/TableScanStep.cpp
#	src/Storages/BitEngine/BitEngineHelper.cpp
#	src/Storages/BitEngineEncodePartitionHelper.cpp
#	src/Storages/MergeTree/MergeTreeSequentialSource.cpp
#	src/Storages/StorageCnchMergeTree.cpp
#	src/Storages/StorageCnchMergeTree.h
#	src/WorkerTasks/MergeTreeDataMerger.cpp
#	tests/queries/0_ce_problematic_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql
This commit is contained in:
Dao Minh Thuc 2024-01-26 10:38:39 +08:00 committed by GitHub
parent cc4b38d2c5
commit de5bd273f9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
412 changed files with 9968 additions and 1515 deletions

View File

@ -0,0 +1,38 @@
#pragma once
#include <magic_enum.hpp>
#include <fmt/format.h>
template <class T> concept is_enum = std::is_enum_v<T>;
namespace detail
{
template <is_enum E, class F, size_t ...I>
constexpr void static_for(F && f, std::index_sequence<I...>)
{
(std::forward<F>(f)(std::integral_constant<E, magic_enum::enum_value<E>(I)>()) , ...);
}
}
/**
* Iterate over enum values in compile-time (compile-time switch/case, loop unrolling).
*
* @example static_for<E>([](auto enum_value) { return template_func<enum_value>(); }
* ^ enum_value can be used as a template parameter
*/
template <is_enum E, class F>
constexpr void static_for(F && f)
{
constexpr size_t count = magic_enum::enum_count<E>();
detail::static_for<E>(std::forward<F>(f), std::make_index_sequence<count>());
}
/// Enable printing enum values as strings via fmt + magic_enum
template <is_enum T>
struct fmt::formatter<T> : fmt::formatter<std::string_view>
{
constexpr auto format(T value, auto& format_context)
{
return formatter<string_view>::format(magic_enum::enum_name(value), format_context);
}
};

View File

@ -328,6 +328,10 @@ if (USE_SIMDJSON)
add_subdirectory (simdjson-cmake)
endif()
if (USE_RAPIDJSON)
add_subdirectory (rapidjson-cmake)
endif()
if (USE_BREAKPAD)
add_subdirectory(breakpad-cmake)
endif()

View File

@ -0,0 +1,4 @@
set(RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include")
add_library(rapidjson INTERFACE)
target_include_directories(rapidjson BEFORE INTERFACE "${RAPIDJSON_INCLUDE_DIR}")

View File

@ -655,7 +655,7 @@ void ExprAnalyzerVisitor::processSubqueryArgsWithCoercion(ASTPtr & lhs_ast, ASTP
{
DataTypePtr super_type = nullptr;
if (enable_implicit_type_conversion)
super_type = getLeastSupertype({lhs_type, rhs_type}, allow_extended_conversion);
super_type = getLeastSupertype(DataTypes{lhs_type, rhs_type}, allow_extended_conversion);
if (!super_type)
throw Exception("Incompatible types for IN prediacte", ErrorCodes::TYPE_MISMATCH);
if (!lhs_type->equals(*super_type))

View File

@ -869,7 +869,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing(ASTTableJoin & table_join, Scope
{
try
{
output_type = getLeastSupertype({left_type, right_type}, allow_extended_conversion);
output_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion);
}
catch (DB::Exception & ex)
{
@ -961,7 +961,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing(ASTTableJoin & table_join, Scope
{
try
{
output_type = getLeastSupertype({left_type, right_type}, allow_extended_conversion);
output_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion);
}
catch (DB::Exception & ex)
{
@ -1159,7 +1159,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinOn(ASTTableJoin & table_join, ScopePtr
{
try
{
super_type = getLeastSupertype({left_type, right_type}, allow_extended_conversion);
super_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion);
}
catch (DB::Exception & ex)
{

View File

@ -44,6 +44,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/StorageSnapshot.h>
namespace DB
{
@ -433,7 +434,10 @@ namespace
}
StoragePtr storage = joined_tables.getLeftTableStorage();
rewrite_context.result.emplace(source_columns, storage, storage ? storage->getInMemoryMetadataPtr() : nullptr);
StorageSnapshotPtr storage_snapshot = nullptr;
if (storage)
storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
rewrite_context.result.emplace(source_columns, storage, storage_snapshot);
auto & result = *(rewrite_context.result);
if (tables_with_columns.size() > 1)
@ -486,8 +490,15 @@ namespace
for (const auto & col : result.analyzed_join->columnsFromJoinedTable())
all_source_columns_set.insert(col.name);
}
normalizeNameAndAliases(node, result.aliases, all_source_columns_set, settings, context, result.storage,
result.metadata_snapshot, graphviz_index);
normalizeNameAndAliases(
node,
result.aliases,
all_source_columns_set,
settings,
context,
result.storage,
result.storage_snapshot ? result.storage_snapshot->metadata : nullptr,
graphviz_index);
}
// 5. Call `TreeOptimizer` since some optimizations will change the query result

View File

@ -613,6 +613,14 @@ if (USE_LIBPQXX)
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${LIBPQXX_INCLUDE_DIR})
endif()
if (USE_SIMDJSON)
dbms_target_link_libraries(PUBLIC simdjson)
endif()
if (USE_RAPIDJSON)
dbms_target_link_libraries(PUBLIC rapidjson)
endif()
if(USE_CPP_JIEBA)
dbms_target_link_libraries(PUBLIC ${CPP_JIEBA_LIBRARY})
endif()
@ -621,7 +629,6 @@ if (USE_TSQUERY)
dbms_target_link_libraries(PUBLIC ${TSQUERY_LIBRARY})
endif()
if (TARGET ch_contrib::ulid)
dbms_target_link_libraries (PUBLIC ch_contrib::ulid)
endif()
@ -639,6 +646,7 @@ if (USE_NLP)
endif()
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${PARALLEL_HASHMAP_INCLUDE_DIR})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${MAGIC_ENUM_INCLUDE_DIR})
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")

View File

@ -13,7 +13,13 @@
* limitations under the License.
*/
#include <algorithm>
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include <Catalog/Catalog.h>
#include <Catalog/CatalogFactory.h>
#include <Catalog/DataModelPartWrapper.h>
@ -33,6 +39,11 @@
#include <Common/RpcClientPool.h>
#include <Common/serverLocality.h>
#include <Common/ScanWaitFreeMap.h>
#include <Catalog/MetastoreCommon.h>
#include <DataTypes/ObjectUtils.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/StorageSnapshot.h>
#include <Core/Types.h>
// #include <Access/MaskingPolicyDataModel.h>
// #include <Access/MaskingPolicyCommon.h>
@ -1208,6 +1219,22 @@ namespace Catalog
ProfileEvents::SetWorkerGroupForTableFailed);
}
void Catalog::initStorageObjectSchema(StoragePtr & res)
{
// Load dynamic object column schema
if (res && hasDynamicSubcolumns(res->getInMemoryMetadata().getColumns()))
{
auto cnch_table = std::dynamic_pointer_cast<StorageCnchMergeTree>(res);
if (cnch_table)
{
auto assembled_schema = tryGetTableObjectAssembledSchema(res->getStorageUUID());
auto partial_schemas = tryGetTableObjectPartialSchemas(res->getStorageUUID());
cnch_table->resetObjectSchemas(assembled_schema, partial_schemas);
}
}
}
StoragePtr Catalog::getTable(const Context & query_context, const String & database, const String & name, const TxnTimestamp & ts)
{
StoragePtr res = nullptr;
@ -1245,6 +1272,7 @@ namespace Catalog
res = createTableFromDataModel(query_context, *table);
initStorageObjectSchema(res);
/// TODO: (zuochuang.zema, guanzhe.andy) handle TimeTravel
if (auto * cnch_merge_tree = dynamic_cast<StorageCnchMergeTree *>(res.get()))
{
@ -1313,6 +1341,9 @@ namespace Catalog
if (!table)
return;
res = createTableFromDataModel(query_context, *table);
initStorageObjectSchema(res);
/// Try insert the storage into cache.
if (res && cache_manager)
{
@ -6241,6 +6272,208 @@ namespace Catalog
d.set_definition(create_query);
}
void Catalog::appendObjectPartialSchema(
const StoragePtr & table, const TxnTimestamp & txn_id, const MutableMergeTreeDataPartsCNCHVector & parts)
{
//txn partial schema
//multi column
auto cnch_table = std::dynamic_pointer_cast<StorageCnchMergeTree>(table);
if (!cnch_table)
return;
auto subcolumns_limit = cnch_table->getSettings()->json_subcolumns_threshold;
//check schema compatibility and merge part schema
auto partial_schema = DB::getConcreteObjectColumns(
parts.begin(), parts.end(), table->getInMemoryMetadata().columns, [](const auto & part) { return part->getColumns(); });
// compare with existed schema , check if it need to insert
// Attention: this comparison will scan existed partial schema from meta store, it may cost too many meta store resource.
// if it cause meta store performance fallback, just remove this comparison
auto assembled_schema = tryGetTableObjectAssembledSchema(table->getStorageUUID());
auto existed_partial_schemas = tryGetTableObjectPartialSchemas(table->getStorageUUID());
std::vector<TxnTimestamp> existed_partial_schema_txnids(existed_partial_schemas.size());
std::for_each(
existed_partial_schemas.begin(),
existed_partial_schemas.end(),
[&existed_partial_schema_txnids](const auto & existed_partial_schema) {
existed_partial_schema_txnids.emplace_back(existed_partial_schema.first);
});
auto committed_partial_schema_txnids = filterUncommittedObjectPartialSchemas(existed_partial_schema_txnids);
std::vector<ObjectPartialSchema> committed_partial_schema_list(committed_partial_schema_txnids.size() + 2);
std::for_each(
committed_partial_schema_txnids.begin(),
committed_partial_schema_txnids.end(),
[&committed_partial_schema_list, &existed_partial_schemas](const auto & txn_id) {
committed_partial_schema_list.emplace_back(existed_partial_schemas[txn_id]);
});
committed_partial_schema_list.emplace_back(assembled_schema);
auto existed_assembled_schema = DB::getConcreteObjectColumns(
committed_partial_schema_list.begin(),
committed_partial_schema_list.end(),
cnch_table->getInMemoryMetadata().getColumns(),
[](const auto & partial_schema_) { return partial_schema_; });
committed_partial_schema_list.emplace_back(partial_schema);
auto new_assembled_schema = DB::getConcreteObjectColumns(
committed_partial_schema_list.begin(),
committed_partial_schema_list.end(),
cnch_table->getInMemoryMetadata().getColumns(),
[](const auto & partial_schema_) { return partial_schema_; });
if (new_assembled_schema != existed_assembled_schema)
{
DB::limitObjectSubcolumns(new_assembled_schema, subcolumns_limit);
meta_proxy->appendObjectPartialSchema(
name_space, UUIDHelpers::UUIDToString(table->getStorageUUID()), txn_id.toUInt64(), partial_schema.toString());
cnch_table->appendObjectPartialSchema(txn_id, partial_schema);
LOG_DEBUG(
log,
"Append dynamic object partial schema [TxnTimestamp:{}, Partial Schema:{}]",
txn_id.toString(),
partial_schema.toString());
}
}
ObjectAssembledSchema Catalog::tryGetTableObjectAssembledSchema(const UUID & table_uuid) const
{
auto serialized_assembled_schema = meta_proxy->getObjectAssembledSchema(name_space, UUIDHelpers::UUIDToString(table_uuid));
if (serialized_assembled_schema.empty())
return ColumnsDescription();
return ColumnsDescription::parse(serialized_assembled_schema);
}
ObjectPartialSchemas Catalog::tryGetTableObjectPartialSchemas(const UUID & table_uuid, const int & limit_size) const
{
auto serialized_partial_schemas
= meta_proxy->scanObjectPartialSchemas(name_space, UUIDHelpers::UUIDToString(table_uuid), limit_size);
ObjectPartialSchemas partial_schemas;
partial_schemas.reserve(serialized_partial_schemas.size());
std::for_each(
serialized_partial_schemas.begin(),
serialized_partial_schemas.end(),
[&partial_schemas](std::pair<String, String> serialized_partial_schema) {
partial_schemas.emplace(
std::stoll(serialized_partial_schema.first),
ColumnsDescription::parse(serialized_partial_schema.second));
});
return partial_schemas;
}
bool Catalog::resetObjectAssembledSchemaAndPurgePartialSchemas(
const UUID & table_uuid,
const ObjectAssembledSchema & old_assembled_schema,
const ObjectAssembledSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids)
{
return meta_proxy->resetObjectAssembledSchemaAndPurgePartialSchemas(
name_space,
UUIDHelpers::UUIDToString(table_uuid),
old_assembled_schema.empty() ? "" : old_assembled_schema.toString(),
new_assembled_schema.toString(),
partial_schema_txnids);
}
std::vector<TxnTimestamp> Catalog::filterUncommittedObjectPartialSchemas(std::vector<TxnTimestamp> & unfiltered_partial_schema_txnids)
{
std::vector<TxnTimestamp> committed_partial_schema_txnids;
std::unordered_map<TxnTimestamp, TxnTimestamp, TxnTimestampHasher> unfiltered_partial_schema_txnid_map;
unfiltered_partial_schema_txnid_map.reserve(unfiltered_partial_schema_txnids.size());
std::for_each(
unfiltered_partial_schema_txnids.begin(),
unfiltered_partial_schema_txnids.end(),
[&unfiltered_partial_schema_txnid_map](const auto & txn_id) { unfiltered_partial_schema_txnid_map[txn_id] = txn_id; });
// query partial schema status in meta store
auto partial_schema_statuses = batchGetObjectPartialSchemaStatuses(unfiltered_partial_schema_txnids);
std::for_each(
partial_schema_statuses.begin(),
partial_schema_statuses.end(),
[&committed_partial_schema_txnids, &unfiltered_partial_schema_txnid_map](const auto & partial_schema_status_pair) {
if (partial_schema_status_pair.second == ObjectPartialSchemaStatus::Finished)
{
committed_partial_schema_txnids.emplace_back(partial_schema_status_pair.first);
unfiltered_partial_schema_txnid_map.erase(partial_schema_status_pair.first);
}
});
// query remaining partial schemas by its co-responding txn record status
unfiltered_partial_schema_txnids.clear();
std::transform(
unfiltered_partial_schema_txnid_map.begin(),
unfiltered_partial_schema_txnid_map.end(),
std::back_inserter(unfiltered_partial_schema_txnids),
[](const auto & txn_id_pair) { return txn_id_pair.first; });
auto txn_record_statuses = getTransactionRecords(unfiltered_partial_schema_txnids, 10000);
std::for_each(
txn_record_statuses.begin(), txn_record_statuses.end(), [&committed_partial_schema_txnids](TransactionRecord txn_record) {
auto txn_id = txn_record.txnID();
auto status = txn_record.status();
if (status == CnchTransactionStatus::Finished)
committed_partial_schema_txnids.emplace_back(txn_id);
});
return committed_partial_schema_txnids;
}
ObjectPartialSchemaStatuses
Catalog::batchGetObjectPartialSchemaStatuses(const std::vector<TxnTimestamp> & txn_ids, const int & batch_size)
{
ObjectPartialSchemaStatuses partial_schema_statuses;
size_t total_txn_size = txn_ids.size();
partial_schema_statuses.reserve(total_txn_size);
auto fetch_records_in_batch = [&](size_t begin, size_t end) {
auto statuses_in_metastore = meta_proxy->batchGetObjectPartialSchemaStatuses(
name_space, std::vector<TxnTimestamp>(txn_ids.begin() + begin, txn_ids.begin() + end));
for (const auto & serialized_partial_schema_status : statuses_in_metastore)
{
auto txn_id = serialized_partial_schema_status.first;
auto status = ObjectSchemas::deserializeObjectPartialSchemaStatus(serialized_partial_schema_status.second);
partial_schema_statuses.emplace(txn_id, status);
}
};
if (batch_size > 0)
{
size_t batch_count{0};
while (batch_count + batch_size < total_txn_size)
{
fetch_records_in_batch(batch_count, batch_count + batch_size);
batch_count += batch_size;
}
fetch_records_in_batch(batch_count, total_txn_size);
}
else
fetch_records_in_batch(0, total_txn_size);
return partial_schema_statuses;
}
void Catalog::batchDeleteObjectPartialSchemaStatus(const std::vector<TxnTimestamp> &txn_ids)
{
meta_proxy->batchDeletePartialSchemaStatus(name_space, txn_ids);
}
void Catalog::commitObjectPartialSchema(const TxnTimestamp &txn_id)
{
meta_proxy->updateObjectPartialSchemaStatus(name_space, txn_id, ObjectPartialSchemaStatus::Finished);
}
void Catalog::abortObjectPartialSchema(const TxnTimestamp & txn_id)
{
meta_proxy->updateObjectPartialSchemaStatus(name_space, txn_id, ObjectPartialSchemaStatus::Aborted);
}
std::unordered_map<String, std::shared_ptr<PartitionMetrics>>
Catalog::loadPartitionMetricsSnapshotFromMetastore(const String & table_uuid)
{

View File

@ -16,6 +16,7 @@
#pragma once
#include <map>
#include <optional>
#include <set>
#include <Catalog/CatalogUtils.h>
#include <Catalog/DataModelPartWrapper.h>
@ -34,12 +35,15 @@
#include <Transaction/TransactionCommon.h>
#include <Transaction/TxnTimestamp.h>
#include <cppkafka/cppkafka.h>
#include "common/types.h"
#include <Common/Config/MetastoreConfig.h>
#include <Common/Configurations.h>
#include <Common/DNSResolver.h>
#include <Common/HostWithPorts.h>
#include <Common/Config/MetastoreConfig.h>
#include <common/getFQDNOrHostName.h>
#include "Catalog/IMetastore.h"
#include "Storages/IStorage_fwd.h"
#include <Storages/StorageSnapshot.h>
#include <Catalog/IMetastore.h>
// #include <Access/MaskingPolicyDataModel.h>
namespace DB::ErrorCodes
@ -755,6 +759,25 @@ public:
DeleteBitmapMetaPtrVector listDetachedDeleteBitmaps(const MergeTreeMetaBase & storage, const AttachFilter & filter);
// Append partial object column schema in Txn
void
appendObjectPartialSchema(const StoragePtr & table, const TxnTimestamp & txn_id, const MutableMergeTreeDataPartsCNCHVector & parts);
ObjectAssembledSchema tryGetTableObjectAssembledSchema(const UUID & table_uuid) const;
std::vector<TxnTimestamp> filterUncommittedObjectPartialSchemas(std::vector<TxnTimestamp> & unfiltered_partial_schema_txnids);
// @param limit_size -1 means no limit , read all partial schemas as possible
ObjectPartialSchemas tryGetTableObjectPartialSchemas(const UUID & table_uuid, const int & limit_size = -1) const;
bool resetObjectAssembledSchemaAndPurgePartialSchemas(
const UUID & table_uuid,
const ObjectAssembledSchema & old_assembled_schema,
const ObjectAssembledSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids);
ObjectPartialSchemaStatuses batchGetObjectPartialSchemaStatuses(const std::vector<TxnTimestamp> & txn_ids, const int & batch_size = 10000);
void batchDeleteObjectPartialSchemaStatus(const std::vector<TxnTimestamp> & txn_ids);
void commitObjectPartialSchema(const TxnTimestamp & txn_id);
void abortObjectPartialSchema(const TxnTimestamp & txn_id);
void initStorageObjectSchema(StoragePtr & res);
// Access Entities
std::optional<AccessEntityModel> tryGetAccessEntity(EntityType type, const String & name);
std::vector<AccessEntityModel> getAllAccessEntities(EntityType type);
@ -762,6 +785,7 @@ public:
void dropAccessEntity(EntityType type, const UUID & uuid, const String & name);
void putAccessEntity(EntityType type, AccessEntityModel & new_access_entity, AccessEntityModel & old_access_entity, bool replace_if_exists = true);
private:
Poco::Logger * log = &Poco::Logger::get("Catalog");
Context & context;

View File

@ -26,10 +26,12 @@
#include <DaemonManager/BGJobStatusInCatalog.h>
#include <IO/ReadHelpers.h>
#include <Protos/DataModelHelpers.h>
#include "common/types.h"
#include <common/types.h>
#include <common/logger_useful.h>
#include "Catalog/MetastoreByteKVImpl.h"
#include "Interpreters/executeQuery.h"
#include <Catalog/MetastoreByteKVImpl.h>
#include <Interpreters/executeQuery.h>
#include <Storages/MergeTree/IMetastore.h>
#include <Storages/StorageSnapshot.h>
namespace DB::ErrorCodes
{
@ -1585,6 +1587,11 @@ void MetastoreProxy::setBGJobStatus(const String & name_space, const String & uu
dedupWorkerBGJobStatusKey(name_space, uuid),
String{BGJobStatusInCatalog::serializeToChar(status)}
);
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
metastore_ptr->put(
objectSchemaAssembleBGJobStatusKey(name_space, uuid),
String{BGJobStatusInCatalog::serializeToChar(status)}
);
else
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
}
@ -1604,6 +1611,8 @@ std::optional<CnchBGThreadStatus> MetastoreProxy::getBGJobStatus(const String &
metastore_ptr->get(mmysqlBGJobStatusKey(name_space, uuid), status_store_data);
else if (type == CnchBGThreadType::DedupWorker)
metastore_ptr->get(dedupWorkerBGJobStatusKey(name_space, uuid), status_store_data);
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
metastore_ptr->get(objectSchemaAssembleBGJobStatusKey(name_space, uuid), status_store_data);
else
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
@ -1638,6 +1647,8 @@ std::unordered_map<UUID, CnchBGThreadStatus> MetastoreProxy::getBGJobStatuses(co
return metastore_ptr->getByPrefix(allMmysqlBGJobStatusKeyPrefix(name_space));
else if (type == CnchBGThreadType::DedupWorker)
return metastore_ptr->getByPrefix(allDedupWorkerBGJobStatusKeyPrefix(name_space));
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
return metastore_ptr->getByPrefix(allObjectSchemaAssembleBGJobStatusKeyPrefix(name_space));
else
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
};
@ -1679,6 +1690,9 @@ void MetastoreProxy::dropBGJobStatus(const String & name_space, const String & u
case CnchBGThreadType::DedupWorker:
metastore_ptr->drop(dedupWorkerBGJobStatusKey(name_space, uuid));
break;
case CnchBGThreadType::ObjectSchemaAssemble:
metastore_ptr->drop(objectSchemaAssembleBGJobStatusKey(name_space, uuid));
break;
default:
throw Exception(String{"persistent status is not support for "} + toString(type), ErrorCodes::LOGICAL_ERROR);
}
@ -2710,6 +2724,139 @@ IMetaStore::IteratorPtr MetastoreProxy::getItemsInTrash(const String & name_spac
return metastore_ptr->getByPrefix(trashItemsPrefix(name_space, table_uuid), limit);
}
String MetastoreProxy::extractTxnIDFromPartialSchemaKey(const String &partial_schema_key)
{
auto pos = partial_schema_key.find_last_of('_');
return partial_schema_key.substr(pos + 1, String::npos);
}
void MetastoreProxy::appendObjectPartialSchema(
const String & name_space, const String & table_uuid, const UInt64 & txn_id, const SerializedObjectSchema & partial_schema)
{
BatchCommitRequest batch_write;
batch_write.AddPut(SinglePutRequest(partialSchemaKey(name_space, table_uuid, txn_id), partial_schema));
batch_write.AddPut(SinglePutRequest(
partialSchemaStatusKey(name_space, txn_id),
ObjectSchemas::serializeObjectPartialSchemaStatus(ObjectPartialSchemaStatus::Running)));
BatchCommitResponse store_response;
metastore_ptr->batchWrite(batch_write, store_response);
}
SerializedObjectSchema MetastoreProxy::getObjectPartialSchema(const String &name_space, const String &table_uuid, const UInt64 &txn_id)
{
SerializedObjectSchema partial_schema;
metastore_ptr->get(partialSchemaKey(name_space, table_uuid, txn_id), partial_schema);
if (partial_schema.empty())
return "";
return partial_schema;
}
SerializedObjectSchemas MetastoreProxy::scanObjectPartialSchemas(const String &name_space, const String &table_uuid, const UInt64 &limit_size)
{
auto scan_prefix = partialSchemaPrefix(name_space, table_uuid);
UInt64 scan_limit = limit_size <= 0 ? 0 : limit_size;
auto scan_iterator = metastore_ptr->getByPrefix(scan_prefix, scan_limit);
SerializedObjectSchemas serialized_object_schemas;
while (scan_iterator->next())
{
auto key = scan_iterator->key();
serialized_object_schemas.emplace(extractTxnIDFromPartialSchemaKey(key), scan_iterator->value());
}
return serialized_object_schemas;
}
SerializedObjectSchema MetastoreProxy::getObjectAssembledSchema(const String &name_space, const String &table_uuid)
{
SerializedObjectSchema assembled_schema;
metastore_ptr->get(assembledSchemaKey(name_space, table_uuid), assembled_schema);
if (assembled_schema.empty())
return "";
return assembled_schema;
}
bool MetastoreProxy::resetObjectAssembledSchemaAndPurgePartialSchemas(
const String & name_space,
const String & table_uuid,
const SerializedObjectSchema & old_assembled_schema,
const SerializedObjectSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids)
{
Poco::Logger * log = &Poco::Logger::get(__func__);
BatchCommitRequest batch_write;
bool if_not_exists = false;
if (old_assembled_schema.empty())
if_not_exists = true;
auto update_request = SinglePutRequest(assembledSchemaKey(name_space, table_uuid), new_assembled_schema, old_assembled_schema);
update_request.if_not_exists = if_not_exists;
batch_write.AddPut(update_request);
for (const auto & txn_id : partial_schema_txnids)
batch_write.AddDelete(partialSchemaKey(name_space, table_uuid, txn_id.toUInt64()));
BatchCommitResponse store_response;
try
{
metastore_ptr->batchWrite(batch_write, store_response);
return true;
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::METASTORE_COMMIT_CAS_FAILURE)
{
LOG_WARNING(
log,
fmt::format(
"Object schema refresh CAS put fail with old schema:{} and new schema:{}", old_assembled_schema, new_assembled_schema));
return false;
}
else
throw e;
}
}
SerializedObjectSchemaStatuses MetastoreProxy::batchGetObjectPartialSchemaStatuses(const String &name_space, const std::vector<TxnTimestamp> &txn_ids)
{
Strings keys;
for (const auto & txn_id : txn_ids)
keys.emplace_back(partialSchemaStatusKey(name_space, txn_id.toUInt64()));
auto serialized_statuses_in_metastore = metastore_ptr->multiGet(keys);
SerializedObjectSchemaStatuses serialized_statuses;
serialized_statuses.reserve(serialized_statuses_in_metastore.size());
for (size_t i = 0; i < serialized_statuses_in_metastore.size(); i++)
{
auto txn_id = txn_ids[i];
auto status = serialized_statuses_in_metastore[i].first;
if (status.empty())
serialized_statuses.emplace(txn_id, ObjectSchemas::serializeObjectPartialSchemaStatus(ObjectPartialSchemaStatus::Finished));
else
serialized_statuses.emplace(txn_id, status);
}
return serialized_statuses;
}
void MetastoreProxy::batchDeletePartialSchemaStatus(const String &name_space, const std::vector<TxnTimestamp> &txn_ids)
{
BatchCommitRequest batch_delete;
for (const auto & txn_id : txn_ids)
batch_delete.AddDelete(partialSchemaStatusKey(name_space, txn_id.toUInt64()));
BatchCommitResponse delete_result;
metastore_ptr->batchWrite(batch_delete, delete_result);
}
void MetastoreProxy::updateObjectPartialSchemaStatus(const String &name_space, const TxnTimestamp &txn_id, const ObjectPartialSchemaStatus & status)
{
metastore_ptr->put(partialSchemaStatusKey(name_space, txn_id), ObjectSchemas::serializeObjectPartialSchemaStatus(status));
}
IMetaStore::IteratorPtr MetastoreProxy::getAllDeleteBitmaps(const String & name_space, const String & table_uuid)
{
return metastore_ptr->getByPrefix(deleteBitmapPrefix(name_space, table_uuid));

View File

@ -22,6 +22,7 @@
#include <Storages/MergeTree/DeleteBitmapMeta.h>
// #include <Transaction/ICnchTransaction.h>
#include <sstream>
#include <unordered_map>
#include <unordered_set>
#include <Catalog/IMetastore.h>
#include <CloudServices/CnchBGThreadCommon.h>
@ -36,6 +37,7 @@
#include <cppkafka/cppkafka.h>
#include <google/protobuf/repeated_field.h>
#include <common/types.h>
#include <Storages/StorageSnapshot.h>
#include <Access/IAccessEntity.h>
#include <Parsers/formatTenantDatabaseName.h>
#include <Interpreters/SQLBinding/SQLBinding.h>
@ -95,6 +97,7 @@ namespace DB::Catalog
#define PARTGC_BG_JOB_STATUS "PARTGC_BGJS_"
#define CONSUMER_BG_JOB_STATUS "CONSUMER_BGJS_"
#define DEDUPWORKER_BG_JOB_STATUS "DEDUPWORKER_BGJS_"
#define OBJECT_SCHEMA_ASSEMBLE_BG_JOB_STATUS "OBJECT_SCHEMA_ASSEMBLE_BGJS_"
#define PREALLOCATE_VW "PVW_"
#define DICTIONARY_STORE_PREFIX "DIC_"
#define RESOURCE_GROUP_PREFIX "RG_"
@ -113,6 +116,9 @@ namespace DB::Catalog
#define MATERIALIZEDMYSQL_PREFIX "MMYSQL_"
#define MATERIALIZEDMYSQL_BG_JOB_STATUS "MATERIALIZEDMYSQL_BGJS_"
#define DETACHED_DELETE_BITMAP_PREFIX "DDLB_"
#define OBJECT_PARTIAL_SCHEMA_PREFIX "PS_"
#define OBJECT_ASSEMBLED_SCHEMA_PREFIX "AS_"
#define OBJECT_PARTIAL_SCHEMA_STATUS_PREFIX "PSS_"
#define PARTITION_PARTS_METRICS_SNAPSHOT_PREFIX "PPS_"
#define TABLE_TRASHITEMS_METRICS_SNAPSHOT_PREFIX "TTS_"
#define DICTIONARY_BUCKET_UPDATE_TIME_PREFIX "DBUT_"
@ -143,6 +149,10 @@ static EntityMetastorePrefix getEntityMetastorePrefix(EntityType type)
}
}
using SerializedObjectSchemas = std::unordered_map<String, String>;
using SerializedObjectSchemaStatuses = std::unordered_map<TxnTimestamp, String, TxnTimestampHasher>;
using SerializedObjectSchema = String;
static std::shared_ptr<MetastoreFDBImpl> getFDBInstance(const String & cluster_config_path)
{
/// Notice: A single process can only have fdb instance
@ -592,6 +602,16 @@ public:
return allDedupWorkerBGJobStatusKeyPrefix(name_space) + uuid;
}
static std::string allObjectSchemaAssembleBGJobStatusKeyPrefix(const std::string & name_space)
{
return escapeString(name_space) + '_' + OBJECT_SCHEMA_ASSEMBLE_BG_JOB_STATUS;
}
static std::string objectSchemaAssembleBGJobStatusKey(const std::string & name_space, const std::string & uuid)
{
return allObjectSchemaAssembleBGJobStatusKeyPrefix(name_space) + uuid;
}
static UUID parseUUIDFromBGJobStatusKey(const std::string & key);
static std::string preallocateVW(const std::string & name_space, const std::string & uuid)
@ -765,6 +785,26 @@ public:
return escapeString(name_space) + "_" + DATA_ITEM_TRASH_PREFIX + uuid + "_";
}
static String partialSchemaPrefix(const String & name_space, const String & table_uuid)
{
return escapeString(name_space) + "_" + OBJECT_PARTIAL_SCHEMA_PREFIX + table_uuid + "_";
}
static String partialSchemaKey(const String & name_space, const String & table_uuid, const UInt64 & txn_id)
{
return escapeString(name_space) + "_" + OBJECT_PARTIAL_SCHEMA_PREFIX + table_uuid + "_" + toString(txn_id);
}
static String assembledSchemaKey(const String & name_space, const String & table_uuid)
{
return escapeString(name_space) + "_" + OBJECT_ASSEMBLED_SCHEMA_PREFIX + table_uuid;
}
static String partialSchemaStatusKey(const String & name_space, const UInt64 & txn_id)
{
return escapeString(name_space) + "-" + OBJECT_PARTIAL_SCHEMA_STATUS_PREFIX + "_" + toString(txn_id);
}
static String partitionPartsMetricsSnapshotPrefix(const String & name_space, const String & table_uuid, const String & partition_id)
{
return escapeString(name_space) + "_" + PARTITION_PARTS_METRICS_SNAPSHOT_PREFIX + table_uuid + "_" + partition_id;
@ -1105,6 +1145,25 @@ public:
* @param limit Limit the results, disabled by passing 0.
*/
IMetaStore::IteratorPtr getItemsInTrash(const String & name_space, const String & table_uuid, const size_t & limit);
//Object column schema related API
static String extractTxnIDFromPartialSchemaKey(const String & partial_schema_key);
void appendObjectPartialSchema(
const String & name_space, const String & table_uuid, const UInt64 & txn_id, const SerializedObjectSchema & partial_schema);
SerializedObjectSchema getObjectPartialSchema(const String & name_space, const String & table_uuid, const UInt64 & txn_id);
SerializedObjectSchemas scanObjectPartialSchemas(const String & name_space, const String & table_uuid, const UInt64 & limit_size);
SerializedObjectSchema getObjectAssembledSchema(const String & name_space, const String & table_uuid);
bool resetObjectAssembledSchemaAndPurgePartialSchemas(
const String & name_space,
const String & table_uuid,
const SerializedObjectSchema & old_assembled_schema,
const SerializedObjectSchema & new_assembled_schema,
const std::vector<TxnTimestamp> & partial_schema_txnids);
SerializedObjectSchemaStatuses batchGetObjectPartialSchemaStatuses(const String & name_space, const std::vector<TxnTimestamp> & txn_ids);
void batchDeletePartialSchemaStatus(const String & name_space, const std::vector<TxnTimestamp> & txn_ids);
void updateObjectPartialSchemaStatus(const String &name_space, const TxnTimestamp & txn_id, const ObjectPartialSchemaStatus & status);
IMetaStore::IteratorPtr getAllDeleteBitmaps(const String & name_space, const String & table_uuid);
/**

View File

@ -35,9 +35,10 @@ namespace CnchBGThread
DedupWorker = 5,
Clustering = 6,
MaterializedMySQL = 7,
ObjectSchemaAssemble = 8,
ServerMinType = PartGC,
ServerMaxType = MaterializedMySQL,
ServerMaxType = ObjectSchemaAssemble,
GlobalGC = 20, /// reserve several entries
TxnGC = 21,
@ -96,6 +97,8 @@ constexpr auto toString(CnchBGThreadType type)
return "TxnGCThread";
case CnchBGThreadType::ResourceReport:
return "ResourceReport";
case CnchBGThreadType::ObjectSchemaAssemble:
return "ObjectSchemaAssembleThread";
case CnchBGThreadType::MemoryBuffer:
return "MemoryBuffer";
case CnchBGThreadType::MaterializedMySQL:

View File

@ -23,6 +23,8 @@
#include <CloudServices/CnchPartGCThread.h>
#include <CloudServices/DedupWorkerManager.h>
#include <CloudServices/ReclusteringManagerThread.h>
#include <CloudServices/CnchObjectColumnSchemaAssembleThread.h>
#include <Databases/MySQL/MaterializedMySQLSyncThreadManager.h>
#include <regex>
@ -71,6 +73,10 @@ CnchBGThreadPtr CnchBGThreadsMap::createThread(const StorageID & storage_id)
{
return std::make_shared<ReclusteringManagerThread>(getContext(), storage_id);
}
else if (type == CnchBGThreadType::ObjectSchemaAssemble)
{
return std::make_shared<CnchObjectColumnSchemaAssembleThread>(getContext(), storage_id);
}
else if (type == CnchBGThreadType::MaterializedMySQL)
{
return std::make_shared<MaterializedMySQLSyncThreadManager>(getContext(), storage_id);

View File

@ -0,0 +1,138 @@
#include <algorithm>
#include <string>
#include <CloudServices/CnchObjectColumnSchemaAssembleThread.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/Context.h>
#include <Storages/CnchStorageCache.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/PartCacheManager.h>
#include <Storages/StorageCnchMergeTree.h>
#include <Storages/StorageSnapshot.h>
#include <Transaction/TxnTimestamp.h>
#include <fmt/format.h>
namespace DB
{
CnchObjectColumnSchemaAssembleThread::CnchObjectColumnSchemaAssembleThread(ContextPtr context_, const StorageID & id_)
: ICnchBGThread(context_, CnchBGThreadType::ObjectSchemaAssemble, id_)
{
}
void CnchObjectColumnSchemaAssembleThread::runImpl()
{
try
{
auto storage = getStorageFromCatalog();
auto table_uuid = storage->getStorageUUID();
auto & table = checkAndGetCnchTable(storage);
auto storage_settings = table.getSettings();
auto database_name = storage->getDatabaseName();
auto table_name = storage->getTableName();
if (storage->is_dropped)
{
LOG_DEBUG(log, "Table was dropped, wait for removing...");
scheduled_task->scheduleAfter(10 * 1000);
return;
}
if (storage->supportsDynamicSubcolumns() && hasDynamicSubcolumns(storage->getInMemoryMetadata().getColumns()))
{
LOG_INFO(log, "{}.{} Start assemble partial schemas", database_name, table_name);
auto catalog = getContext()->getCnchCatalog();
auto [current_topology_version, current_topology] = getContext()->getCnchTopologyMaster()->getCurrentTopologyVersion();
// Step 1:scan object partial schema and drop uncommitted partial schemas
auto old_assembled_schema = catalog->tryGetTableObjectAssembledSchema(table_uuid);
auto partial_schemas
= catalog->tryGetTableObjectPartialSchemas(table_uuid, storage_settings->json_partial_schema_assemble_batch_size);
LOG_DEBUG(log, "{}.{} Before assemble. Assembled schema :{}", database_name, table_name, old_assembled_schema.toString());
if (partial_schemas.empty())
{
LOG_INFO(log, "{}.{} no need to refresh dynamic object column schema.", database_name, table_name);
scheduled_task->scheduleAfter(50 * 1000);
return;
}
std::vector<TxnTimestamp> unfiltered_partial_schema_txnids;
unfiltered_partial_schema_txnids.reserve(partial_schemas.size());
for (const auto & [txn_id, partial_schema] : partial_schemas)
{
LOG_DEBUG(
log,
"{}.{} Before assemble. Partial schema :[{}->{}]",
database_name,
table_name,
txn_id.toString(),
partial_schema.toString());
unfiltered_partial_schema_txnids.emplace_back(txn_id);
}
auto committed_partial_schema_txnids = catalog->filterUncommittedObjectPartialSchemas(unfiltered_partial_schema_txnids);
// Step 2:assemble partial schema to assembled schema
std::vector<String> partial_schema_txn_ids_for_print;
std::vector<ColumnsDescription> schemas_ready_to_assemble;
partial_schema_txn_ids_for_print.reserve(committed_partial_schema_txnids.size());
schemas_ready_to_assemble.reserve(committed_partial_schema_txnids.size() + 1);
for (auto & txn_id : committed_partial_schema_txnids)
{
auto partial_schema = partial_schemas[txn_id];
schemas_ready_to_assemble.emplace_back(partial_schema);
partial_schema_txn_ids_for_print.emplace_back(txn_id.toString());
}
schemas_ready_to_assemble.emplace_back(old_assembled_schema);
auto new_assembled_schema = DB::getConcreteObjectColumns(
schemas_ready_to_assemble.begin(),
schemas_ready_to_assemble.end(),
storage->getInMemoryMetadata().getColumns(),
[](const auto & schema) { return schema; });
// Step 3:update assembled schema and delete partial schema in meta store
// TODO:@lianwenlong consider purge fail and check lease
auto cas_put_result = catalog->resetObjectAssembledSchemaAndPurgePartialSchemas(
table_uuid, old_assembled_schema, new_assembled_schema, committed_partial_schema_txnids);
LOG_DEBUG(
log,
"{}.{} After assemble.Assembled schema :{} and deleted txn ids:{}, result:{}",
database_name,
table_name,
new_assembled_schema.toString(),
fmt::join(partial_schema_txn_ids_for_print, ","),
std::to_string(cas_put_result));
if (cas_put_result)
{
// Step 4:update assembled schema and delete partial schema in storage cache
if (auto cache_manager = getContext()->getPartCacheManager())
{
if (auto storage_in_cache = cache_manager->getStorageFromCache(table_uuid, current_topology_version))
{
auto & table_in_cache = checkAndGetCnchTable(storage_in_cache);
table_in_cache.refreshAssembledSchema(new_assembled_schema, committed_partial_schema_txnids);
}
}
// Step5: clean partial schema status in meta store including
catalog->batchDeleteObjectPartialSchemaStatus(committed_partial_schema_txnids);
// Step6: @TODO:lianwenlong rollback aborted partial schema from meta store and storage cache
}
LOG_INFO(
log, "{}.{} Finish assemble partial schemas with result:{}", database_name, table_name, std::to_string(cas_put_result));
}
}
catch (...)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
}
scheduled_task->scheduleAfter(50 * 1000);
}
} //namespace DB end

View File

@ -0,0 +1,16 @@
#pragma once
#include <CloudServices/ICnchBGThread.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
class CnchObjectColumnSchemaAssembleThread : public ICnchBGThread
{
public:
CnchObjectColumnSchemaAssembleThread(ContextPtr context_, const StorageID & id_);
private:
void runImpl() override;
};
}//namespace DB end

View File

@ -27,9 +27,10 @@
#include <common/logger_useful.h>
#include <Common/CurrentThread.h>
#include <Common/Exception.h>
#include "Interpreters/Context_fwd.h"
#include "Storages/Hive/HiveFile/IHiveFile.h"
#include "Storages/Hive/StorageCnchHive.h"
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/Context_fwd.h>
#include <Storages/Hive/HiveFile/IHiveFile.h>
#include <Storages/Hive/StorageCnchHive.h>
#include <Storages/RemoteFile/StorageCnchHDFS.h>
#include <Storages/RemoteFile/StorageCnchS3.h>
@ -63,6 +64,7 @@ AssignedResource::AssignedResource(AssignedResource && resource)
part_names = resource.part_names; // don't call move here
resource.sent_create_query = true;
object_columns = resource.object_columns;
}
void AssignedResource::addDataParts(const ServerDataPartsVector & parts)
@ -470,6 +472,7 @@ void CnchServerResource::allocateResource(
worker_resource.create_table_query = resource.create_table_query;
worker_resource.worker_table_name = resource.worker_table_name;
worker_resource.bucket_numbers = assigned_bucket_numbers;
worker_resource.object_columns = resource.object_columns;
}
}
}

View File

@ -17,6 +17,8 @@
#include <optional>
#include <Catalog/DataModelPartWrapper_fwd.h>
#include <CloudServices/CnchWorkerClient.h>
#include <Common/HostWithPorts.h>
#include <Storages/ColumnsDescription.h>
#include <Core/Types.h>
#include <Interpreters/StorageID.h>
#include <Interpreters/WorkerGroupHandle.h>
@ -86,6 +88,8 @@ struct AssignedResource
std::unordered_set<String> part_names;
ColumnsDescription object_columns;
void addDataParts(const ServerDataPartsVector & parts);
void addDataParts(const FileDataPartsCNCHVector & parts);
void addDataParts(const HiveFiles & parts);
@ -141,6 +145,13 @@ public:
assigned_resource.bucket_numbers = required_bucket_numbers;
}
void setResourceReplicated(const UUID & storage_id, bool replicated)
{
std::lock_guard lock(mutex);
auto & assigned_resource = assigned_table_resource.at(storage_id);
assigned_resource.replicated = replicated;
}
/// Send resource to worker
void sendResource(const ContextPtr & context, const HostWithPorts & worker);
/// allocate and send resource to worker_group
@ -152,6 +163,14 @@ public:
void sendResources(const ContextPtr & context, WorkerAction act);
void cleanResource();
void addDynamicObjectSchema(const UUID & storage_id, const ColumnsDescription & object_columns_)
{
std::lock_guard lock(mutex);
auto & assigned_resource = assigned_table_resource.at(storage_id);
assigned_resource.object_columns = object_columns_;
}
void setSendMutations(bool send_mutations_) { send_mutations = send_mutations_; }
private:

View File

@ -26,12 +26,13 @@
#include <Transaction/TransactionCommon.h>
#include <Transaction/TransactionCoordinatorRcCnch.h>
#include <Transaction/TxnTimestamp.h>
#include "Common/tests/gtest_global_context.h"
#include <Common/RWLock.h>
#include <Common/tests/gtest_global_context.h>
#include <Statistics/AutoStatisticsHelper.h>
#include <Statistics/AutoStatisticsRpcUtils.h>
#include <Statistics/AutoStatisticsManager.h>
#include <Common/Exception.h>
#include <Access/AccessControlManager.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/DatabaseCatalog.h>
#include <CloudServices/CnchMergeMutateThread.h>
#include <CloudServices/CnchDataWriter.h>

View File

@ -22,8 +22,9 @@
#include <Protos/RPCHelpers.h>
#include <Protos/cnch_worker_rpc.pb.h>
#include <Storages/IStorage.h>
#include "Storages/Hive/HiveFile/IHiveFile.h"
#include "Storages/Hive/StorageCnchHive.h"
#include <DataTypes/ObjectUtils.h>
#include <Storages/Hive/HiveFile/IHiveFile.h>
#include <Storages/Hive/StorageCnchHive.h>
#include <Storages/StorageCnchMergeTree.h>
#include <Transaction/ICnchTransaction.h>
#include <WorkerTasks/ManipulationList.h>
@ -76,6 +77,12 @@ void CnchWorkerClient::submitManipulationTask(
params.mutation_commands->writeText(write_buf);
}
if (hasDynamicSubcolumns(storage.getInMemoryMetadata().columns))
{
request.set_dynamic_object_column_schema(
storage.getStorageSnapshot(storage.getInMemoryMetadataPtr(), nullptr)->object_columns.toString());
}
stub->submitManipulationTask(&cntl, &request, &response, nullptr);
assertController(cntl);
@ -322,7 +329,10 @@ brpc::CallId CnchWorkerClient::sendResources(
for (const auto & resource : resources_to_send)
{
if (!resource.sent_create_query)
{
request.add_create_queries(resource.create_table_query);
request.add_dynamic_object_column_schema(resource.object_columns.toString());
}
/// parts
auto & table_data_parts = *request.mutable_data_parts()->Add();

View File

@ -13,6 +13,7 @@
* limitations under the License.
*/
#include <memory>
#include <CloudServices/CnchWorkerResource.h>
#include <Core/Names.h>
@ -26,6 +27,7 @@
#include <Parsers/ASTForeignKeyDeclaration.h>
#include <Parsers/ASTUniqueNotEnforcedDeclaration.h>
#include <Poco/Logger.h>
#include <Storages/StorageCloudMergeTree.h>
#include <Storages/ForeignKeysDescription.h>
#include <Storages/UniqueNotEnforcedDescription.h>
#include <Storages/IStorage.h>
@ -43,7 +45,7 @@ namespace ErrorCodes
extern const int TABLE_ALREADY_EXISTS;
}
void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists)
void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists, const ColumnsDescription & object_columns)
{
LOG_DEBUG(&Poco::Logger::get("WorkerResource"), "start create cloud table {}", create_query);
const char * begin = create_query.data();
@ -139,6 +141,9 @@ void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const Str
StoragePtr res = StorageFactory::instance().get(ast_create_query, "", context, context->getGlobalContext(), columns, constraints, foreign_keys, unique_not_enforced, false);
res->startup();
if (auto cloud_table = std::dynamic_pointer_cast<StorageCloudMergeTree>(res))
cloud_table->resetObjectColumns(object_columns);
{
auto lock = getLock();
cloud_tables.emplace(std::make_pair(tenant_db, table_name), res);

View File

@ -18,6 +18,7 @@
#include <Interpreters/StorageID.h>
#include <Storages/IStorage_fwd.h>
#include <Databases/IDatabase.h>
#include <Storages/ColumnsDescription.h>
#include <unordered_map>
#include <unordered_set>
@ -33,7 +34,7 @@ class CloudTablesBlockSource;
class CnchWorkerResource
{
public:
void executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists = false);
void executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists = false, const ColumnsDescription & object_columns = {});
StoragePtr getTable(const StorageID & table_id) const;
DatabasePtr getDatabase(const String & database_name) const;
bool isCnchTableInWorker(const StorageID & table_id) const;

View File

@ -48,8 +48,9 @@
#include <brpc/closure_guard.h>
#include <brpc/controller.h>
#include <brpc/stream.h>
#include "Common/Configurations.h"
#include "Common/Exception.h"
#include <Common/Configurations.h>
#include <Storages/ColumnsDescription.h>
#include <Common/Exception.h>
#if USE_RDKAFKA
# include <Storages/Kafka/KafkaTaskCommand.h>
@ -184,6 +185,8 @@ void CnchWorkerServiceImpl::submitManipulationTask(
auto * data = dynamic_cast<StorageCloudMergeTree *>(storage.get());
if (!data)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} is not CloudMergeTree", storage->getStorageID().getNameForLogs());
if (request->has_dynamic_object_column_schema())
data->resetObjectColumns(ColumnsDescription::parse(request->dynamic_object_column_schema()));
auto params = ManipulationTaskParams(storage);
params.type = static_cast<ManipulationType>(request->type());
@ -632,8 +635,14 @@ void CnchWorkerServiceImpl::sendResources(
{
/// create a copy of session_context to avoid modify settings in SessionResource
auto context_for_create = Context::createCopy(query_context);
for (const auto & create_query : request->create_queries())
worker_resource->executeCreateQuery(context_for_create, create_query, true);
for (int i = 0; i < request->create_queries_size(); i++)
{
auto create_query = request->create_queries().at(i);
auto object_columns = request->dynamic_object_column_schema().at(i);
worker_resource->executeCreateQuery(context_for_create, create_query, false, ColumnsDescription::parse(object_columns));
}
LOG_DEBUG(log, "Successfully create {} queries for Session: {}", request->create_queries_size(), request->txn_id());
}

View File

@ -164,16 +164,16 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
/// If there are references to states in final column, we must hold their ownership
/// by holding arenas and source.
auto callback = [&](auto & subcolumn)
auto callback = [&](IColumn & subcolumn)
{
if (auto * aggregate_subcolumn = typeid_cast<ColumnAggregateFunction *>(subcolumn.get()))
if (auto * aggregate_subcolumn = typeid_cast<ColumnAggregateFunction *>(&subcolumn))
{
aggregate_subcolumn->foreign_arenas = concatArenas(column_aggregate_func.foreign_arenas, column_aggregate_func.my_arena);
aggregate_subcolumn->src = column_aggregate_func.getPtr();
}
};
callback(res);
callback(*res);
res->forEachSubcolumnRecursively(callback);
for (auto * val : data)
@ -349,7 +349,7 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
{
size_t size = data.size();
if (size != filter.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filter.size(), size);
if (size == 0)
return cloneEmpty();
@ -371,7 +371,6 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
return res;
}
ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const
{
size_t size = data.size();

View File

@ -228,6 +228,21 @@ public:
throw Exception("Method hasEqualValues is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED);
}
double getRatioOfDefaultRows(double) const override
{
return 0.0;
}
UInt64 getNumberOfDefaultRows() const override
{
return 0;
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for ColumnAggregateFunction");
}
void getPermutation(PermutationSortDirection direction, PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override;

View File

@ -629,7 +629,7 @@ ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hin
{
size_t col_size = getOffsets().size();
if (col_size != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filt.size(), col_size);
if (0 == col_size)
return ColumnArray::create(data);
@ -697,7 +697,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
{
size_t size = getOffsets().size();
if (size != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filt.size(), size);
if (size == 0)
return ColumnArray::create(data);
@ -963,6 +963,20 @@ ColumnPtr ColumnArray::compress() const
});
}
double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnArray>(sample_ratio);
}
UInt64 ColumnArray::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnArray>();
}
void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnArray>(indices, from, limit);
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{

View File

@ -172,17 +172,17 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(offsets);
callback(data);
}
void forEachSubcolumnRecursively(ColumnCallback callback) override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(offsets);
callback(*offsets);
offsets->forEachSubcolumnRecursively(callback);
callback(data);
callback(*data);
data->forEachSubcolumnRecursively(callback);
}
@ -193,6 +193,10 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
bool isCollationSupported() const override { return getData().isCollationSupported(); }
size_t ALWAYS_INLINE offsetAt(ssize_t i) const { return getOffsets()[i - 1]; }

View File

@ -257,6 +257,21 @@ public:
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -132,6 +132,9 @@ public:
void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); }
void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); }
size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); }
double getRatioOfDefaultRows(double) const override { throwMustBeDecompressed(); }
UInt64 getNumberOfDefaultRows() const override { throwMustBeDecompressed(); }
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); }
protected:
size_t rows;

View File

@ -26,6 +26,7 @@
#include <Columns/IColumn.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Common/PODArray.h>
namespace DB
@ -207,6 +208,7 @@ public:
}
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -253,7 +255,7 @@ public:
data->getExtremes(min, max);
}
void forEachSubcolumn(ColumnCallback callback) override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(data);
}
@ -271,6 +273,27 @@ public:
return false;
}
double getRatioOfDefaultRows(double) const override
{
return data->isDefaultAt(0) ? 1.0 : 0.0;
}
UInt64 getNumberOfDefaultRows() const override
{
return data->isDefaultAt(0) ? s : 0;
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
if (!data->isDefaultAt(0))
{
size_t to = limit && from + limit < size() ? from + limit : size();
indices.reserve(indices.size() + to - from);
for (size_t i = from; i < to; ++i)
indices.push_back(i);
}
}
bool isNullable() const override { return isColumnNullable(*data); }
bool onlyNull() const override { return data->isNullAt(0); }
bool isNumeric() const override { return data->isNumeric(); }

View File

@ -175,6 +175,7 @@ public:
bool isDefaultAt(size_t n) const override { return data[n].value == 0; }
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -198,6 +199,21 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr compress() const override;

View File

@ -177,6 +177,7 @@ public:
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -214,6 +215,21 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnFixedString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnFixedString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnFixedString>(indices, from, limit);
}
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; }

View File

@ -187,6 +187,14 @@ void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column)
captured_columns.push_back(column);
}
DataTypePtr ColumnFunction::getResultType() const
{
// if (recursively_convert_result_to_full_column_if_low_cardinality)
// return recursiveRemoveLowCardinality(function->getResultType());
return function->getResultType();
}
ColumnWithTypeAndName ColumnFunction::reduce() const
{
auto args = function->getArgumentTypes().size();
@ -203,4 +211,12 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
return res;
}
const ColumnFunction * checkAndGetShortCircuitArgument(const ColumnPtr & column)
{
const ColumnFunction * column_function;
if ((column_function = typeid_cast<const ColumnFunction *>(column.get())) && column_function->isShortCircuitArgument())
return column_function;
return nullptr;
}
}

View File

@ -155,6 +155,25 @@ public:
throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
DataTypePtr getResultType() const;
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
bool isShortCircuitArgument() const { return false; }
private:
size_t size_;
FunctionBasePtr function;
@ -163,4 +182,6 @@ private:
void appendArgument(const ColumnWithTypeAndName & column);
};
const ColumnFunction * checkAndGetShortCircuitArgument(const ColumnPtr & column);
}

View File

@ -323,7 +323,7 @@ public:
return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes();
}
void forEachSubcolumn(ColumnCallback callback) override
void forEachSubcolumn(MutableColumnCallback callback) override
{
if (full_state)
{
@ -338,22 +338,22 @@ public:
callback(dictionary.getColumnUniquePtr());
}
void forEachSubcolumnRecursively(ColumnCallback callback) override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
if (isFullState())
{
callback(nested_column);
callback(*nested_column);
nested_column->forEachSubcolumnRecursively(callback);
}
callback(idx.getPositionsPtr());
callback(*idx.getPositionsPtr());
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);
/// Column doesn't own dictionary if it's shared.
if (!dictionary.isShared())
{
callback(dictionary.getColumnUniquePtr());
callback(*dictionary.getColumnUniquePtr());
dictionary.getColumnUniquePtr()->forEachSubcolumnRecursively(callback);
}
}
@ -371,6 +371,21 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getIndexes().getRatioOfDefaultRows(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getIndexes().getNumberOfDefaultRows();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndexes().getIndicesOfNonDefaultRows(indices, from, limit);
}
bool valuesHaveFixedSize() const override
{
if (full_state)
@ -482,6 +497,7 @@ public:
const ColumnPtr & getPositions() const { return positions; }
WrappedPtr & getPositionsPtr() { return positions; }
const WrappedPtr & getPositionsPtr() const { return positions; }
size_t getPositionAt(size_t row) const;
void insertPosition(UInt64 position);
void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit);

View File

@ -383,14 +383,14 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
max = std::move(map_max_value);
}
void ColumnMap::forEachSubcolumn(ColumnCallback callback)
void ColumnMap::forEachSubcolumn(MutableColumnCallback callback)
{
nested->forEachSubcolumn(callback);
}
void ColumnMap::forEachSubcolumnRecursively(ColumnCallback callback)
void ColumnMap::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
callback(nested);
callback(*nested);
nested->forEachSubcolumnRecursively(callback);
}
@ -410,6 +410,21 @@ ColumnPtr ColumnMap::compress() const
});
}
double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnMap>(sample_ratio);
}
UInt64 ColumnMap::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnMap>();
}
void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnMap>(indices, from, limit);
}
/**
* Generic implementation of get implicit value column based on key value.
* TODO: specialize this function for Number type and String type.

View File

@ -117,9 +117,12 @@ public:
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
/** Access embeded columns*/
const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }

View File

@ -149,17 +149,17 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(nested_column);
callback(null_map);
}
void forEachSubcolumnRecursively(ColumnCallback callback) override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(nested_column);
callback(*nested_column);
nested_column->forEachSubcolumnRecursively(callback);
callback(null_map);
callback(*null_map);
null_map->forEachSubcolumnRecursively(callback);
}
@ -170,6 +170,21 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnNullable>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnNullable>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);
}
bool isNullable() const override { return true; }
bool isFixedAndContiguous() const override { return false; }
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }

1095
src/Columns/ColumnObject.cpp Normal file

File diff suppressed because it is too large Load Diff

271
src/Columns/ColumnObject.h Normal file
View File

@ -0,0 +1,271 @@
#pragma once
#include <Core/Field.h>
#include <Core/Names.h>
#include <Columns/IColumn.h>
#include <Common/PODArray.h>
#include <Common/HashTable/HashMap.h>
#include <DataTypes/Serializations/JSONDataParser.h>
#include <DataTypes/Serializations/SubcolumnsTree.h>
#include <DataTypes/IDataType.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/// Info that represents a scalar or array field in a decomposed view.
/// It allows to recreate field with different number
/// of dimensions or nullability.
struct FieldInfo
{
/// The common type of of all scalars in field.
DataTypePtr scalar_type;
/// Do we have NULL scalar in field.
bool have_nulls;
/// If true then we have scalars with different types in array and
/// we need to convert scalars to the common type.
bool need_convert;
/// Number of dimension in array. 0 if field is scalar.
size_t num_dimensions;
/// If true then this field is an array of variadic dimension field
/// and we need to normalize the dimension
bool need_fold_dimension;
};
FieldInfo getFieldInfo(const Field & field);
/** A column that represents object with dynamic set of subcolumns.
* Subcolumns are identified by paths in document and are stored in
* a trie-like structure. ColumnObject is not suitable for writing into tables
* and it should be converted to Tuple with fixed set of subcolumns before that.
*/
class ColumnObject final : public COWHelper<IColumn, ColumnObject>
{
public:
/** Class that represents one subcolumn.
* It stores values in several parts of column
* and keeps current common type of all parts.
* We add a new column part with a new type, when we insert a field,
* which can't be converted to the current common type.
* After insertion of all values subcolumn should be finalized
* for writing and other operations.
*/
class Subcolumn
{
public:
Subcolumn() = default;
Subcolumn(size_t size_, bool is_nullable_);
Subcolumn(MutableColumnPtr && data_, bool is_nullable_);
size_t size() const;
size_t byteSize() const;
size_t allocatedBytes() const;
void get(size_t n, Field & res) const;
bool isFinalized() const;
const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); }
const DataTypePtr & getLeastCommonTypeBase() const { return least_common_type.getBase(); }
size_t getNumberOfDimensions() const { return least_common_type.getNumberOfDimensions(); }
/// Checks the consistency of column's parts stored in @data.
void checkTypes() const;
/// Inserts a field, which scalars can be arbitrary, but number of
/// dimensions should be consistent with current common type.
void insert(Field field);
void insert(Field field, FieldInfo info);
void insertDefault();
void insertManyDefaults(size_t length);
void insertRangeFrom(const Subcolumn & src, size_t start, size_t length);
void popBack(size_t n);
Subcolumn cut(size_t start, size_t length) const;
/// Converts all column's parts to the common type and
/// creates a single column that stores all values.
void finalize();
/// Returns last inserted field.
Field getLastField() const;
FieldInfo getFieldInfo() const;
/// Recreates subcolumn with default scalar values and keeps sizes of arrays.
/// Used to create columns of type Nested with consistent array sizes.
Subcolumn recreateWithDefaultValues(const FieldInfo & field_info) const;
/// Returns single column if subcolumn in finalizes.
/// Otherwise -- undefined behaviour.
IColumn & getFinalizedColumn();
const IColumn & getFinalizedColumn() const;
const ColumnPtr & getFinalizedColumnPtr() const;
const std::vector<WrappedPtr> & getData() const { return data; }
size_t getNumberOfDefaultsInPrefix() const { return num_of_defaults_in_prefix; }
friend class ColumnObject;
private:
class LeastCommonType
{
public:
LeastCommonType();
explicit LeastCommonType(DataTypePtr type_);
const DataTypePtr & get() const { return type; }
const DataTypePtr & getBase() const { return base_type; }
size_t getNumberOfDimensions() const { return num_dimensions; }
private:
DataTypePtr type;
DataTypePtr base_type;
size_t num_dimensions = 0;
};
void addNewColumnPart(DataTypePtr type);
/// Current least common type of all values inserted to this subcolumn.
LeastCommonType least_common_type;
/// If true then common type type of subcolumn is Nullable
/// and default values are NULLs.
bool is_nullable = false;
/// Parts of column. Parts should be in increasing order in terms of subtypes/supertypes.
/// That means that the least common type for i-th prefix is the type of i-th part
/// and it's the supertype for all type of column from 0 to i-1.
std::vector<WrappedPtr> data;
/// Until we insert any non-default field we don't know further
/// least common type and we count number of defaults in prefix,
/// which will be converted to the default type of final common type.
size_t num_of_defaults_in_prefix = 0;
size_t num_rows = 0;
};
using Subcolumns = SubcolumnsTree<Subcolumn>;
private:
/// If true then all subcolumns are nullable.
const bool is_nullable;
Subcolumns subcolumns;
size_t num_rows;
public:
static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
explicit ColumnObject(bool is_nullable_);
ColumnObject(Subcolumns && subcolumns_, bool is_nullable_);
/// Checks that all subcolumns have consistent sizes.
void checkConsistency() const;
bool hasSubcolumn(const PathInData & key) const;
const Subcolumn & getSubcolumn(const PathInData & key) const;
Subcolumn & getSubcolumn(const PathInData & key);
void incrementNumRows() { ++num_rows; }
/// Adds a subcolumn from existing IColumn.
void addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn);
/// Adds a subcolumn of specific size with default values.
void addSubcolumn(const PathInData & key, size_t new_size);
/// Adds a subcolumn of type Nested of specific size with default values.
/// It cares about consistency of sizes of Nested arrays.
void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size);
/// Finds a subcolumn from the same Nested type as @entry and inserts
/// an array with default values with consistent sizes as in Nested type.
bool tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const;
bool tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const;
const Subcolumns & getSubcolumns() const { return subcolumns; }
Subcolumns & getSubcolumns() { return subcolumns; }
PathsInData getKeys() const;
/// Part of interface
const char * getFamilyName() const override { return "Object"; }
TypeIndex getDataType() const override { return TypeIndex::Object; }
size_t size() const override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumnPtr cloneResized(size_t new_size) const override;
/// Finalizes all subcolumns.
void finalize() override;
bool isFinalized() const override;
/// Order of rows in ColumnObject is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
void getExtremes(Field & min, Field & max) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer) override;
/// All other methods throw exception.
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); }
size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
double getRatioOfDefaultRows(double) const override { throwMustBeConcrete(); }
UInt64 getNumberOfDefaultRows() const override { throwMustBeConcrete(); }
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeConcrete(); }
private:
[[noreturn]] static void throwMustBeConcrete()
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ColumnObject must be converted to ColumnTuple before use");
}
template <typename Func>
MutableColumnPtr applyForSubcolumns(Func && func) const;
/// It's used to get shared sized of Nested to insert correct default values.
const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const;
};
}

View File

@ -299,6 +299,21 @@ public:
void validate() const;
bool isCollationSupported() const override { return true; }
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
};

View File

@ -214,7 +214,6 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
return res;
}
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
{
size_t size = offsets.size();

View File

@ -310,6 +310,21 @@ public:
return typeid(rhs) == typeid(ColumnString);
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnString>(indices, from, limit);
}
Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; }

View File

@ -503,18 +503,18 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const
max = max_tuple;
}
void ColumnTuple::forEachSubcolumn(ColumnCallback callback)
void ColumnTuple::forEachSubcolumn(MutableColumnCallback callback)
{
for (auto & column : columns)
callback(column);
}
void ColumnTuple::forEachSubcolumnRecursively(ColumnCallback callback)
void ColumnTuple::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
for (auto & column : columns)
{
callback(column);
callback(*column);
column->forEachSubcolumnRecursively(callback);
}
}
@ -569,4 +569,19 @@ ColumnPtr ColumnTuple::compress() const
});
}
double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnTuple>(sample_ratio);
}
UInt64 ColumnTuple::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnTuple>();
}
void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
}
}

View File

@ -115,12 +115,14 @@ public:
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
size_t tupleSize() const { return columns.size(); }
const IColumn & getColumn(size_t idx) const { return *columns[idx]; }

View File

@ -37,7 +37,7 @@
#include <common/range.h>
#include <common/unaligned.h>
#include "Columns/ColumnConst.h"
#include <Columns/ColumnConst.h>
namespace DB
@ -128,7 +128,10 @@ public:
return column_holder->allocatedBytes() + reverse_index.allocatedBytes()
+ (nested_null_mask ? nested_null_mask->allocatedBytes() : 0);
}
void forEachSubcolumn(IColumn::ColumnCallback callback) override
void forEachSubcolumn(IColumn::ColumnCallback callback) const override { callback(column_holder); }
void forEachSubcolumn(IColumn::MutableColumnCallback callback) override
{
callback(column_holder);
reverse_index.setColumn(getRawColumnPtr());
@ -136,6 +139,21 @@ public:
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
void forEachSubcolumnRecursively(IColumn::RecursiveColumnCallback callback) const override
{
callback(*column_holder);
column_holder->forEachSubcolumnRecursively(callback);
}
void forEachSubcolumnRecursively(IColumn::RecursiveMutableColumnCallback callback) override
{
callback(*column_holder);
column_holder->forEachSubcolumnRecursively(callback);
reverse_index.setColumn(getRawColumnPtr());
if (is_nullable)
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
void forEachSubcolumnRecursively(IColumn::ColumnCallback callback) override
{
callback(column_holder);

View File

@ -356,6 +356,21 @@ public:
return typeid(rhs) == typeid(ColumnVector<T>);
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr compress() const override;
/// Replace elements that match the filter with zeroes. If inverted replaces not matched elements.

View File

@ -15,12 +15,10 @@ String IColumn::dumpStructure() const
WriteBufferFromOwnString res;
res << getFamilyName() << "(size = " << size();
ColumnCallback callback = [&](ColumnPtr & subcolumn)
forEachSubcolumn([&](const auto & subcolumn)
{
res << ", " << subcolumn->dumpStructure();
};
const_cast<IColumn*>(this)->forEachSubcolumn(callback);
});
res << ")";
return res.str();
@ -31,6 +29,50 @@ void IColumn::insertFrom(const IColumn & src, size_t n)
insert(src[n]);
}
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
{
if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible sizes of offsets ({}), shift ({}) and size of column {}", offsets.size(), shift, size());
auto res = cloneEmpty();
res->reserve(total_rows);
ssize_t current_offset = -1;
for (size_t i = 0; i < offsets.size(); ++i)
{
ssize_t offsets_diff = static_cast<ssize_t>(offsets[i]) - current_offset;
current_offset = offsets[i];
if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1);
res->insertFrom(*this, i + shift);
}
ssize_t offsets_diff = static_cast<ssize_t>(total_rows) - current_offset;
if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1);
return res;
}
void IColumn::forEachSubcolumn(ColumnCallback callback) const
{
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)
{
callback(std::as_const(subcolumn));
});
}
void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
{
std::as_const(*this).forEachSubcolumnRecursively([&callback](const IColumn & subcolumn)
{
callback(const_cast<IColumn &>(subcolumn));
});
}
bool isColumnNullable(const IColumn & column)
{
return checkColumn<ColumnNullable>(column);

View File

@ -91,6 +91,10 @@ public:
/// If column is ColumnLowCardinality, transforms is to full column.
virtual Ptr convertToFullColumnIfLowCardinality() const { return getPtr(); }
/// If column isn't ColumnSparse, return itself.
/// If column is ColumnSparse, transforms it to full column.
[[nodiscard]] virtual Ptr convertToFullColumnIfSparse() const { return getPtr(); }
/// Creates empty column with the same type.
virtual MutablePtr cloneEmpty() const { return cloneResized(0); }
@ -209,6 +213,13 @@ public:
insertFrom(src, position);
}
/// Appends one field multiple times. Can be optimized in inherited classes.
virtual void insertMany(const Field & field, size_t length)
{
for (size_t i = 0; i < length; ++i)
insert(field);
}
/// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
/// Is used to optimize some computations (in aggregation, for example).
/// Parameter length could be ignored if column values have fixed size.
@ -414,8 +425,18 @@ public:
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
/// Shallow: doesn't do recursive calls; don't do call for itself.
using ColumnCallback = std::function<void(WrappedPtr&)>;
virtual void forEachSubcolumn(ColumnCallback) {}
using ColumnCallback = std::function<void(const WrappedPtr &)>;
virtual void forEachSubcolumn(ColumnCallback) const;
using MutableColumnCallback = std::function<void(WrappedPtr &)>;
virtual void forEachSubcolumn(MutableColumnCallback) {}
using RecursiveColumnCallback = std::function<void(const IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const;
using RecursiveMutableColumnCallback = std::function<void(IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback) {}
/// Similar to forEachSubcolumn but it also do recursive calls.
virtual void forEachSubcolumnRecursively(ColumnCallback) {}
@ -427,6 +448,23 @@ public:
throw Exception("Method structureEquals is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Returns ratio of values in column, that are equal to default value of column.
/// Checks only @sample_ratio ratio of rows.
[[nodiscard]] virtual double getRatioOfDefaultRows(double sample_ratio = 1.0) const = 0; /// NOLINT
/// Returns number of values in column, that are equal to default value of column.
[[nodiscard]] virtual UInt64 getNumberOfDefaultRows() const = 0;
/// Returns indices of values in column, that not equal to default value of column.
virtual void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const = 0;
/// Returns column with @total_size elements.
/// In result column values from current column are at positions from @offsets.
/// Other values are filled by @default_value.
/// @shift means how much rows to skip from the beginning of current column.
/// Used to create full column from sparse.
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
/// Compress column in memory to some representation that allows to decompress it back.
/// Return itself if compression is not applicable for this column type.
virtual Ptr compress() const
@ -442,6 +480,16 @@ public:
return getPtr();
}
/// Some columns may require finalization before using of other operations.
virtual void finalize() {}
virtual bool isFinalized() const { return true; }
MutablePtr cloneFinalized() const
{
auto finalized = IColumn::mutate(getPtr());
finalized->finalize();
return finalized;
}
static MutablePtr mutate(Ptr ptr)
{
@ -509,6 +557,8 @@ public:
virtual bool lowCardinality() const { return false; }
[[nodiscard]] virtual bool isSparse() const { return false; }
virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default;
@ -545,6 +595,9 @@ protected:
template <typename Derived>
double getRatioOfDefaultRowsImpl(double sample_ratio) const;
template <typename Derived>
UInt64 getNumberOfDefaultRowsImpl() const;
template <typename Derived>
void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const;

View File

@ -180,6 +180,21 @@ public:
return res;
}
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());
}
void gather(ColumnGathererStream &) override
{
throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);

View File

@ -195,6 +195,16 @@ double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const
return static_cast<double>(res) / num_checked_rows;
}
template <typename Derived>
UInt64 IColumn::getNumberOfDefaultRowsImpl() const
{
UInt64 res = 0;
size_t num_rows = size();
for (size_t i = 0; i < num_rows; ++i)
res += static_cast<const Derived &>(*this).isDefaultAt(i);
return res;
}
template <typename Derived>
void IColumn::getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const
{

View File

@ -210,6 +210,21 @@ public:
{
throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getRatioOfDefaultRows' not implemented for ColumnUnique");
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getNumberOfDefaultRows' not implemented for ColumnUnique");
}
void getIndicesOfNonDefaultRows(IColumn::Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getIndicesOfNonDefaultRows' not implemented for ColumnUnique");
}
};
using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr;

View File

@ -48,6 +48,7 @@
M(11, POSITION_OUT_OF_BOUND) \
M(12, PARAMETER_OUT_OF_BOUND) \
M(13, SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH) \
M(14, TOO_MANY_SUBCOLUMNS_IN_JSON) \
M(15, DUPLICATE_COLUMN) \
M(16, NO_SUCH_COLUMN_IN_TABLE) \
M(17, DELIMITER_IN_STRING_LITERAL_DOESNT_MATCH) \

View File

@ -149,6 +149,19 @@ void FieldVisitorCompatibleWriteBinary::operator()(const BitMap64 & x, WriteBuff
writeString(buffer.data(), bytes, buf);
}
void FieldVisitorCompatibleWriteBinary::operator()(const Object & x, WriteBuffer & buf) const
{
const size_t size = x.size();
writeBinary(size, buf);
for (const auto & [key, value] : x)
{
const UInt8 type = value.getType();
writeBinary(type, buf);
writeBinary(key, buf);
Field::dispatch([&buf] (const auto & val) { FieldVisitorCompatibleWriteBinary()(val, buf); }, value);
}
}
void FieldVisitorCompatibleReadBinary::deserialize(UInt64 & value, ReadBuffer & buf)
{
@ -306,4 +319,19 @@ void FieldVisitorCompatibleReadBinary::deserialize(BitMap64 & value, ReadBuffer
value = BitMap64::readSafe(bitmap_buffer.data(), bytes);
}
void FieldVisitorCompatibleReadBinary::deserialize(Object & value, ReadBuffer & buf)
{
size_t size;
readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
UInt8 type;
String key;
readBinary(type, buf);
readBinary(key, buf);
value[key] = Field::dispatch(FieldVisitorCompatibleReadBinary(buf), static_cast<Field::Types::Which>(type));
}
}
}

View File

@ -35,6 +35,7 @@ public:
void operator()(const DecimalField<Decimal256> & x, WriteBuffer & buf) const;
void operator()(const AggregateFunctionStateData & x, WriteBuffer & buf) const;
void operator()(const BitMap64 & x, WriteBuffer & buf) const;
void operator()(const Object & x, WriteBuffer & buf) const;
void operator()(const IPv4 & x, WriteBuffer & buf) const;
void operator()(const IPv6 & x, WriteBuffer & buf) const;
};
@ -97,6 +98,7 @@ private:
static void deserialize(Tuple & value, ReadBuffer & buf);
static void deserialize(Map & value, ReadBuffer & buf);
static void deserialize(BitMap64 & value, ReadBuffer & buf);
static void deserialize(Object & value, ReadBuffer & buf);
};
}

View File

@ -77,6 +77,11 @@ public:
throw Exception("Cannot convert Map to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);
}
T operator() (const Object &) const
{
throw Exception("Cannot convert Object to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);
}
T operator() (const UInt64 & x) const { return T(x); }
T operator() (const Int64 & x) const { return T(x); }
T operator() (const Int128 & x) const { return T(x); }

View File

@ -138,4 +138,21 @@ String FieldVisitorDump::operator() (const BitMap64 & x) const
return wb.str();
}
String FieldVisitorDump::operator() (const Object & x) const
{
WriteBufferFromOwnString wb;
wb << "Object_(";
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())
wb << ", ";
wb << "(" << it->first << ", " << applyVisitor(*this, it->second) << ")";
}
wb << ')';
return wb.str();
}
}

View File

@ -53,6 +53,7 @@ public:
String operator() (const DecimalField<Decimal256> & x) const;
String operator() (const AggregateFunctionStateData & x) const;
String operator() (const BitMap64 & x) const;
String operator() (const Object & x) const;
};
}

View File

@ -238,4 +238,17 @@ void FieldVisitorHash::operator() (const BitMap64 & x) const
applyVisitor(*this, Field(*it));
}
void FieldVisitorHash::operator() (const Object & x) const
{
UInt8 type = Field::Types::Object;
hash.update(type);
hash.update(x.size());
for (const auto & [key, value]: x)
{
hash.update(key);
applyVisitor(*this, value);
}
}
}

View File

@ -59,6 +59,7 @@ public:
void operator() (const DecimalField<Decimal256> & x) const;
void operator() (const AggregateFunctionStateData & x) const;
void operator() (const BitMap64 & x) const;
void operator() (const Object & x) const;
};
}

View File

@ -53,6 +53,7 @@ bool FieldVisitorSum::operator() (UUID &) const { throw Exception("Cannot sum UU
bool FieldVisitorSum::operator() (IPv4 &) const { throw Exception("Cannot sum IPv4s", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (IPv6 &) const { throw Exception("Cannot sum IPv6s", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (BitMap64 &) const { throw Exception("Cannot sum BitMap64", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (Object &) const { throw Exception("Cannot sum Objects", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (AggregateFunctionStateData &) const
{

View File

@ -53,6 +53,7 @@ public:
bool operator() (IPv6 &) const;
bool operator() (AggregateFunctionStateData &) const;
bool operator() (BitMap64 &) const;
bool operator() (Object &) const;
template <typename T>
bool operator() (DecimalField<T> & x) const

View File

@ -160,4 +160,23 @@ String FieldVisitorToString::operator() (const BitMap64 & x) const
return wb.str();
}
String FieldVisitorToString::operator() (const Object & x) const
{
WriteBufferFromOwnString wb;
wb << '{';
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())
wb << ", ";
writeDoubleQuoted(it->first, wb);
wb << ": " << applyVisitor(*this, it->second);
}
wb << '}';
return wb.str();
}
}

View File

@ -53,6 +53,7 @@ public:
String operator() (const DecimalField<Decimal256> & x) const;
String operator() (const AggregateFunctionStateData & x) const;
String operator() (const BitMap64 & x) const;
String operator() (const Object & x) const;
};
}

View File

@ -104,4 +104,18 @@ void FieldVisitorWriteBinary::operator() (const BitMap64 & x, WriteBuffer & buf)
writeString(buffer.data(), bytes, buf);
}
void FieldVisitorWriteBinary::operator() (const Object & x, WriteBuffer & buf) const
{
const size_t size = x.size();
writeBinary(size, buf);
for (const auto & [key, value] : x)
{
const UInt8 type = value.getType();
writeBinary(type, buf);
writeBinary(key, buf);
Field::dispatch([&buf] (const auto & val) { FieldVisitorWriteBinary()(val, buf); }, value);
}
}
}

View File

@ -52,6 +52,7 @@ public:
void operator() (const DecimalField<Decimal256> & x, WriteBuffer & buf) const;
void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const;
void operator() (const BitMap64 & x, WriteBuffer & buf) const;
void operator() (const Object & x, WriteBuffer & buf) const;
};
}

View File

@ -2,9 +2,6 @@
#include <Common/Exception.h>
#include <common/types.h>
#include <common/defines.h>
#include "ElementTypes.h"
namespace DB
{
@ -25,26 +22,25 @@ struct DummyJSONParser
class Element
{
public:
Element() = default;
static ElementType type() { return ElementType::NULL_VALUE; }
static bool isInt64() { return false; }
static bool isUInt64() { return false; }
static bool isDouble() { return false; }
static bool isString() { return false; }
static bool isArray() { return false; }
static bool isObject() { return false; }
static bool isBool() { return false; }
static bool isNull() { return false; }
Element() {}
bool isInt64() const { return false; }
bool isUInt64() const { return false; }
bool isDouble() const { return false; }
bool isString() const { return false; }
bool isArray() const { return false; }
bool isObject() const { return false; }
bool isBool() const { return false; }
bool isNull() const { return false; }
static Int64 getInt64() { return 0; }
static UInt64 getUInt64() { return 0; }
static double getDouble() { return 0; }
static bool getBool() { return false; }
static std::string_view getString() { return {}; }
static Array getArray() { return {}; }
static Object getObject() { return {}; }
Int64 getInt64() const { return 0; }
UInt64 getUInt64() const { return 0; }
double getDouble() const { return 0; }
bool getBool() const { return false; }
std::string_view getString() const { return {}; }
Array getArray() const { return {}; }
Object getObject() const { return {}; }
static Element getElement() { return {}; }
Element getElement() { return {}; }
};
/// References an array in a JSON document.
@ -56,14 +52,14 @@ struct DummyJSONParser
public:
Element operator*() const { return {}; }
Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; } /// NOLINT
Iterator operator++(int) { return *this; }
friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; }
};
static Iterator begin() { return {}; }
static Iterator end() { return {}; }
static size_t size() { return 0; }
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
Element operator[](size_t) const { return {}; }
};
@ -78,15 +74,15 @@ struct DummyJSONParser
public:
KeyValuePair operator*() const { return {}; }
Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; } /// NOLINT
Iterator operator++(int) { return *this; }
friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; }
};
static Iterator begin() { return {}; }
static Iterator end() { return {}; }
static size_t size() { return 0; }
bool find(std::string_view, Element &) const { return false; } /// NOLINT
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
bool find(const std::string_view &, Element &) const { return false; }
#if 0
/// Optional: Provides access to an object's element by index.
@ -95,7 +91,7 @@ struct DummyJSONParser
};
/// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(std::string_view, Element &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Functions JSON* are not supported"); } /// NOLINT
bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; }
#if 0
/// Optional: Allocates memory to parse JSON documents faster.

View File

@ -2,6 +2,9 @@
#include <Common/Exception.h>
#include <common/types.h>
#include <common/defines.h>
#include "ElementTypes.h"
namespace DB
{
@ -22,25 +25,26 @@ struct DummyJSONParser
class Element
{
public:
Element() {}
bool isInt64() const { return false; }
bool isUInt64() const { return false; }
bool isDouble() const { return false; }
bool isString() const { return false; }
bool isArray() const { return false; }
bool isObject() const { return false; }
bool isBool() const { return false; }
bool isNull() const { return false; }
Element() = default;
static ElementType type() { return ElementType::NULL_VALUE; }
static bool isInt64() { return false; }
static bool isUInt64() { return false; }
static bool isDouble() { return false; }
static bool isString() { return false; }
static bool isArray() { return false; }
static bool isObject() { return false; }
static bool isBool() { return false; }
static bool isNull() { return false; }
Int64 getInt64() const { return 0; }
UInt64 getUInt64() const { return 0; }
double getDouble() const { return 0; }
bool getBool() const { return false; }
std::string_view getString() const { return {}; }
Array getArray() const { return {}; }
Object getObject() const { return {}; }
static Int64 getInt64() { return 0; }
static UInt64 getUInt64() { return 0; }
static double getDouble() { return 0; }
static bool getBool() { return false; }
static std::string_view getString() { return {}; }
static Array getArray() { return {}; }
static Object getObject() { return {}; }
Element getElement() { return {}; }
static Element getElement() { return {}; }
};
/// References an array in a JSON document.
@ -52,14 +56,14 @@ struct DummyJSONParser
public:
Element operator*() const { return {}; }
Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; }
Iterator operator++(int) { return *this; } /// NOLINT
friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; }
};
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
static Iterator begin() { return {}; }
static Iterator end() { return {}; }
static size_t size() { return 0; }
Element operator[](size_t) const { return {}; }
};
@ -74,15 +78,15 @@ struct DummyJSONParser
public:
KeyValuePair operator*() const { return {}; }
Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; }
Iterator operator++(int) { return *this; } /// NOLINT
friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; }
};
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
bool find(const std::string_view &, Element &) const { return false; }
static Iterator begin() { return {}; }
static Iterator end() { return {}; }
static size_t size() { return 0; }
bool find(std::string_view, Element &) const { return false; } /// NOLINT
#if 0
/// Optional: Provides access to an object's element by index.
@ -91,7 +95,7 @@ struct DummyJSONParser
};
/// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; }
bool parse(std::string_view, Element &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Functions JSON* are not supported"); } /// NOLINT
#if 0
/// Optional: Allocates memory to parse JSON documents faster.

View File

@ -25,6 +25,8 @@
#cmakedefine01 USE_KRB5
#cmakedefine01 USE_JEMALLOC
#cmakedefine01 USE_BZIP2
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON
#cmakedefine01 USE_JAVA_EXTENSIONS
#cmakedefine01 USE_TSQUERY
#cmakedefine01 USE_SIMDJSON

View File

@ -150,6 +150,12 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf)
DB::readBinary(value.toUnderType(), buf);
return value;
}
case Field::Types::Object:
{
Object value;
readBinary(value, buf);
return value;
}
}
return Field();
}
@ -274,6 +280,40 @@ void writeBinary(const BitMap64 & x, WriteBuffer & buf)
writeString(tmp_buf.data(), bytes, buf);
}
void readBinary(Object & x, ReadBuffer & buf)
{
size_t size;
readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
UInt8 type;
String key;
readBinary(type, buf);
readBinary(key, buf);
x[key] = getBinaryValue(type, buf);
}
}
void writeBinary(const Object & x, WriteBuffer & buf)
{
const size_t size = x.size();
writeBinary(size, buf);
for (const auto & [key, value] : x)
{
const UInt8 type = value.getType();
writeBinary(type, buf);
writeBinary(key, buf);
Field::dispatch([&buf] (const auto & val) { FieldVisitorWriteBinary()(val, buf); }, value);
}
}
void writeText(const Object & x, WriteBuffer & buf)
{
writeFieldText(Field(x), buf);
}
template <typename T>
void readQuoted(DecimalField<T> & x, ReadBuffer & buf)
{

View File

@ -23,6 +23,7 @@
#include <cassert>
#include <vector>
#include <map>
#include <algorithm>
#include <type_traits>
#include <functional>
@ -84,6 +85,18 @@ DEFINE_FIELD_VECTOR(Tuple);
#undef DEFINE_FIELD_VECTOR
using FieldMap = std::map<String, Field, std::less<String>, AllocatorWithMemoryTracking<std::pair<const String, Field>>>;
#define DEFINE_FIELD_MAP(X) \
struct X : public FieldMap \
{ \
using FieldMap::FieldMap; \
}
DEFINE_FIELD_MAP(Object);
#undef DEFINE_FIELD_MAP
struct AggregateFunctionStateData
{
String name; /// Name with arguments.
@ -246,6 +259,7 @@ template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<Map> { using Type = Map; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
template <> struct NearestFieldTypeImpl<Object> { using Type = Object; };
template <> struct NearestFieldTypeImpl<IPv4> { using Type = IPv4; };
template <> struct NearestFieldTypeImpl<IPv6> { using Type = IPv6; };
template <> struct NearestFieldTypeImpl<NegativeInfinity> { using Type = NegativeInfinity; };
@ -306,6 +320,7 @@ public:
(SketchBinary, 29),
(IPv4, 30),
(IPv6, 31),
(Object, 32),
// Special types for index analysis
(NegativeInfinity, 254),
(PositiveInfinity, 255));
@ -519,6 +534,7 @@ public:
case Types::Array: return get<Array>() < rhs.get<Array>();
case Types::Tuple: return get<Tuple>() < rhs.get<Tuple>();
case Types::Map: return get<Map>() < rhs.get<Map>();
case Types::Object: return get<Object>() < rhs.get<Object>();
case Types::Decimal32: return get<DecimalField<Decimal32>>() < rhs.get<DecimalField<Decimal32>>();
case Types::Decimal64: return get<DecimalField<Decimal64>>() < rhs.get<DecimalField<Decimal64>>();
case Types::Decimal128: return get<DecimalField<Decimal128>>() < rhs.get<DecimalField<Decimal128>>();
@ -563,6 +579,7 @@ public:
case Types::Array: return get<Array>() <= rhs.get<Array>();
case Types::Tuple: return get<Tuple>() <= rhs.get<Tuple>();
case Types::Map: return get<Map>() <= rhs.get<Map>();
case Types::Object: return get<Object>() <= rhs.get<Object>();
case Types::Decimal32: return get<DecimalField<Decimal32>>() <= rhs.get<DecimalField<Decimal32>>();
case Types::Decimal64: return get<DecimalField<Decimal64>>() <= rhs.get<DecimalField<Decimal64>>();
case Types::Decimal128: return get<DecimalField<Decimal128>>() <= rhs.get<DecimalField<Decimal128>>();
@ -617,6 +634,7 @@ public:
case Types::Decimal256: return get<DecimalField<Decimal256>>() == rhs.get<DecimalField<Decimal256>>();
case Types::AggregateFunctionState: return get<AggregateFunctionStateData>() == rhs.get<AggregateFunctionStateData>();
case Types::BitMap64: return get<BitMap64>() == rhs.get<BitMap64>();
case Types::Object: return get<Object>() == rhs.get<Object>();
}
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
@ -663,6 +681,7 @@ public:
case Types::Decimal256: return f(field.template get<DecimalField<Decimal256>>());
case Types::AggregateFunctionState: return f(field.template get<AggregateFunctionStateData>());
case Types::BitMap64: return f(field.template get<BitMap64>());
case Types::Object: return f(field.template get<Object>());
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
@ -726,6 +745,8 @@ public:
return f.template operator()<AggregateFunctionStateData>();
case Types::BitMap64:
return f.template operator()<BitMap64>();
case Types::Object:
return f.template operator()<Object>();
case Types::IPv4:
return f.template operator()<IPv4>();
case Types::IPv6:
@ -885,6 +906,9 @@ private:
case Types::BitMap64:
destroy<BitMap64>();
break;
case Types::Object:
destroy<Object>();
break;
default:
break;
}
@ -906,30 +930,31 @@ private:
using Row = std::vector<Field>;
template <> struct Field::TypeToEnum<Null> { static const Types::Which value = Types::Null; };
template <> struct Field::TypeToEnum<NegativeInfinity> { static const Types::Which value = Types::NegativeInfinity; };
template <> struct Field::TypeToEnum<PositiveInfinity> { static const Types::Which value = Types::PositiveInfinity; };
template <> struct Field::TypeToEnum<UInt64> { static const Types::Which value = Types::UInt64; };
template <> struct Field::TypeToEnum<UInt128> { static const Types::Which value = Types::UInt128; };
template <> struct Field::TypeToEnum<UInt256> { static const Types::Which value = Types::UInt256; };
template <> struct Field::TypeToEnum<Int64> { static const Types::Which value = Types::Int64; };
template <> struct Field::TypeToEnum<Int128> { static const Types::Which value = Types::Int128; };
template <> struct Field::TypeToEnum<Int256> { static const Types::Which value = Types::Int256; };
template <> struct Field::TypeToEnum<UUID> { static const Types::Which value = Types::UUID; };
template <> struct Field::TypeToEnum<Null> { static constexpr Types::Which value = Types::Null; };
template <> struct Field::TypeToEnum<NegativeInfinity> { static constexpr Types::Which value = Types::NegativeInfinity; };
template <> struct Field::TypeToEnum<PositiveInfinity> { static constexpr Types::Which value = Types::PositiveInfinity; };
template <> struct Field::TypeToEnum<UInt64> { static constexpr Types::Which value = Types::UInt64; };
template <> struct Field::TypeToEnum<UInt128> { static constexpr Types::Which value = Types::UInt128; };
template <> struct Field::TypeToEnum<UInt256> { static constexpr Types::Which value = Types::UInt256; };
template <> struct Field::TypeToEnum<Int64> { static constexpr Types::Which value = Types::Int64; };
template <> struct Field::TypeToEnum<Int128> { static constexpr Types::Which value = Types::Int128; };
template <> struct Field::TypeToEnum<Int256> { static constexpr Types::Which value = Types::Int256; };
template <> struct Field::TypeToEnum<UUID> { static constexpr Types::Which value = Types::UUID; };
template <> struct Field::TypeToEnum<IPv4> { static constexpr Types::Which value = Types::IPv4; };
template <> struct Field::TypeToEnum<IPv6> { static constexpr Types::Which value = Types::IPv6; };
template <> struct Field::TypeToEnum<Float64> { static const Types::Which value = Types::Float64; };
template <> struct Field::TypeToEnum<String> { static const Types::Which value = Types::String; };
template <> struct Field::TypeToEnum<Array> { static const Types::Which value = Types::Array; };
template <> struct Field::TypeToEnum<Tuple> { static const Types::Which value = Types::Tuple; };
template <> struct Field::TypeToEnum<Map> { static const Types::Which value = Types::Map; };
template <> struct Field::TypeToEnum<DecimalField<Decimal32>>{ static const Types::Which value = Types::Decimal32; };
template <> struct Field::TypeToEnum<DecimalField<Decimal64>>{ static const Types::Which value = Types::Decimal64; };
template <> struct Field::TypeToEnum<DecimalField<Decimal128>>{ static const Types::Which value = Types::Decimal128; };
template <> struct Field::TypeToEnum<DecimalField<Decimal256>>{ static const Types::Which value = Types::Decimal256; };
template <> struct Field::TypeToEnum<DecimalField<DateTime64>>{ static const Types::Which value = Types::Decimal64; };
template <> struct Field::TypeToEnum<AggregateFunctionStateData>{ static const Types::Which value = Types::AggregateFunctionState; };
template <> struct Field::TypeToEnum<BitMap64>{ static const Types::Which value = Types::BitMap64; };
template <> struct Field::TypeToEnum<Float64> { static constexpr Types::Which value = Types::Float64; };
template <> struct Field::TypeToEnum<String> { static constexpr Types::Which value = Types::String; };
template <> struct Field::TypeToEnum<Array> { static constexpr Types::Which value = Types::Array; };
template <> struct Field::TypeToEnum<Tuple> { static constexpr Types::Which value = Types::Tuple; };
template <> struct Field::TypeToEnum<Map> { static constexpr Types::Which value = Types::Map; };
template <> struct Field::TypeToEnum<DecimalField<Decimal32>>{ static constexpr Types::Which value = Types::Decimal32; };
template <> struct Field::TypeToEnum<DecimalField<Decimal64>>{ static constexpr Types::Which value = Types::Decimal64; };
template <> struct Field::TypeToEnum<DecimalField<Decimal128>>{ static constexpr Types::Which value = Types::Decimal128; };
template <> struct Field::TypeToEnum<DecimalField<Decimal256>>{ static constexpr Types::Which value = Types::Decimal256; };
template <> struct Field::TypeToEnum<DecimalField<DateTime64>>{ static constexpr Types::Which value = Types::Decimal64; };
template <> struct Field::TypeToEnum<AggregateFunctionStateData>{ static constexpr Types::Which value = Types::AggregateFunctionState; };
template <> struct Field::TypeToEnum<BitMap64>{ static constexpr Types::Which value = Types::BitMap64; };
template <> struct Field::TypeToEnum<Object> { static constexpr Types::Which value = Types::Object; };
template <> struct Field::EnumToType<Field::Types::Null> { using Type = Null; };
template <> struct Field::EnumToType<Field::Types::NegativeInfinity> { using Type = NegativeInfinity; };
@ -948,6 +973,7 @@ template <> struct Field::EnumToType<Field::Types::String> { using Type = Strin
template <> struct Field::EnumToType<Field::Types::Array> { using Type = Array; };
template <> struct Field::EnumToType<Field::Types::Tuple> { using Type = Tuple; };
template <> struct Field::EnumToType<Field::Types::Map> { using Type = Map; };
template <> struct Field::EnumToType<Field::Types::Object> { using Type = Object; };
template <> struct Field::EnumToType<Field::Types::Decimal32> { using Type = DecimalField<Decimal32>; };
template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; };
@ -1099,34 +1125,39 @@ class WriteBuffer;
/// It is assumed that all elements of the array have the same type.
void readBinary(Array & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Array &, ReadBuffer &) { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Array &, ReadBuffer &) { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); }
/// It is assumed that all elements of the array have the same type.
/// Also write size and type into buf. UInt64 and Int64 is written in variadic size form
void writeBinary(const Array & x, WriteBuffer & buf);
void writeText(const Array & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Array &, WriteBuffer &) { throw Exception("Cannot write Array quoted.", ErrorCodes::NOT_IMPLEMENTED); }
void readBinary(Tuple & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Tuple &, ReadBuffer &) { throw Exception("Cannot read Tuple.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Tuple &, ReadBuffer &) { throw Exception("Cannot read Tuple.", ErrorCodes::NOT_IMPLEMENTED); }
void writeBinary(const Tuple & x, WriteBuffer & buf);
void writeText(const Tuple & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Tuple &, WriteBuffer &) { throw Exception("Cannot write Tuple quoted.", ErrorCodes::NOT_IMPLEMENTED); }
void readBinary(Map & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); }
void writeBinary(const Map & x, WriteBuffer & buf);
void writeText(const Map & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Map &, WriteBuffer &) { throw Exception("Cannot write Map quoted.", ErrorCodes::NOT_IMPLEMENTED); }
void readBinary(Object & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Object &, ReadBuffer &) { throw Exception("Cannot read Object.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Object &, ReadBuffer &) { throw Exception("Cannot read Object.", ErrorCodes::NOT_IMPLEMENTED); }
void writeBinary(const Object & x, WriteBuffer & buf);
void writeText(const Object & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Object &, WriteBuffer &) { throw Exception("Cannot write Object quoted.", ErrorCodes::NOT_IMPLEMENTED); }
__attribute__ ((noreturn)) inline void writeText(const AggregateFunctionStateData &, WriteBuffer &)
{
// This probably doesn't make any sense, but we have to have it for
@ -1156,8 +1187,6 @@ void readFieldBinary(Field & field, ReadBuffer & buf);
void writeFieldBinary(const Field & field, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Tuple &, WriteBuffer &) { throw Exception("Cannot write Tuple quoted.", ErrorCodes::NOT_IMPLEMENTED); }
String toString(const Field & x);
}

View File

@ -340,7 +340,6 @@ enum PreloadLevelSettings : UInt64
M(Float, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.", 0) \
M(Bool, allow_suspicious_low_cardinality_types, true, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
@ -1253,6 +1252,9 @@ enum PreloadLevelSettings : UInt64
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, describe_extend_object_types, false, "Deduce concrete type of columns of type Object in DESCRIBE query", 0) \
M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \
M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, true, "Allow experimental window functions", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \

View File

@ -47,6 +47,7 @@ TYPEID_MAP(Int256)
TYPEID_MAP(Float32)
TYPEID_MAP(Float64)
TYPEID_MAP(UUID)
TYPEID_MAP(IPv4)
TYPEID_MAP(IPv6)

View File

@ -88,6 +88,7 @@ enum class TypeIndex
BitMap64,
Time,
SketchBinary,
Object,
IPv4,
IPv6,
};
@ -340,6 +341,7 @@ inline constexpr const char * getTypeName(TypeIndex idx)
case TypeIndex::Map: return "Map";
case TypeIndex::BitMap64: return "BitMap64";
case TypeIndex::SketchBinary: return "SketchBinary";
case TypeIndex::Object: return "Object";
}
__builtin_unreachable();

View File

@ -16,4 +16,6 @@
#cmakedefine01 USE_NURAFT
#cmakedefine01 USE_KRB5
#cmakedefine01 USE_JEMALLOC
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON
#cmakedefine01 USE_NLP

View File

@ -126,6 +126,7 @@ IBGJobStatusPersistentStoreProxy::CacheClearer CatalogBGJobStatusPersistentStore
if (catalog) // catalog is nullptr in unittest
statuses_cache = catalog->getBGJobStatuses(type);
is_cache_prefetched = true;
return CacheClearer{this};
}

View File

@ -53,6 +53,8 @@ bvar::Adder<int> & getExecuteMetric(CnchBGThreadType type)
return g_executeImpl_TxnGC;
case CnchBGThreadType::Clustering:
return g_executeImpl_Clustering;
case CnchBGThreadType::ObjectSchemaAssemble:
return g_executeImpl_ObjectSchemaAssemble;
case CnchBGThreadType::MaterializedMySQL:
return g_executeImpl_MaterializedMySQL;
default:
@ -81,6 +83,8 @@ bvar::Adder<int> & getExecuteErrorMetric(CnchBGThreadType type)
return g_executeImpl_TxnGC_error;
case CnchBGThreadType::Clustering:
return g_executeImpl_Clustering_error;
case CnchBGThreadType::ObjectSchemaAssemble:
return g_executeImpl_ObjectSchemaAssemble_error;
case CnchBGThreadType::MaterializedMySQL:
return g_executeImpl_MaterializedMySQL_error;
default:

View File

@ -1138,6 +1138,7 @@ void registerServerBGThreads(DaemonFactory & factory)
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::Clustering, isCnchMergeTree>>("PART_CLUSTERING");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::Consumer, isCnchKafka>>("CONSUMER");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::DedupWorker, isCnchUniqueTableAndNeedDedup>>("DEDUP_WORKER");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::ObjectSchemaAssemble, isCnchMergeTree>>("OBJECT_SCHEMA_ASSEMBLE");
factory.registerDaemonJobForBGThreadInServer<DaemonJobForCnch<CnchBGThreadType::MaterializedMySQL, isMaterializedMySQL>>("MATERIALIZED_MYSQL");
}

View File

@ -179,6 +179,7 @@ std::unordered_map<CnchBGThreadType, DaemonJobServerBGThreadPtr> createDaemonJob
{ "CONSUMER", 10000},
{ "DEDUP_WORKER", 10000},
{ "PART_CLUSTERING", 10000},
{ "OBJECT_SCHEMA_ASSEMBLE", 10000},
{ "MATERIALIZED_MYSQL", 10000}
};

View File

@ -63,6 +63,11 @@ namespace DB::DaemonManager::BRPCMetrics
bvar::Window<bvar::Adder<int>> g_executeImpl_Clustering_error_minute("DaemonManager_Internal", "executeImpl_Clustering_error", & g_executeImpl_Clustering_error, 60);
bvar::Window<bvar::Adder<int>> g_executeImpl_Clustering_minute("DaemonManager_Internal", "executeImpl_Clustering", & g_executeImpl_Clustering, 60);
bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble_error;
bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble;
bvar::Window<bvar::Adder<int>> g_executeImpl_ObjectSchemaAssemble_error_minute("DaemonManager_Internal", "executeImpl_ObjectSchemaAssemble_error", & g_executeImpl_ObjectSchemaAssemble_error, 60);
bvar::Window<bvar::Adder<int>> g_executeImpl_ObjectSchemaAssemble_minute("DaemonManager_Internal", "executeImpl_ObjectSchemaAssemble", & g_executeImpl_ObjectSchemaAssemble, 60);
bvar::Adder< int > g_executeImpl_MaterializedMySQL_error;
bvar::Adder< int > g_executeImpl_MaterializedMySQL;
bvar::Window<bvar::Adder<int>> g_executeImpl_MaterializedMySQL_error_minute("DaemonManager_Internal", "executeImpl_MaterializedMySQL_error", & g_executeImpl_MaterializedMySQL_error, 60);

View File

@ -37,6 +37,8 @@ namespace DB::DaemonManager::BRPCMetrics
extern bvar::Adder< int > g_executeImpl_TxnGC;
extern bvar::Adder< int > g_executeImpl_Clustering_error;
extern bvar::Adder< int > g_executeImpl_Clustering;
extern bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble_error;
extern bvar::Adder< int > g_executeImpl_ObjectSchemaAssemble;
extern bvar::Adder< int > g_executeImpl_MaterializedMySQL_error;
extern bvar::Adder< int > g_executeImpl_MaterializedMySQL;
}/// end namespace

View File

@ -85,7 +85,7 @@ static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuf
auto full_type = lc_type->getFullLowCardinalityTypePtr();
auto serialization = full_type->getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr state;
serialization->serializeBinaryBulkStatePrefix(settings, state);
serialization->serializeBinaryBulkStatePrefix(*full_column, settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state);
return ;
@ -95,7 +95,7 @@ static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuf
auto serialization = type.getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr state;
serialization->serializeBinaryBulkStatePrefix(settings, state);
serialization->serializeBinaryBulkStatePrefix(*full_column, settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state);
}

View File

@ -524,12 +524,13 @@ void RemoteQueryExecutor::sendExternalTables()
{
SelectQueryInfo query_info;
auto metadata_snapshot = cur->getInMemoryMetadataPtr();
auto storage_snapshot = cur->getStorageSnapshot(metadata_snapshot, context);
QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(
context, QueryProcessingStage::Complete, metadata_snapshot, query_info);
context, QueryProcessingStage::Complete, storage_snapshot, query_info);
Pipe pipe = cur->read(
metadata_snapshot->getColumns().getNamesOfPhysical(),
metadata_snapshot, query_info, context,
storage_snapshot, query_info, context,
read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
if (pipe.empty())

View File

@ -1,3 +1,5 @@
add_subdirectory (Serializations)
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
endif ()

View File

@ -95,26 +95,26 @@ ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const I
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
}
SerializationPtr DataTypeArray::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{
return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0);
}
// SerializationPtr DataTypeArray::getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
// {
// return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0);
// }
SerializationPtr DataTypeArray::getSubcolumnSerializationImpl(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const
{
if (subcolumn_name == "size" + std::to_string(level))
return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false);
// SerializationPtr DataTypeArray::getSubcolumnSerializationImpl(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const
// {
// if (subcolumn_name == "size" + std::to_string(level))
// return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false);
SerializationPtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1);
else
subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
// SerializationPtr subcolumn;
// if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
// subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1);
// else
// subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
return std::make_shared<SerializationArray>(subcolumn);
}
// return std::make_shared<SerializationArray>(subcolumn);
// }
SerializationPtr DataTypeArray::doGetDefaultSerialization() const
{

View File

@ -78,8 +78,8 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
// SerializationPtr getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
SerializationPtr doGetDefaultSerialization() const override;

View File

@ -27,6 +27,8 @@ public:
bool isCategorial() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool isComparable() const override { return true; }
virtual bool contains(const IDataType & rhs) const = 0;
bool canBeMapKeyType() const override { return true; }
};
@ -78,7 +80,7 @@ public:
/// Example:
/// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK
/// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK
bool contains(const IDataType & rhs) const;
bool contains(const IDataType & rhs) const override;
SerializationPtr doGetDefaultSerialization() const override;

View File

@ -288,6 +288,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeBitMap64(*this);
registerDataTypeSketchBinary(*this);
registerDataTypeDomainBool(*this);
registerDataTypeObject(*this);
}
DataTypeFactory & DataTypeFactory::instance()

View File

@ -113,5 +113,6 @@ void registerDataTypeBitMap64(DataTypeFactory & factory);
void registerDataTypeSet(DataTypeFactory & factory);
void registerDataTypeSketchBinary(DataTypeFactory & factory);
void registerDataTypeDomainBool(DataTypeFactory & factory);
void registerDataTypeObject(DataTypeFactory & factory);
}

View File

@ -32,6 +32,27 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
DataTypeMap::DataTypeMap(const DataTypePtr & nested_)
: nested(nested_)
{
const auto * type_array = typeid_cast<const DataTypeArray *>(nested.get());
if (!type_array)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_array->getNestedType().get());
if (!type_tuple)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
if (type_tuple->getElements().size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
key_type = type_tuple->getElement(0);
value_type = type_tuple->getElement(1);
checkKeyType();
}
DataTypeMap::DataTypeMap(const DataTypes & elems_)
{
@ -93,11 +114,11 @@ ColumnPtr DataTypeMap::getSubcolumn(const String & subcolumn_name, const IColumn
return nested->getSubcolumn(subcolumn_name, extractNestedColumn(column));
}
SerializationPtr DataTypeMap::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{
return nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
}
// SerializationPtr DataTypeMap::getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
// {
// return nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
// }
MutableColumnPtr DataTypeMap::createColumn() const
{

View File

@ -31,6 +31,7 @@ private:
public:
static constexpr bool is_parametric = true;
explicit DataTypeMap(const DataTypePtr & nested_);
DataTypeMap(const DataTypes & elems);
DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_);
@ -42,8 +43,8 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
// SerializationPtr getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
MutableColumnPtr createColumn() const override;
@ -73,7 +74,6 @@ public:
private:
void checkKeyType() const;
bool isMapKVStore() const { return flags & TYPE_MAP_KV_STORE_FLAG;}
bool isMapByteStore() const { return flags & TYPE_MAP_BYTE_STORE_FLAG; }
};

View File

@ -81,14 +81,14 @@ ColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, const IC
return nested_data_type->getSubcolumn(subcolumn_name, column_nullable.getNestedColumn());
}
SerializationPtr DataTypeNullable::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{
if (subcolumn_name == "null")
return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt8()), subcolumn_name, false);
// SerializationPtr DataTypeNullable::getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
// {
// if (subcolumn_name == "null")
// return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt8()), subcolumn_name, false);
return nested_data_type->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
}
// return nested_data_type->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
// }
SerializationPtr DataTypeNullable::doGetDefaultSerialization() const
{

View File

@ -68,8 +68,8 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
// SerializationPtr getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
const DataTypePtr & getNestedType() const { return nested_data_type; }

View File

@ -0,0 +1,82 @@
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationObject.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <IO/Operators.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNEXPECTED_AST_STRUCTURE;
}
DataTypeObject::DataTypeObject(const String & schema_format_, bool is_nullable_)
: schema_format(Poco::toLower(schema_format_))
, is_nullable(is_nullable_)
{
}
bool DataTypeObject::equals(const IDataType & rhs) const
{
if (const auto * object = typeid_cast<const DataTypeObject *>(&rhs))
return schema_format == object->schema_format && is_nullable == object->is_nullable;
return false;
}
SerializationPtr DataTypeObject::doGetDefaultSerialization() const
{
return getObjectSerialization(schema_format);
}
String DataTypeObject::doGetName() const
{
WriteBufferFromOwnString out;
if (is_nullable)
out << "Object(Nullable(" << quote << schema_format << "))";
else
out << "Object(" << quote << schema_format << ")";
return out.str();
}
static DataTypePtr create(const ASTPtr & arguments)
{
if (!arguments || arguments->children.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Object data type family must have one argument - name of schema format");
ASTPtr schema_argument = arguments->children[0];
bool is_nullable = false;
if (const auto * func = schema_argument->as<ASTFunction>())
{
if (func->name != "Nullable" || func->arguments->children.size() != 1)
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
"Expected 'Nullable(<schema_name>)' as parameter for type Object (function: {})", func->name);
schema_argument = func->arguments->children[0];
is_nullable = true;
}
const auto * literal = schema_argument->as<ASTLiteral>();
if (!literal || literal->value.getType() != Field::Types::String)
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
"Object data type family must have a const string as its schema name parameter");
return std::make_shared<DataTypeObject>(literal->value.get<const String &>(), is_nullable);
}
void registerDataTypeObject(DataTypeFactory & factory)
{
factory.registerDataType("Object", create);
factory.registerSimpleDataType("JSON",
[] { return std::make_shared<DataTypeObject>("JSON", false); },
DataTypeFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,48 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <Core/Field.h>
#include <Columns/ColumnObject.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
class DataTypeObject : public IDataType
{
private:
String schema_format;
bool is_nullable;
public:
DataTypeObject(const String & schema_format_, bool is_nullable_);
const char * getFamilyName() const override { return "Object"; }
String doGetName() const override;
TypeIndex getTypeId() const override { return TypeIndex::Object; }
MutableColumnPtr createColumn() const override { return ColumnObject::create(is_nullable); }
Field getDefault() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDefault() is not implemented for data type {}", getName());
}
bool haveSubtypes() const override { return false; }
bool equals(const IDataType & rhs) const override;
bool isParametric() const override { return true; }
bool hasDynamicSubcolumns() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override;
bool hasNullableSubcolumns() const { return is_nullable; }
const String & getSchemaFormat() const { return schema_format; }
};
}

View File

@ -291,25 +291,25 @@ ColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, const IColu
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
}
SerializationPtr DataTypeTuple::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{
auto on_success = [&](size_t pos)
{
return std::make_shared<SerializationTupleElement>(base_serialization_getter(*elems[pos]), names[pos]);
};
// SerializationPtr DataTypeTuple::getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
// {
// auto on_success = [&](size_t pos)
// {
// return std::make_shared<SerializationTupleElement>(base_serialization_getter(*elems[pos]), names[pos]);
// };
auto on_continue = [&](size_t pos, const String & next_subcolumn)
{
auto next_serialization = elems[pos]->getSubcolumnSerialization(next_subcolumn, base_serialization_getter);
return std::make_shared<SerializationTupleElement>(next_serialization, names[pos]);
};
// auto on_continue = [&](size_t pos, const String & next_subcolumn)
// {
// auto next_serialization = elems[pos]->getSubcolumnSerialization(next_subcolumn, base_serialization_getter);
// return std::make_shared<SerializationTupleElement>(next_serialization, names[pos]);
// };
if (auto serialization = getSubcolumnEntity(subcolumn_name, on_success, on_continue))
return serialization;
// if (auto serialization = getSubcolumnEntity(subcolumn_name, on_success, on_continue))
// return serialization;
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
}
// throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
// }
SerializationPtr DataTypeTuple::doGetDefaultSerialization() const
@ -320,7 +320,7 @@ SerializationPtr DataTypeTuple::doGetDefaultSerialization() const
{
String elem_name = use_explicit_names ? names[i] : toString(i + 1);
auto serialization = elems[i]->getDefaultSerialization();
serializations[i] = std::make_shared<SerializationTupleElement>(serialization, elem_name);
serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name);
}
return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names);
@ -335,7 +335,7 @@ SerializationPtr DataTypeTuple::getSerialization(const String & column_name, con
String elem_name = use_explicit_names ? names[i] : toString(i + 1);
auto subcolumn_name = Nested::concatenateName(column_name, elem_name);
auto serializaion = elems[i]->getSerialization(subcolumn_name, callback);
serializations[i] = std::make_shared<SerializationTupleElement>(serializaion, elem_name);
serializations[i] = std::make_shared<SerializationNamed>(serializaion, elem_name);
}
return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names);

View File

@ -57,8 +57,8 @@ public:
SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
// SerializationPtr getSubcolumnSerialization(
// const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
SerializationPtr doGetDefaultSerialization() const override;

View File

@ -31,6 +31,7 @@
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeIPv4andIPv6.h>
#include <DataTypes/DataTypeBitMap64.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/getLeastSupertype.h>
#include <DataTypes/DataTypeFactory.h>
#include <Common/Exception.h>
@ -152,9 +153,9 @@ DataTypePtr FieldToDataType::operator() (const Array & x) const
element_types.reserve(x.size());
for (const Field & elem : x)
element_types.emplace_back(applyVisitor(FieldToDataType(), elem));
element_types.emplace_back(applyVisitor(FieldToDataType(allow_convertion_to_string), elem));
return std::make_shared<DataTypeArray>(getLeastSupertype(element_types));
return std::make_shared<DataTypeArray>(getLeastSupertype(element_types, allow_convertion_to_string));
}
@ -167,7 +168,7 @@ DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const
element_types.reserve(tuple.size());
for (const auto & element : tuple)
element_types.push_back(applyVisitor(FieldToDataType(), element));
element_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), element));
return std::make_shared<DataTypeTuple>(element_types);
}
@ -185,7 +186,9 @@ DataTypePtr FieldToDataType::operator() (const Map & map) const
value_types.push_back(applyVisitor(FieldToDataType(), elem.second));
}
return std::make_shared<DataTypeMap>(getLeastSupertype(key_types), getLeastSupertype(value_types));
return std::make_shared<DataTypeMap>(
getLeastSupertype(key_types, allow_convertion_to_string),
getLeastSupertype(value_types, allow_convertion_to_string));
}
DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) const
@ -199,4 +202,10 @@ DataTypePtr FieldToDataType::operator() (const BitMap64 &) const
return std::make_shared<DataTypeBitMap64>();
}
DataTypePtr FieldToDataType::operator() (const Object &) const
{
/// TODO: Do we need different parameters for type Object?
return std::make_shared<DataTypeObject>("json", false);
}
}

View File

@ -41,6 +41,11 @@ using DataTypePtr = std::shared_ptr<const IDataType>;
class FieldToDataType : public StaticVisitor<DataTypePtr>
{
public:
FieldToDataType(bool allow_convertion_to_string_ = false)
: allow_convertion_to_string(allow_convertion_to_string_)
{
}
DataTypePtr operator() (const Null & x) const;
DataTypePtr operator() (const NegativeInfinity & x) const;
DataTypePtr operator() (const PositiveInfinity & x) const;
@ -64,6 +69,10 @@ public:
DataTypePtr operator() (const DecimalField<Decimal256> & x) const;
DataTypePtr operator() (const AggregateFunctionStateData & x) const;
DataTypePtr operator() (const BitMap64 & x) const;
DataTypePtr operator() (const Object & map) const;
private:
bool allow_convertion_to_string;
};
}

View File

@ -33,6 +33,7 @@
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/Serializations/SerializationTupleElement.h>
#include <DataTypes/Serializations/SerializationInfo.h>
#include <Storages/MergeTree/MergeTreeSuffix.h>
@ -117,6 +118,50 @@ size_t IDataType::getSizeOfValueInMemory() const
throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR);
}
void IDataType::forEachSubcolumn(
const SubcolumnCallback & callback,
const SubstreamData & data)
{
ISerialization::StreamCallback callback_with_data = [&](const auto & subpath)
{
for (size_t i = 0; i < subpath.size(); ++i)
{
size_t prefix_len = i + 1;
if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, prefix_len))
{
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
auto subdata = ISerialization::createFromPath(subpath, prefix_len);
callback(subpath, name, subdata);
}
subpath[i].visited = true;
}
};
ISerialization::EnumerateStreamsSettings settings;
settings.position_independent_encoding = false;
data.serialization->enumerateStreams(settings, callback_with_data, data);
}
template <typename Ptr>
Ptr IDataType::getForSubcolumn(
const String & subcolumn_name,
const SubstreamData & data,
Ptr SubstreamData::*member,
bool throw_if_null) const
{
Ptr res;
forEachSubcolumn([&](const auto &, const auto & name, const auto & subdata)
{
if (name == subcolumn_name)
res = subdata.*member;
}, data);
if (!res && throw_if_null)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
return res;
}
DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
{
if (subcolumn_name == MAIN_SUBCOLUMN_NAME)
@ -127,11 +172,17 @@ DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
{
auto subcolumn_type = tryGetSubcolumnType(subcolumn_name);
if (subcolumn_type)
return subcolumn_type;
if (subcolumn_name == MAIN_SUBCOLUMN_NAME)
return shared_from_this();
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, true);
}
ColumnPtr IDataType::tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
{
auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, false);
}
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const
@ -141,21 +192,12 @@ ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &
Names IDataType::getSubcolumnNames() const
{
NameSet res;
getDefaultSerialization()->enumerateStreams([&res, this](const ISerialization::SubstreamPath & substream_path)
Names res;
forEachSubcolumn([&](const auto &, const auto & name, const auto &)
{
ISerialization::SubstreamPath new_path;
/// Iterate over path to try to get intermediate subcolumns for complex nested types.
for (const auto & elem : substream_path)
{
new_path.push_back(elem);
auto subcolumn_name = ISerialization::getSubcolumnNameForStream(new_path);
if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name))
res.insert(subcolumn_name);
}
});
return Names(std::make_move_iterator(res.begin()), std::make_move_iterator(res.end()));
res.push_back(name);
}, SubstreamData(getDefaultSerialization()));
return res;
}
void IDataType::insertDefaultInto(IColumn & column) const
@ -173,6 +215,14 @@ void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
custom_serialization = std::move(custom_desc_->serialization);
}
SerializationInfoPtr IDataType::getSerializationInfo(const IColumn & column) const
{
if (const auto * column_const = checkAndGetColumn<ColumnConst>(&column))
return getSerializationInfo(column_const->getDataColumn());
return std::make_shared<SerializationInfo>(ISerialization::getKind(column), SerializationInfo::Settings{});
}
SerializationPtr IDataType::getDefaultSerialization() const
{
if (custom_serialization)
@ -181,9 +231,23 @@ SerializationPtr IDataType::getDefaultSerialization() const
return doGetDefaultSerialization();
}
SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const BaseSerializationGetter &) const
SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
auto data = SubstreamData(serialization);
return getForSubcolumn<SerializationPtr>(subcolumn_name, data, &SubstreamData::serialization, true);
}
SerializationPtr IDataType::getSerialization(ISerialization::Kind /*kind*/) const
{
// if (supportsSparseSerialization() && kind == ISerialization::Kind::SPARSE)
// return getSparseSerialization();
return getDefaultSerialization();
}
SerializationPtr IDataType::getSerialization(const SerializationInfo & info) const
{
return getSerialization(info.getKind());
}
// static
@ -191,14 +255,9 @@ SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, con
{
if (column.isSubcolumn())
{
/// Wrap to custom serialization deepest subcolumn, which is represented in non-complex type.
auto base_serialization_getter = [&](const IDataType & subcolumn_type)
{
return subcolumn_type.getSerialization(column.name, callback);
};
auto type_in_storage = column.getTypeInStorage();
return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), base_serialization_getter);
const auto & type_in_storage = column.getTypeInStorage();
auto default_serialization = type_in_storage->getDefaultSerialization();
return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), default_serialization);
}
return column.type->getSerialization(column.name, callback);

View File

@ -110,12 +110,34 @@ public:
static constexpr auto MAIN_SUBCOLUMN_NAME = "__main";
virtual DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const;
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
ColumnPtr tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const;
virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const;
Names getSubcolumnNames() const;
using SubstreamData = ISerialization::SubstreamData;
using SubstreamPath = ISerialization::SubstreamPath;
using SubcolumnCallback = std::function<void(
const SubstreamPath &,
const String &,
const SubstreamData &)>;
static void forEachSubcolumn(
const SubcolumnCallback & callback,
const SubstreamData & data);
virtual SerializationInfoPtr getSerializationInfo(const IColumn & column) const;
/// Returns default serialization of data type.
SerializationPtr getDefaultSerialization() const;
/// Chooses serialization according to serialization kind.
SerializationPtr getSerialization(ISerialization::Kind kind) const;
/// Chooses serialization according to collected information about content of column.
virtual SerializationPtr getSerialization(const SerializationInfo & info) const;
/// Asks whether the stream with given name exists in table.
/// If callback returned true for all streams, which are required for
/// one of serialization types, that serialization will be chosen for reading.
@ -133,7 +155,7 @@ public:
/// Returns serialization wrapper for reading one particular subcolumn of data type.
virtual SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const;
const String & subcolumn_name, const SerializationPtr & serialization) const;
using StreamCallbackWithType = std::function<void(const ISerialization::SubstreamPath &, const IDataType &)>;
@ -371,6 +393,14 @@ protected:
public:
const IDataTypeCustomName * getCustomName() const { return custom_name.get(); }
const ISerialization * getCustomSerialization() const { return custom_serialization.get(); }
private:
template <typename Ptr>
Ptr getForSubcolumn(
const String & subcolumn_name,
const SubstreamData & data,
Ptr SubstreamData::*member,
bool throw_if_null) const;
};
void setDefaultUseMapType(bool default_use_kv_map_type);
@ -443,6 +473,7 @@ struct WhichDataType
constexpr bool isMap() const {return idx == TypeIndex::Map; }
constexpr bool isSet() const { return idx == TypeIndex::Set; }
constexpr bool isInterval() const { return idx == TypeIndex::Interval; }
constexpr bool isObject() const { return idx == TypeIndex::Object; }
constexpr bool isNothing() const { return idx == TypeIndex::Nothing; }
constexpr bool isNullable() const { return idx == TypeIndex::Nullable; }
@ -450,6 +481,7 @@ struct WhichDataType
constexpr bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; }
constexpr bool isSimple() const { return isInt() || isUInt() || isFloat() || isString(); }
constexpr bool isBitmap64() const { return idx == TypeIndex::BitMap64; }
};
/// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
@ -481,6 +513,12 @@ inline bool isIPv4(const DataTypePtr & data_type) { return WhichDataType(data_ty
inline bool isIPv6(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv6(); }
inline bool isBitmap64(const DataTypePtr & data_type) { return WhichDataType(data_type).isBitmap64(); }
template <typename T>
inline bool isObject(const T & data_type)
{
return WhichDataType(data_type).isObject();
}
template <typename T>
inline bool isUInt8(const T & data_type)
{
@ -642,4 +680,19 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime> = t
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime64> = true;
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeTime> = true;
#define FOR_NUMERIC_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(UInt128) \
M(UInt256) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256) \
M(Float32) \
M(Float64)
}

Some files were not shown because too many files have changed in this diff Show More