Merge branch 'master' into CLICKHOUSE-5

This commit is contained in:
alexey-milovidov 2017-06-13 07:30:36 +03:00 committed by GitHub
commit a669136ac2
531 changed files with 9125 additions and 6355 deletions

View File

@ -0,0 +1,150 @@
// (C) Copyright John Maddock 2000.
// Use, modification and distribution are subject to the Boost Software License,
// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt).
//
// See http://www.boost.org/libs/type_traits for most recent version including documentation.
// See boost/type_traits/*.hpp for full copyright notices.
#ifndef BOOST_TYPE_TRAITS_HPP
#define BOOST_TYPE_TRAITS_HPP
#include <boost/type_traits/add_const.hpp>
#include <boost/type_traits/add_cv.hpp>
#include <boost/type_traits/add_lvalue_reference.hpp>
#include <boost/type_traits/add_pointer.hpp>
#include <boost/type_traits/add_reference.hpp>
#include <boost/type_traits/add_rvalue_reference.hpp>
#include <boost/type_traits/add_volatile.hpp>
#include <boost/type_traits/aligned_storage.hpp>
#include <boost/type_traits/alignment_of.hpp>
#include <boost/type_traits/common_type.hpp>
#include <boost/type_traits/conditional.hpp>
#include <boost/type_traits/copy_cv.hpp>
#include <boost/type_traits/decay.hpp>
#include <boost/type_traits/declval.hpp>
#include <boost/type_traits/extent.hpp>
#include <boost/type_traits/floating_point_promotion.hpp>
#include <boost/type_traits/function_traits.hpp>
#include <boost/type_traits/has_bit_and.hpp>
#include <boost/type_traits/has_bit_and_assign.hpp>
#include <boost/type_traits/has_bit_or.hpp>
#include <boost/type_traits/has_bit_or_assign.hpp>
#include <boost/type_traits/has_bit_xor.hpp>
#include <boost/type_traits/has_bit_xor_assign.hpp>
#include <boost/type_traits/has_complement.hpp>
#include <boost/type_traits/has_dereference.hpp>
#include <boost/type_traits/has_divides.hpp>
#include <boost/type_traits/has_divides_assign.hpp>
#include <boost/type_traits/has_equal_to.hpp>
#include <boost/type_traits/has_greater.hpp>
#include <boost/type_traits/has_greater_equal.hpp>
#include <boost/type_traits/has_left_shift.hpp>
#include <boost/type_traits/has_left_shift_assign.hpp>
#include <boost/type_traits/has_less.hpp>
#include <boost/type_traits/has_less_equal.hpp>
#include <boost/type_traits/has_logical_and.hpp>
#include <boost/type_traits/has_logical_not.hpp>
#include <boost/type_traits/has_logical_or.hpp>
#include <boost/type_traits/has_minus.hpp>
#include <boost/type_traits/has_minus_assign.hpp>
#include <boost/type_traits/has_modulus.hpp>
#include <boost/type_traits/has_modulus_assign.hpp>
#include <boost/type_traits/has_multiplies.hpp>
#include <boost/type_traits/has_multiplies_assign.hpp>
#include <boost/type_traits/has_negate.hpp>
#if !defined(__BORLANDC__) && !defined(__CUDACC__)
#include <boost/type_traits/has_new_operator.hpp>
#endif
#include <boost/type_traits/has_not_equal_to.hpp>
#include <boost/type_traits/has_nothrow_assign.hpp>
#include <boost/type_traits/has_nothrow_constructor.hpp>
#include <boost/type_traits/has_nothrow_copy.hpp>
#include <boost/type_traits/has_nothrow_destructor.hpp>
#include <boost/type_traits/has_plus.hpp>
#include <boost/type_traits/has_plus_assign.hpp>
#include <boost/type_traits/has_post_decrement.hpp>
#include <boost/type_traits/has_post_increment.hpp>
#include <boost/type_traits/has_pre_decrement.hpp>
#include <boost/type_traits/has_pre_increment.hpp>
#include <boost/type_traits/has_right_shift.hpp>
#include <boost/type_traits/has_right_shift_assign.hpp>
#include <boost/type_traits/has_trivial_assign.hpp>
#include <boost/type_traits/has_trivial_constructor.hpp>
#include <boost/type_traits/has_trivial_copy.hpp>
#include <boost/type_traits/has_trivial_destructor.hpp>
#include <boost/type_traits/has_trivial_move_assign.hpp>
#include <boost/type_traits/has_trivial_move_constructor.hpp>
#include <boost/type_traits/has_unary_minus.hpp>
#include <boost/type_traits/has_unary_plus.hpp>
#include <boost/type_traits/has_virtual_destructor.hpp>
#include <boost/type_traits/integral_constant.hpp>
#include <boost/type_traits/is_abstract.hpp>
#include <boost/type_traits/is_arithmetic.hpp>
#include <boost/type_traits/is_array.hpp>
#include <boost/type_traits/is_assignable.hpp>
#include <boost/type_traits/is_base_and_derived.hpp>
#include <boost/type_traits/is_base_of.hpp>
#include <boost/type_traits/is_class.hpp>
#include <boost/type_traits/is_complex.hpp>
#include <boost/type_traits/is_compound.hpp>
#include <boost/type_traits/is_const.hpp>
#include <boost/type_traits/is_constructible.hpp>
#include <boost/type_traits/is_convertible.hpp>
#include <boost/type_traits/is_copy_assignable.hpp>
#include <boost/type_traits/is_copy_constructible.hpp>
#include <boost/type_traits/is_default_constructible.hpp>
#include <boost/type_traits/is_destructible.hpp>
#include <boost/type_traits/is_empty.hpp>
#include <boost/type_traits/is_enum.hpp>
#include <boost/type_traits/is_final.hpp>
#include <boost/type_traits/is_float.hpp>
#include <boost/type_traits/is_floating_point.hpp>
#include <boost/type_traits/is_function.hpp>
#include <boost/type_traits/is_fundamental.hpp>
#include <boost/type_traits/is_integral.hpp>
#include <boost/type_traits/is_lvalue_reference.hpp>
#include <boost/type_traits/is_member_function_pointer.hpp>
#include <boost/type_traits/is_member_object_pointer.hpp>
#include <boost/type_traits/is_member_pointer.hpp>
#include <boost/type_traits/is_nothrow_move_assignable.hpp>
#include <boost/type_traits/is_nothrow_move_constructible.hpp>
#include <boost/type_traits/is_object.hpp>
#include <boost/type_traits/is_pod.hpp>
#include <boost/type_traits/is_pointer.hpp>
#include <boost/type_traits/is_polymorphic.hpp>
#include <boost/type_traits/is_reference.hpp>
#include <boost/type_traits/is_rvalue_reference.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/type_traits/is_scalar.hpp>
#include <boost/type_traits/is_signed.hpp>
#include <boost/type_traits/is_stateless.hpp>
#include <boost/type_traits/is_union.hpp>
#include <boost/type_traits/is_unsigned.hpp>
#include <boost/type_traits/is_virtual_base_of.hpp>
#include <boost/type_traits/is_void.hpp>
#include <boost/type_traits/is_volatile.hpp>
#include <boost/type_traits/make_signed.hpp>
#include <boost/type_traits/make_unsigned.hpp>
#include <boost/type_traits/rank.hpp>
#include <boost/type_traits/remove_all_extents.hpp>
#include <boost/type_traits/remove_bounds.hpp>
#include <boost/type_traits/remove_const.hpp>
#include <boost/type_traits/remove_cv.hpp>
#include <boost/type_traits/remove_extent.hpp>
#include <boost/type_traits/remove_pointer.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <boost/type_traits/remove_volatile.hpp>
#include <boost/type_traits/type_identity.hpp>
#include <boost/type_traits/type_with_alignment.hpp>
#if !(defined(__sgi) && defined(__EDG_VERSION__) && (__EDG_VERSION__ == 238))
#include <boost/type_traits/integral_promotion.hpp>
#include <boost/type_traits/promote.hpp>
#endif
#endif // BOOST_TYPE_TRAITS_HPP

View File

@ -42,7 +42,6 @@ set(dbms_headers)
set(dbms_sources)
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(dbms src/TableFunctions)
add_headers_and_sources(dbms src/Parsers)
add_headers_and_sources(dbms src/Analyzers)
add_headers_and_sources(dbms src/Core)
@ -70,9 +69,8 @@ list (APPEND dbms_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp src/Functions/DataTypeTraits.cpp)
list (APPEND dbms_headers src/Functions/IFunction.h src/Functions/FunctionFactory.h src/Functions/DataTypeTraits.h)
list (APPEND dbms_sources
src/AggregateFunctions/AggregateFunctionFactory.cpp
src/AggregateFunctions/AggregateFunctionState.cpp
src/AggregateFunctions/AggregateFunctionFactory.cpp
src/AggregateFunctions/AggregateFunctionState.cpp
src/AggregateFunctions/AggregateFunctionArray.cpp
@ -80,22 +78,21 @@ list (APPEND dbms_sources
src/AggregateFunctions/AggregateFunctionForEach.cpp
src/AggregateFunctions/AggregateFunctionIf.cpp
src/AggregateFunctions/AggregateFunctionMerge.cpp
src/AggregateFunctions/AggregateFunctionCount.cpp
)
src/AggregateFunctions/AggregateFunctionCount.cpp)
list (APPEND dbms_headers
src/AggregateFunctions/IAggregateFunction.h
src/AggregateFunctions/AggregateFunctionFactory.h
src/AggregateFunctions/AggregateFunctionState.h
src/AggregateFunctions/AggregateFunctionFactory.h
src/AggregateFunctions/AggregateFunctionState.h
src/AggregateFunctions/AggregateFunctionArray.h
src/AggregateFunctions/AggregateFunctionNull.h
src/AggregateFunctions/AggregateFunctionForEach.h
src/AggregateFunctions/AggregateFunctionIf.h
src/AggregateFunctions/AggregateFunctionMerge.h
src/AggregateFunctions/AggregateFunctionCount.h
)
src/AggregateFunctions/AggregateFunctionCount.h)
list (APPEND dbms_sources src/TableFunctions/TableFunctionFactory.cpp)
list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions/TableFunctionFactory.h)
list(REMOVE_ITEM dbms_sources

View File

@ -52,12 +52,12 @@ void AggregateFunctionFactory::registerFunction(const String & name, Creator cre
" a null constructor", ErrorCodes::LOGICAL_ERROR);
if (!aggregate_functions.emplace(name, creator).second)
throw Exception("AggregateFunctionFactory: the aggregate function name " + name + " is not unique",
throw Exception("AggregateFunctionFactory: the aggregate function name '" + name + "' is not unique",
ErrorCodes::LOGICAL_ERROR);
if (case_sensitiveness == CaseInsensitive
&& !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator).second)
throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name " + name + " is not unique",
throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique",
ErrorCodes::LOGICAL_ERROR);
}

View File

@ -18,7 +18,7 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <ext/range.hpp>
#include <ext/range.h>
namespace DB

View File

@ -5,7 +5,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <Parsers/CommonParsers.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <boost/range/iterator_range_core.hpp>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ASTLiteral.h>

View File

@ -23,6 +23,8 @@ struct CollectTables;
* SELECT array FROM t ARRAY JOIN array array -> array
* SELECT nested.elem FROM t ARRAY JOIN nested nested -> nested
* SELECT elem FROM t ARRAY JOIN [1, 2, 3] AS elem elem -> [1, 2, 3]
*
* Does not analyze arrayJoin functions.
*/
struct AnalyzeArrayJoins
{

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
}
void AnalyzeResultOfQuery::process(ASTPtr & ast, const Context & context)
void AnalyzeResultOfQuery::process(ASTPtr & ast, const Context & context, ExecuteTableFunctions & table_functions)
{
const ASTSelectQuery * select = typeid_cast<const ASTSelectQuery *>(ast.get());
if (!select)
@ -35,13 +35,13 @@ void AnalyzeResultOfQuery::process(ASTPtr & ast, const Context & context)
collect_aliases.process(ast);
CollectTables collect_tables;
collect_tables.process(ast, context, collect_aliases);
collect_tables.process(ast, context, collect_aliases, table_functions);
AnalyzeColumns analyze_columns;
analyze_columns.process(ast, collect_aliases, collect_tables);
TypeAndConstantInference inference;
inference.process(ast, context, collect_aliases, analyze_columns, analyze_lambdas);
inference.process(ast, context, collect_aliases, analyze_columns, analyze_lambdas, table_functions);
for (const ASTPtr & child : select->select_expression_list->children)
{

View File

@ -9,6 +9,7 @@ namespace DB
class WriteBuffer;
class Context;
struct ExecuteTableFunctions;
/** For SELECT query, determine names and types of columns of result,
@ -19,7 +20,7 @@ class Context;
*/
struct AnalyzeResultOfQuery
{
void process(ASTPtr & ast, const Context & context);
void process(ASTPtr & ast, const Context & context, ExecuteTableFunctions & table_functions);
/// Block will have non-nullptr columns for constant expressions.
Block result;

View File

@ -1,8 +1,8 @@
#include <Analyzers/CollectTables.h>
#include <Analyzers/CollectAliases.h>
#include <Analyzers/ExecuteTableFunctions.h>
#include <Analyzers/AnalyzeResultOfQuery.h>
#include <Interpreters/Context.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
@ -49,20 +49,20 @@ static CollectTables::TableInfo processOrdinaryTable(const ASTPtr & ast_database
}
static CollectTables::TableInfo processTableFunction(const ASTPtr & ast_table_function, const Context & context)
static CollectTables::TableInfo processTableFunction(const ASTPtr & ast_table_function, const ExecuteTableFunctions & table_functions)
{
const ASTFunction & function = typeid_cast<const ASTFunction &>(*ast_table_function);
IAST::Hash ast_hash = ast_table_function->getTreeHash();
auto it = table_functions.tables.find(ast_hash);
if (table_functions.tables.end() == it)
throw Exception("Table function " + function.name + " was not executed in advance.", ErrorCodes::LOGICAL_ERROR);
CollectTables::TableInfo res;
res.node = ast_table_function;
res.alias = function.tryGetAlias();
/// Obtain table function
TableFunctionPtr table_function_ptr = context.getTableFunctionFactory().get(function.name, context);
/// Execute it and store result
/// TODO Avoid double execution of table functions during type inference in subqueries.
/// TODO Avoid double execution of same table functions.
res.storage = table_function_ptr->execute(ast_table_function, context);
res.storage = it->second;
return res;
}
@ -78,10 +78,10 @@ static CollectTables::TableInfo processNoTables(const Context & context)
}
static CollectTables::TableInfo processSubquery(ASTPtr & ast_subquery, const Context & context)
static CollectTables::TableInfo processSubquery(ASTPtr & ast_subquery, const Context & context, ExecuteTableFunctions & table_functions)
{
AnalyzeResultOfQuery analyzer;
analyzer.process(typeid_cast<ASTSubquery &>(*ast_subquery).children.at(0), context);
analyzer.process(typeid_cast<ASTSubquery &>(*ast_subquery).children.at(0), context, table_functions);
CollectTables::TableInfo res;
res.node = ast_subquery;
@ -91,7 +91,7 @@ static CollectTables::TableInfo processSubquery(ASTPtr & ast_subquery, const Con
}
void CollectTables::process(ASTPtr & ast, const Context & context, const CollectAliases & aliases)
void CollectTables::process(ASTPtr & ast, const Context & context, const CollectAliases & aliases, ExecuteTableFunctions & table_functions)
{
const ASTSelectQuery * select = typeid_cast<const ASTSelectQuery *>(ast.get());
if (!select)
@ -120,11 +120,11 @@ void CollectTables::process(ASTPtr & ast, const Context & context, const Collect
}
else if (table_expression.table_function)
{
tables.emplace_back(processTableFunction(table_expression.table_function, context));
tables.emplace_back(processTableFunction(table_expression.table_function, table_functions));
}
else if (table_expression.subquery)
{
tables.emplace_back(processSubquery(table_expression.subquery, context));
tables.emplace_back(processSubquery(table_expression.subquery, context, table_functions));
}
else
throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR);

View File

@ -10,6 +10,7 @@ namespace DB
class Context;
struct CollectAliases;
struct ExecuteTableFunctions;
class WriteBuffer;
@ -18,13 +19,11 @@ class WriteBuffer;
*
* For ordinary tables, determine database and table name, obtain and keep StoragePtr.
* For subqueries, determine result structure. This requires analysis of subquery, such as type inference.
* For table functions, execute them to obtain resulting StoragePtr.
*
* NOTE: We assume, that execution of table functions is cheap, as we do it during analysis.
* For table functions, grab them from prepared ExecuteTableFunctions object.
*/
struct CollectTables
{
void process(ASTPtr & ast, const Context & context, const CollectAliases & aliases);
void process(ASTPtr & ast, const Context & context, const CollectAliases & aliases, ExecuteTableFunctions & table_functions);
enum class Kind
{

View File

@ -0,0 +1,79 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/Context.h>
#include <Analyzers/ExecuteTableFunctions.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int LOGICAL_ERROR;
}
/// Allows to execute exactly same table functions only once.
using ASTTreeToTable = std::map<IAST::Hash, StoragePtr>;
static void processTableFunction(const ASTPtr & ast_table_function, const Context & context, ExecuteTableFunctions::Tables & result_map)
{
const ASTFunction & function = typeid_cast<const ASTFunction &>(*ast_table_function);
/// If already executed.
IAST::Hash ast_hash = ast_table_function->getTreeHash();
if (result_map.count(ast_hash))
return;
/// Obtain table function
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(function.name, context);
/// Execute it and store result
StoragePtr table = table_function_ptr->execute(ast_table_function, context);
result_map[ast_hash] = table;
}
void ExecuteTableFunctions::process(ASTPtr & ast, const Context & context)
{
const ASTSelectQuery * select = typeid_cast<const ASTSelectQuery *>(ast.get());
if (!select)
throw Exception("ExecuteTableFunctions::process was called for not a SELECT query", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
if (!select->tables)
return;
for (auto & child : select->tables->children)
{
ASTTablesInSelectQueryElement & element = static_cast<ASTTablesInSelectQueryElement &>(*child);
if (!element.table_expression) /// This is ARRAY JOIN
continue;
ASTTableExpression & table_expression = static_cast<ASTTableExpression &>(*element.table_expression);
if (!table_expression.table_function)
continue;
processTableFunction(table_expression.table_function, context, tables);
}
}
void ExecuteTableFunctions::dump(WriteBuffer & out) const
{
for (const auto & table : tables)
{
writeString(table.second->getName(), out);
writeCString("\n\n", out);
writeString(table.second->getColumnsList().toString(), out);
writeCString("\n", out);
}
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Parsers/IAST.h>
#include <Storages/IStorage.h>
#include <map>
namespace DB
{
class Context;
class WriteBuffer;
/** For every table function, found in first level of the query
* (don't go into subqueries)
* execute it and save corresponding StoragePtr.
*
* Execution of table functions must be done in a stage of query analysis,
* because otherwise we don't know table structure. So, it is assumed as cheap operation.
*
* Multiple occurences of table functions with same arguments will be executed only once.
*/
struct ExecuteTableFunctions
{
void process(ASTPtr & ast, const Context & context);
using Tables = std::map<IAST::Hash, StoragePtr>;
Tables tables;
/// Debug output
void dump(WriteBuffer & out) const;
};
}

View File

@ -59,7 +59,8 @@ void processImpl(
ASTPtr & ast, const Context & context,
CollectAliases & aliases, const AnalyzeColumns & columns,
TypeAndConstantInference::Info & info,
const AnalyzeLambdas & lambdas);
const AnalyzeLambdas & lambdas,
ExecuteTableFunctions & table_functions);
void processLiteral(const String & column_name, const ASTPtr & ast, TypeAndConstantInference::Info & info)
@ -77,7 +78,7 @@ void processLiteral(const String & column_name, const ASTPtr & ast, TypeAndConst
void processIdentifier(const String & column_name, const ASTPtr & ast, TypeAndConstantInference::Info & info,
const Context & context, CollectAliases & aliases, const AnalyzeColumns & columns,
const AnalyzeLambdas & lambdas)
const AnalyzeLambdas & lambdas, ExecuteTableFunctions & table_functions)
{
/// Column from table
auto it = columns.columns.find(column_name);
@ -112,7 +113,7 @@ void processIdentifier(const String & column_name, const ASTPtr & ast, TypeAndCo
if (it->second.kind != CollectAliases::Kind::Expression)
throw Exception("Logical error: unexpected kind of alias", ErrorCodes::LOGICAL_ERROR);
processImpl(it->second.node, context, aliases, columns, info, lambdas);
processImpl(it->second.node, context, aliases, columns, info, lambdas, table_functions);
info[column_name] = info[it->second.node->getColumnName()];
}
}
@ -258,12 +259,12 @@ void processFunction(const String & column_name, ASTPtr & ast, TypeAndConstantIn
void processScalarSubquery(const String & column_name, ASTPtr & ast, TypeAndConstantInference::Info & info,
const Context & context)
const Context & context, ExecuteTableFunctions & table_functions)
{
ASTSubquery * subquery = static_cast<ASTSubquery *>(ast.get());
AnalyzeResultOfQuery analyzer;
analyzer.process(subquery->children.at(0), context);
analyzer.process(subquery->children.at(0), context, table_functions);
if (!analyzer.result)
throw Exception("Logical error: no columns returned from scalar subquery", ErrorCodes::LOGICAL_ERROR);
@ -319,7 +320,8 @@ void processHigherOrderFunction(const String & column_name,
ASTPtr & ast, const Context & context,
CollectAliases & aliases, const AnalyzeColumns & columns,
TypeAndConstantInference::Info & info,
const AnalyzeLambdas & lambdas)
const AnalyzeLambdas & lambdas,
ExecuteTableFunctions & table_functions)
{
ASTFunction * function = static_cast<ASTFunction *>(ast.get());
@ -383,7 +385,7 @@ void processHigherOrderFunction(const String & column_name,
/// Now dive into.
processImpl(lambda->arguments->children[1], context, aliases, columns, info, lambdas);
processImpl(lambda->arguments->children[1], context, aliases, columns, info, lambdas, table_functions);
/// Update Expression type (expression signature).
@ -398,7 +400,8 @@ void processImpl(
ASTPtr & ast, const Context & context,
CollectAliases & aliases, const AnalyzeColumns & columns,
TypeAndConstantInference::Info & info,
const AnalyzeLambdas & lambdas)
const AnalyzeLambdas & lambdas,
ExecuteTableFunctions & table_functions)
{
const ASTFunction * function = typeid_cast<const ASTFunction *>(ast.get());
@ -428,7 +431,7 @@ void processImpl(
if (function && function->name == "lambda")
continue;
processImpl(child, context, aliases, columns, info, lambdas);
processImpl(child, context, aliases, columns, info, lambdas, table_functions);
}
}
@ -453,25 +456,28 @@ void processImpl(
{
/// If this is higher-order function, determine types of lambda arguments and infer types of subexpressions inside lambdas.
if (lambdas.higher_order_functions.end() != std::find(lambdas.higher_order_functions.begin(), lambdas.higher_order_functions.end(), ast))
processHigherOrderFunction(column_name, ast, context, aliases, columns, info, lambdas);
processHigherOrderFunction(column_name, ast, context, aliases, columns, info, lambdas, table_functions);
processFunction(column_name, ast, info, context);
}
else if (literal)
processLiteral(column_name, ast, info);
else if (identifier)
processIdentifier(column_name, ast, info, context, aliases, columns, lambdas);
processIdentifier(column_name, ast, info, context, aliases, columns, lambdas, table_functions);
else if (subquery)
processScalarSubquery(column_name, ast, info, context);
processScalarSubquery(column_name, ast, info, context, table_functions);
}
}
void TypeAndConstantInference::process(ASTPtr & ast, const Context & context,
CollectAliases & aliases, const AnalyzeColumns & columns, const AnalyzeLambdas & lambdas)
CollectAliases & aliases,
const AnalyzeColumns & columns,
const AnalyzeLambdas & lambdas,
ExecuteTableFunctions & table_functions)
{
processImpl(ast, context, aliases, columns, info, lambdas);
processImpl(ast, context, aliases, columns, info, lambdas, table_functions);
}

View File

@ -14,6 +14,7 @@ class WriteBuffer;
struct CollectAliases;
struct AnalyzeColumns;
struct AnalyzeLambdas;
struct ExecuteTableFunctions;
class IFunction;
class IAggregateFunction;
@ -33,7 +34,8 @@ struct TypeAndConstantInference
void process(ASTPtr & ast, const Context & context,
CollectAliases & aliases,
const AnalyzeColumns & columns,
const AnalyzeLambdas & analyze_lambdas);
const AnalyzeLambdas & analyze_lambdas,
ExecuteTableFunctions & table_functions);
struct ExpressionInfo
{

View File

@ -8,7 +8,8 @@ add_executable(analyze_columns analyze_columns.cpp)
target_link_libraries(analyze_columns dbms clickhouse_storages_system)
add_executable(type_and_constant_inference type_and_constant_inference.cpp)
target_link_libraries(type_and_constant_inference clickhouse_storages_system clickhouse_functions dbms)
target_link_libraries(type_and_constant_inference
clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions dbms)
add_executable(analyze_result_of_query analyze_result_of_query.cpp)
target_link_libraries(analyze_result_of_query dbms clickhouse_storages_system)

View File

@ -2,6 +2,7 @@
#include <Analyzers/CollectTables.h>
#include <Analyzers/AnalyzeColumns.h>
#include <Analyzers/AnalyzeLambdas.h>
#include <Analyzers/ExecuteTableFunctions.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/formatAST.h>
@ -36,7 +37,7 @@ try
auto system_database = std::make_shared<DatabaseMemory>("system");
context.addDatabase("system", system_database);
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers", false));
context.setCurrentDatabase("system");
AnalyzeLambdas analyze_lambdas;
@ -45,8 +46,11 @@ try
CollectAliases collect_aliases;
collect_aliases.process(ast);
ExecuteTableFunctions execute_table_functions;
execute_table_functions.process(ast, context);
CollectTables collect_tables;
collect_tables.process(ast, context, collect_aliases);
collect_tables.process(ast, context, collect_aliases, execute_table_functions);
AnalyzeColumns analyze_columns;
analyze_columns.process(ast, collect_aliases, collect_tables);

View File

@ -1,4 +1,5 @@
#include <Analyzers/AnalyzeResultOfQuery.h>
#include <Analyzers/ExecuteTableFunctions.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserSelectQuery.h>
#include <IO/WriteBufferFromFileDescriptor.h>
@ -32,11 +33,14 @@ try
auto system_database = std::make_shared<DatabaseMemory>("system");
context.addDatabase("system", system_database);
context.setCurrentDatabase("system");
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers"));
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers", false));
ExecuteTableFunctions execute_table_functions;
execute_table_functions.process(ast, context);
AnalyzeResultOfQuery analyzer;
analyzer.process(ast, context);
analyzer.process(ast, context, execute_table_functions);
analyzer.dump(out);

View File

@ -1,4 +1,5 @@
#include <Analyzers/CollectAliases.h>
#include <Analyzers/ExecuteTableFunctions.h>
#include <Analyzers/CollectTables.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserSelectQuery.h>
@ -33,14 +34,17 @@ try
auto system_database = std::make_shared<DatabaseMemory>("system");
context.addDatabase("system", system_database);
context.setCurrentDatabase("system");
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers"));
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers", false));
CollectAliases collect_aliases;
collect_aliases.process(ast);
ExecuteTableFunctions execute_table_functions;
execute_table_functions.process(ast, context);
CollectTables collect_tables;
collect_tables.process(ast, context, collect_aliases);
collect_tables.process(ast, context, collect_aliases, execute_table_functions);
collect_tables.dump(out);
return 0;

View File

@ -2,6 +2,7 @@
#include <Analyzers/CollectTables.h>
#include <Analyzers/AnalyzeColumns.h>
#include <Analyzers/AnalyzeLambdas.h>
#include <Analyzers/ExecuteTableFunctions.h>
#include <Analyzers/TypeAndConstantInference.h>
#include <Analyzers/TranslatePositionalArguments.h>
#include <Analyzers/OptimizeGroupOrderLimitBy.h>
@ -38,8 +39,8 @@ try
auto system_database = std::make_shared<DatabaseMemory>("system");
context.addDatabase("system", system_database);
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers"));
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers", false));
context.setCurrentDatabase("system");
AnalyzeLambdas analyze_lambdas;
@ -48,14 +49,17 @@ try
CollectAliases collect_aliases;
collect_aliases.process(ast);
ExecuteTableFunctions execute_table_functions;
execute_table_functions.process(ast, context);
CollectTables collect_tables;
collect_tables.process(ast, context, collect_aliases);
collect_tables.process(ast, context, collect_aliases, execute_table_functions);
AnalyzeColumns analyze_columns;
analyze_columns.process(ast, collect_aliases, collect_tables);
TypeAndConstantInference inference;
inference.process(ast, context, collect_aliases, analyze_columns, analyze_lambdas);
inference.process(ast, context, collect_aliases, analyze_columns, analyze_lambdas, execute_table_functions);
TranslatePositionalArguments translation;
translation.process(ast);

View File

@ -2,6 +2,7 @@
#include <Analyzers/CollectTables.h>
#include <Analyzers/AnalyzeColumns.h>
#include <Analyzers/AnalyzeLambdas.h>
#include <Analyzers/ExecuteTableFunctions.h>
#include <Analyzers/TypeAndConstantInference.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserSelectQuery.h>
@ -15,6 +16,8 @@
#include <Storages/System/StorageSystemNumbers.h>
#include <Databases/DatabaseMemory.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
/// Parses query from stdin and print data types of expressions; and for constant expressions, print its values.
@ -25,6 +28,8 @@ try
using namespace DB;
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
ReadBufferFromFileDescriptor in(STDIN_FILENO);
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
@ -39,8 +44,8 @@ try
auto system_database = std::make_shared<DatabaseMemory>("system");
context.addDatabase("system", system_database);
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers"));
system_database->attachTable("one", StorageSystemOne::create("one"));
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers", false));
context.setCurrentDatabase("system");
AnalyzeLambdas analyze_lambdas;
@ -49,14 +54,17 @@ try
CollectAliases collect_aliases;
collect_aliases.process(ast);
ExecuteTableFunctions execute_table_functions;
execute_table_functions.process(ast, context);
CollectTables collect_tables;
collect_tables.process(ast, context, collect_aliases);
collect_tables.process(ast, context, collect_aliases, execute_table_functions);
AnalyzeColumns analyze_columns;
analyze_columns.process(ast, collect_aliases, collect_tables);
TypeAndConstantInference inference;
inference.process(ast, context, collect_aliases, analyze_columns, analyze_lambdas);
inference.process(ast, context, collect_aliases, analyze_columns, analyze_lambdas, execute_table_functions);
inference.dump(out);
out.next();

View File

@ -5,6 +5,9 @@ install (FILES config.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client CO
add_library (clickhouse-benchmark Benchmark.cpp)
target_link_libraries (clickhouse-benchmark dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
add_library (clickhouse-performance-test PerformanceTest.cpp)
target_link_libraries (clickhouse-performance-test dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
if (ENABLE_TESTS)
add_subdirectory (tests)
endif ()

View File

@ -191,6 +191,7 @@ private:
APPLY_FOR_LIMITS(EXTRACT_LIMIT)
#undef EXTRACT_LIMIT
/// FIXME Why do we need this?
registerFunctions();
registerAggregateFunctions();
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,7 @@
<?xml version="1.0"?>
<profiles>
<default></default>
<single_thread>
<max_threads>1</max_threads>
</single_thread>
</profiles>

View File

@ -10,7 +10,7 @@
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <ext/enumerate.hpp>
#include <ext/enumerate.h>
namespace DB

View File

@ -240,6 +240,8 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
if (!limit)
limit = end_idx;
else
limit = std::min(end_idx, limit);
while (read_idx < limit && !isNullAt(res[read_idx]))
{
@ -271,7 +273,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
}
else
{
/// Shift all NULL values to the begin.
/// Shift all NULL values to the beginning.
ssize_t read_idx = res.size() - 1;
ssize_t write_idx = res.size() - 1;

View File

@ -1,6 +1,6 @@
#include <Columns/ColumnTuple.h>
#include <ext/map.hpp>
#include <ext/range.hpp>
#include <ext/map.h>
#include <ext/range.h>
namespace DB

View File

@ -11,7 +11,7 @@
#include <Columns/ColumnVector.h>
#include <ext/bit_cast.hpp>
#include <ext/bit_cast.h>
#if __SSE2__
#include <emmintrin.h>

View File

@ -10,7 +10,7 @@
#include <boost/intrusive/list.hpp>
#include <boost/intrusive/set.hpp>
#include <boost/noncopyable.hpp>
#include <ext/scope_guard.hpp>
#include <ext/scope_guard.h>
#include <Common/Exception.h>
#include <Common/randomSeed.h>

View File

@ -6,7 +6,8 @@
#include <Poco/Mutex.h>
#include <Poco/Semaphore.h>
#include <Core/Types.h>
#include <common/Types.h>
namespace detail
{
@ -66,8 +67,8 @@ public:
fill_count.set();
}
template <class ... Args>
void emplace(Args && ... args)
template <typename... Args>
void emplace(Args &&... args)
{
empty_count.wait();
{
@ -88,7 +89,7 @@ public:
empty_count.set();
}
bool tryPush(const T & x, DB::UInt64 milliseconds = 0)
bool tryPush(const T & x, UInt64 milliseconds = 0)
{
if (empty_count.tryWait(milliseconds))
{
@ -102,8 +103,8 @@ public:
return false;
}
template <class ... Args>
bool tryEmplace(DB::UInt64 milliseconds, Args && ... args)
template <typename... Args>
bool tryEmplace(UInt64 milliseconds, Args &&... args)
{
if (empty_count.tryWait(milliseconds))
{
@ -117,7 +118,7 @@ public:
return false;
}
bool tryPop(T & x, DB::UInt64 milliseconds = 0)
bool tryPop(T & x, UInt64 milliseconds = 0)
{
if (fill_count.tryWait(milliseconds))
{

View File

@ -198,6 +198,7 @@ public:
/// Create table
NamesAndTypesListPtr columns = std::make_shared<NamesAndTypesList>(sample_block.getColumnsList());
StoragePtr storage = StorageMemory::create(data.second, columns);
storage->startup();
context.addExternalTable(data.second, storage);
BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef());

View File

@ -70,11 +70,11 @@ template<> struct MinCounterTypeHelper<3> { using Type = UInt64; };
/// Used in HyperLogLogCounter in order to spend memory efficiently.
template<UInt64 MaxValue> struct MinCounterType
{
typedef typename MinCounterTypeHelper<
using Type = typename MinCounterTypeHelper<
(MaxValue >= 1 << 8) +
(MaxValue >= 1 << 16) +
(MaxValue >= 1ULL << 32)
>::Type Type;
>::Type;
};
/// Denominator of expression for HyperLogLog algorithm.

View File

@ -4,8 +4,9 @@
#include <cstdlib>
#include <climits>
#include <random>
#include <functional>
#include <common/Types.h>
#include <ext/scope_guard.hpp>
#include <ext/scope_guard.h>
#include <Core/Types.h>
#include <Common/PoolBase.h>
#include <Common/ProfileEvents.h>

View File

@ -8,7 +8,7 @@
#include <cstdint>
#include <type_traits>
#include <ext/bit_cast.hpp>
#include <ext/bit_cast.h>
#include <Core/Types.h>
#include <Core/Defines.h>

View File

@ -3,7 +3,7 @@
#include <map>
#include <tuple>
#include <mutex>
#include <ext/function_traits.hpp>
#include <ext/function_traits.h>
/** The simplest cache for a free function.

View File

@ -2,9 +2,9 @@
#include <Common/Arena.h>
#include <common/likely.h>
#include <ext/range.hpp>
#include <ext/size.hpp>
#include <ext/bit_cast.hpp>
#include <ext/range.h>
#include <ext/size.h>
#include <ext/bit_cast.h>
#include <cstdlib>
#include <memory>

View File

@ -1,7 +1,7 @@
#pragma once
#include <Common/UTF8Helpers.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Unicode.h>
#include <stdint.h>

View File

@ -5,7 +5,7 @@
#include <Core/Types.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Unicode.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <stdint.h>
#include <string.h>

View File

@ -1,4 +1,4 @@
#include <IO/ReadHelpers.h>
#include <Common/hex.h>
#include <Common/StringUtils.h>
#include <Common/escapeForFileName.h>
@ -11,8 +11,6 @@ std::string escapeForFileName(const std::string & s)
const char * pos = s.data();
const char * end = pos + s.size();
static const char * hex = "0123456789ABCDEF";
while (pos != end)
{
char c = *pos;
@ -22,8 +20,8 @@ std::string escapeForFileName(const std::string & s)
else
{
res += '%';
res += hex[c / 16];
res += hex[c % 16];
res += hexUppercase(c / 16);
res += hexUppercase(c % 16);
}
++pos;

View File

@ -1,6 +1,9 @@
#include <Common/hex.h>
const char * const char_to_digit_table = (
const char * const hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
const char * const hex_digit_to_char_lowercase_table = "0123456789abcdef";
const char * const hex_char_to_digit_table =
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
@ -16,5 +19,4 @@ const char * const char_to_digit_table = (
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
);
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";

View File

@ -1,3 +1,27 @@
#pragma once
extern const char * const char_to_digit_table;
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
extern const char * const hex_digit_to_char_uppercase_table;
extern const char * const hex_digit_to_char_lowercase_table;
inline char hexUppercase(unsigned char c)
{
return hex_digit_to_char_uppercase_table[c];
}
inline char hexLowercase(unsigned char c)
{
return hex_digit_to_char_lowercase_table[c];
}
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
extern const char * const hex_char_to_digit_table;
inline char unhex(char c)
{
return hex_char_to_digit_table[static_cast<unsigned char>(c)];
}

View File

@ -11,8 +11,8 @@
#include <memory>
#include <array>
#include <sys/resource.h>
#include <ext/bit_cast.hpp>
#include <ext/size.hpp>
#include <ext/bit_cast.h>
#include <ext/size.h>
#include <Common/Arena.h>
#include <Core/StringRef.h>

View File

@ -58,9 +58,9 @@ int main(int argc, char ** argv)
}
{
typedef HashSet<
using Cont = HashSet<
DB::UInt128,
DB::UInt128TrivialHash> Cont;
DB::UInt128TrivialHash>;
Cont cont;
std::string dump;

View File

@ -53,14 +53,14 @@ struct __attribute__((__aligned__(64))) AlignedSmallLock : public SmallLock
using Mutex = std::mutex;
/*typedef HashTableWithSmallLocks<
/*using MapSmallLocks = HashTableWithSmallLocks<
Key,
HashTableCellWithLock<
Key,
HashMapCell<Key, Value, DefaultHash<Key> > >,
DefaultHash<Key>,
HashTableGrower<21>,
HashTableAllocator> MapSmallLocks;*/
HashTableAllocator>;*/
void aggregate1(Map & map, Source::const_iterator begin, Source::const_iterator end)

View File

@ -1,7 +1,7 @@
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#include <ext/bit_cast.hpp>
#include <ext/bit_cast.h>
#include <Common/RadixSort.h>
#include <Common/Stopwatch.h>
#include <IO/ReadHelpers.h>

View File

@ -374,6 +374,9 @@ namespace ErrorCodes
extern const int ALL_REPLICAS_ARE_STALE = 369;
extern const int DATA_TYPE_CANNOT_BE_USED_IN_TABLES = 370;
extern const int INCONSISTENT_CLUSTER_DEFINITION = 371;
extern const int SESSION_NOT_FOUND = 372;
extern const int SESSION_IS_LOCKED = 373;
extern const int INVALID_SESSION_TIMEOUT = 374;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -2,7 +2,7 @@
#include <DataStreams/MaterializingBlockOutputStream.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <ext/range.hpp>
#include <ext/range.h>
namespace DB

View File

@ -83,6 +83,11 @@ Block MergeSortingBlockInputStream::readImpl()
removeConstantsFromSortDescription(sample_block, description);
}
/// If there were only const columns in sort description, then there is no need to sort.
/// Return the blocks as is.
if (description.empty())
return block;
removeConstantsFromBlock(block);
blocks.push_back(block);

View File

@ -15,6 +15,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
PrettyBlockOutputStream::PrettyBlockOutputStream(WriteBuffer & ostr_, bool no_escapes_, size_t max_rows_, const Context & context_)
: ostr(ostr_), max_rows(max_rows_), no_escapes(no_escapes_), context(context_)
{

View File

@ -42,7 +42,7 @@ try
chain.finalize();
ExpressionActionsPtr expression = chain.getLastActions();
StoragePtr table = StorageSystemNumbers::create("Numbers");
StoragePtr table = StorageSystemNumbers::create("numbers", false);
Names column_names;
column_names.push_back("number");
@ -50,7 +50,7 @@ try
QueryProcessingStage::Enum stage;
BlockInputStreamPtr in;
in = table->read(column_names, 0, context, stage)[0];
in = table->read(column_names, 0, context, stage, 8192, 1)[0];
in = std::make_shared<ExpressionBlockInputStream>(in, expression);
in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10));

View File

@ -48,14 +48,14 @@ try
chain.finalize();
ExpressionActionsPtr expression = chain.getLastActions();
StoragePtr table = StorageSystemNumbers::create("Numbers");
StoragePtr table = StorageSystemNumbers::create("numbers", false);
Names column_names;
column_names.push_back("number");
QueryProcessingStage::Enum stage;
BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0];
BlockInputStreamPtr in = table->read(column_names, 0, context, stage, 8192, 1)[0];
in = std::make_shared<FilterBlockInputStream>(in, expression, 1);
in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10));

View File

@ -35,62 +35,62 @@ int main(int argc, char ** argv)
{
NamesAndTypesList names_and_types_list
{
{"WatchID", std::make_shared<DataTypeUInt64>()},
{"JavaEnable", std::make_shared<DataTypeUInt8>()},
{"Title", std::make_shared<DataTypeString>()},
{"EventTime", std::make_shared<DataTypeDateTime>()},
{"CounterID", std::make_shared<DataTypeUInt32>()},
{"ClientIP", std::make_shared<DataTypeUInt32>()},
{"RegionID", std::make_shared<DataTypeUInt32>()},
{"UniqID", std::make_shared<DataTypeUInt64>()},
{"CounterClass", std::make_shared<DataTypeUInt8>()},
{"OS", std::make_shared<DataTypeUInt8>()},
{"UserAgent", std::make_shared<DataTypeUInt8>()},
{"URL", std::make_shared<DataTypeString>()},
{"Referer", std::make_shared<DataTypeString>()},
{"ResolutionWidth", std::make_shared<DataTypeUInt16>()},
{"ResolutionHeight", std::make_shared<DataTypeUInt16>()},
{"ResolutionDepth", std::make_shared<DataTypeUInt8>()},
{"FlashMajor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor2", std::make_shared<DataTypeString>()},
{"NetMajor", std::make_shared<DataTypeUInt8>()},
{"NetMinor", std::make_shared<DataTypeUInt8>()},
{"UserAgentMajor", std::make_shared<DataTypeUInt16>()},
{"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)},
{"CookieEnable", std::make_shared<DataTypeUInt8>()},
{"JavascriptEnable", std::make_shared<DataTypeUInt8>()},
{"IsMobile", std::make_shared<DataTypeUInt8>()},
{"MobilePhone", std::make_shared<DataTypeUInt8>()},
{"MobilePhoneModel", std::make_shared<DataTypeString>()},
{"Params", std::make_shared<DataTypeString>()},
{"IPNetworkID", std::make_shared<DataTypeUInt32>()},
{"TraficSourceID", std::make_shared<DataTypeInt8>()},
{"SearchEngineID", std::make_shared<DataTypeUInt16>()},
{"SearchPhrase", std::make_shared<DataTypeString>()},
{"AdvEngineID", std::make_shared<DataTypeUInt8>()},
{"IsArtifical", std::make_shared<DataTypeUInt8>()},
{"WindowClientWidth", std::make_shared<DataTypeUInt16>()},
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()},
{"ClientTimeZone", std::make_shared<DataTypeInt16>()},
{"ClientEventTime", std::make_shared<DataTypeDateTime>()},
{"SilverlightVersion1", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion2", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion3", std::make_shared<DataTypeUInt32>()},
{"SilverlightVersion4", std::make_shared<DataTypeUInt16>()},
{"PageCharset", std::make_shared<DataTypeString>()},
{"CodeVersion", std::make_shared<DataTypeUInt32>()},
{"IsLink", std::make_shared<DataTypeUInt8>()},
{"IsDownload", std::make_shared<DataTypeUInt8>()},
{"IsNotBounce", std::make_shared<DataTypeUInt8>()},
{"FUniqID", std::make_shared<DataTypeUInt64>()},
{"OriginalURL", std::make_shared<DataTypeString>()},
{"HID", std::make_shared<DataTypeUInt32>()},
{"IsOldCounter", std::make_shared<DataTypeUInt8>()},
{"IsEvent", std::make_shared<DataTypeUInt8>()},
{"IsParameter", std::make_shared<DataTypeUInt8>()},
{"DontCountHits", std::make_shared<DataTypeUInt8>()},
{"WithHash", std::make_shared<DataTypeUInt8>()},
{"WatchID", std::make_shared<DataTypeUInt64>()},
{"JavaEnable", std::make_shared<DataTypeUInt8>()},
{"Title", std::make_shared<DataTypeString>()},
{"EventTime", std::make_shared<DataTypeDateTime>()},
{"CounterID", std::make_shared<DataTypeUInt32>()},
{"ClientIP", std::make_shared<DataTypeUInt32>()},
{"RegionID", std::make_shared<DataTypeUInt32>()},
{"UniqID", std::make_shared<DataTypeUInt64>()},
{"CounterClass", std::make_shared<DataTypeUInt8>()},
{"OS", std::make_shared<DataTypeUInt8>()},
{"UserAgent", std::make_shared<DataTypeUInt8>()},
{"URL", std::make_shared<DataTypeString>()},
{"Referer", std::make_shared<DataTypeString>()},
{"ResolutionWidth", std::make_shared<DataTypeUInt16>()},
{"ResolutionHeight", std::make_shared<DataTypeUInt16>()},
{"ResolutionDepth", std::make_shared<DataTypeUInt8>()},
{"FlashMajor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor2", std::make_shared<DataTypeString>()},
{"NetMajor", std::make_shared<DataTypeUInt8>()},
{"NetMinor", std::make_shared<DataTypeUInt8>()},
{"UserAgentMajor", std::make_shared<DataTypeUInt16>()},
{"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)},
{"CookieEnable", std::make_shared<DataTypeUInt8>()},
{"JavascriptEnable", std::make_shared<DataTypeUInt8>()},
{"IsMobile", std::make_shared<DataTypeUInt8>()},
{"MobilePhone", std::make_shared<DataTypeUInt8>()},
{"MobilePhoneModel", std::make_shared<DataTypeString>()},
{"Params", std::make_shared<DataTypeString>()},
{"IPNetworkID", std::make_shared<DataTypeUInt32>()},
{"TraficSourceID", std::make_shared<DataTypeInt8>()},
{"SearchEngineID", std::make_shared<DataTypeUInt16>()},
{"SearchPhrase", std::make_shared<DataTypeString>()},
{"AdvEngineID", std::make_shared<DataTypeUInt8>()},
{"IsArtifical", std::make_shared<DataTypeUInt8>()},
{"WindowClientWidth", std::make_shared<DataTypeUInt16>()},
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()},
{"ClientTimeZone", std::make_shared<DataTypeInt16>()},
{"ClientEventTime", std::make_shared<DataTypeDateTime>()},
{"SilverlightVersion1", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion2", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion3", std::make_shared<DataTypeUInt32>()},
{"SilverlightVersion4", std::make_shared<DataTypeUInt16>()},
{"PageCharset", std::make_shared<DataTypeString>()},
{"CodeVersion", std::make_shared<DataTypeUInt32>()},
{"IsLink", std::make_shared<DataTypeUInt8>()},
{"IsDownload", std::make_shared<DataTypeUInt8>()},
{"IsNotBounce", std::make_shared<DataTypeUInt8>()},
{"FUniqID", std::make_shared<DataTypeUInt64>()},
{"OriginalURL", std::make_shared<DataTypeString>()},
{"HID", std::make_shared<DataTypeUInt32>()},
{"IsOldCounter", std::make_shared<DataTypeUInt8>()},
{"IsEvent", std::make_shared<DataTypeUInt8>()},
{"IsParameter", std::make_shared<DataTypeUInt8>()},
{"DontCountHits", std::make_shared<DataTypeUInt8>()},
{"WithHash", std::make_shared<DataTypeUInt8>()},
};
Context context;
@ -105,7 +105,9 @@ int main(int argc, char ** argv)
/// create an object of an existing hit log table
StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list));
StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list),
NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}, DEFAULT_MAX_COMPRESS_BLOCK_SIZE);
table->startup();
/// read from it, apply the expression, filter, and write in tsv form to the console
@ -126,7 +128,7 @@ int main(int argc, char ** argv)
QueryProcessingStage::Enum stage;
BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0];
BlockInputStreamPtr in = table->read(column_names, 0, context, stage, 8192, 1)[0];
in = std::make_shared<FilterBlockInputStream>(in, expression, 4);
//in = std::make_shared<LimitBlockInputStream>(in, 10, 0);

View File

@ -65,14 +65,14 @@ try
chain.finalize();
ExpressionActionsPtr expression = chain.getLastActions();
StoragePtr table = StorageSystemNumbers::create("Numbers");
StoragePtr table = StorageSystemNumbers::create("numbers", false);
Names column_names;
column_names.push_back("number");
QueryProcessingStage::Enum stage;
BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0];
BlockInputStreamPtr in = table->read(column_names, 0, context, stage, 8192, 1)[0];
ForkBlockInputStreams fork(in);

View File

@ -30,62 +30,62 @@ try
NamesAndTypesList names_and_types_list
{
{"WatchID", std::make_shared<DataTypeUInt64>()},
{"JavaEnable", std::make_shared<DataTypeUInt8>()},
{"Title", std::make_shared<DataTypeString>()},
{"EventTime", std::make_shared<DataTypeDateTime>()},
{"CounterID", std::make_shared<DataTypeUInt32>()},
{"ClientIP", std::make_shared<DataTypeUInt32>()},
{"RegionID", std::make_shared<DataTypeUInt32>()},
{"UniqID", std::make_shared<DataTypeUInt64>()},
{"CounterClass", std::make_shared<DataTypeUInt8>()},
{"OS", std::make_shared<DataTypeUInt8>()},
{"UserAgent", std::make_shared<DataTypeUInt8>()},
{"URL", std::make_shared<DataTypeString>()},
{"Referer", std::make_shared<DataTypeString>()},
{"ResolutionWidth", std::make_shared<DataTypeUInt16>()},
{"ResolutionHeight", std::make_shared<DataTypeUInt16>()},
{"ResolutionDepth", std::make_shared<DataTypeUInt8>()},
{"FlashMajor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor2", std::make_shared<DataTypeString>()},
{"NetMajor", std::make_shared<DataTypeUInt8>()},
{"NetMinor", std::make_shared<DataTypeUInt8>()},
{"UserAgentMajor", std::make_shared<DataTypeUInt16>()},
{"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)},
{"CookieEnable", std::make_shared<DataTypeUInt8>()},
{"JavascriptEnable", std::make_shared<DataTypeUInt8>()},
{"IsMobile", std::make_shared<DataTypeUInt8>()},
{"MobilePhone", std::make_shared<DataTypeUInt8>()},
{"MobilePhoneModel", std::make_shared<DataTypeString>()},
{"Params", std::make_shared<DataTypeString>()},
{"IPNetworkID", std::make_shared<DataTypeUInt32>()},
{"TraficSourceID", std::make_shared<DataTypeInt8>()},
{"SearchEngineID", std::make_shared<DataTypeUInt16>()},
{"SearchPhrase", std::make_shared<DataTypeString>()},
{"AdvEngineID", std::make_shared<DataTypeUInt8>()},
{"IsArtifical", std::make_shared<DataTypeUInt8>()},
{"WindowClientWidth", std::make_shared<DataTypeUInt16>()},
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()},
{"ClientTimeZone", std::make_shared<DataTypeInt16>()},
{"ClientEventTime", std::make_shared<DataTypeDateTime>()},
{"SilverlightVersion1", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion2", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion3", std::make_shared<DataTypeUInt32>()},
{"SilverlightVersion4", std::make_shared<DataTypeUInt16>()},
{"PageCharset", std::make_shared<DataTypeString>()},
{"CodeVersion", std::make_shared<DataTypeUInt32>()},
{"IsLink", std::make_shared<DataTypeUInt8>()},
{"IsDownload", std::make_shared<DataTypeUInt8>()},
{"IsNotBounce", std::make_shared<DataTypeUInt8>()},
{"FUniqID", std::make_shared<DataTypeUInt64>()},
{"OriginalURL", std::make_shared<DataTypeString>()},
{"HID", std::make_shared<DataTypeUInt32>()},
{"IsOldCounter", std::make_shared<DataTypeUInt8>()},
{"IsEvent", std::make_shared<DataTypeUInt8>()},
{"IsParameter", std::make_shared<DataTypeUInt8>()},
{"DontCountHits", std::make_shared<DataTypeUInt8>()},
{"WithHash", std::make_shared<DataTypeUInt8>()},
{"WatchID", std::make_shared<DataTypeUInt64>()},
{"JavaEnable", std::make_shared<DataTypeUInt8>()},
{"Title", std::make_shared<DataTypeString>()},
{"EventTime", std::make_shared<DataTypeDateTime>()},
{"CounterID", std::make_shared<DataTypeUInt32>()},
{"ClientIP", std::make_shared<DataTypeUInt32>()},
{"RegionID", std::make_shared<DataTypeUInt32>()},
{"UniqID", std::make_shared<DataTypeUInt64>()},
{"CounterClass", std::make_shared<DataTypeUInt8>()},
{"OS", std::make_shared<DataTypeUInt8>()},
{"UserAgent", std::make_shared<DataTypeUInt8>()},
{"URL", std::make_shared<DataTypeString>()},
{"Referer", std::make_shared<DataTypeString>()},
{"ResolutionWidth", std::make_shared<DataTypeUInt16>()},
{"ResolutionHeight", std::make_shared<DataTypeUInt16>()},
{"ResolutionDepth", std::make_shared<DataTypeUInt8>()},
{"FlashMajor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor2", std::make_shared<DataTypeString>()},
{"NetMajor", std::make_shared<DataTypeUInt8>()},
{"NetMinor", std::make_shared<DataTypeUInt8>()},
{"UserAgentMajor", std::make_shared<DataTypeUInt16>()},
{"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)},
{"CookieEnable", std::make_shared<DataTypeUInt8>()},
{"JavascriptEnable", std::make_shared<DataTypeUInt8>()},
{"IsMobile", std::make_shared<DataTypeUInt8>()},
{"MobilePhone", std::make_shared<DataTypeUInt8>()},
{"MobilePhoneModel", std::make_shared<DataTypeString>()},
{"Params", std::make_shared<DataTypeString>()},
{"IPNetworkID", std::make_shared<DataTypeUInt32>()},
{"TraficSourceID", std::make_shared<DataTypeInt8>()},
{"SearchEngineID", std::make_shared<DataTypeUInt16>()},
{"SearchPhrase", std::make_shared<DataTypeString>()},
{"AdvEngineID", std::make_shared<DataTypeUInt8>()},
{"IsArtifical", std::make_shared<DataTypeUInt8>()},
{"WindowClientWidth", std::make_shared<DataTypeUInt16>()},
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()},
{"ClientTimeZone", std::make_shared<DataTypeInt16>()},
{"ClientEventTime", std::make_shared<DataTypeDateTime>()},
{"SilverlightVersion1", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion2", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion3", std::make_shared<DataTypeUInt32>()},
{"SilverlightVersion4", std::make_shared<DataTypeUInt16>()},
{"PageCharset", std::make_shared<DataTypeString>()},
{"CodeVersion", std::make_shared<DataTypeUInt32>()},
{"IsLink", std::make_shared<DataTypeUInt8>()},
{"IsDownload", std::make_shared<DataTypeUInt8>()},
{"IsNotBounce", std::make_shared<DataTypeUInt8>()},
{"FUniqID", std::make_shared<DataTypeUInt64>()},
{"OriginalURL", std::make_shared<DataTypeString>()},
{"HID", std::make_shared<DataTypeUInt32>()},
{"IsOldCounter", std::make_shared<DataTypeUInt8>()},
{"IsEvent", std::make_shared<DataTypeUInt8>()},
{"IsParameter", std::make_shared<DataTypeUInt8>()},
{"DontCountHits", std::make_shared<DataTypeUInt8>()},
{"WithHash", std::make_shared<DataTypeUInt8>()},
};
Names column_names;
@ -95,13 +95,15 @@ try
/// create an object of an existing hit log table
StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list));
StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list),
NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}, DEFAULT_MAX_COMPRESS_BLOCK_SIZE);
table->startup();
/// read from it
if (argc == 2 && 0 == strcmp(argv[1], "read"))
{
QueryProcessingStage::Enum stage;
BlockInputStreamPtr in = table->read(column_names, 0, Context{}, stage)[0];
BlockInputStreamPtr in = table->read(column_names, 0, Context{}, stage, 8192, 1)[0];
WriteBufferFromFileDescriptor out1(STDOUT_FILENO);
CompressedWriteBuffer out2(out1);
NativeBlockOutputStream out3(out2, ClickHouseRevision::get());

View File

@ -34,62 +34,62 @@ try
{
NamesAndTypesList names_and_types_list
{
{"WatchID", std::make_shared<DataTypeUInt64>()},
{"JavaEnable", std::make_shared<DataTypeUInt8>()},
{"Title", std::make_shared<DataTypeString>()},
{"EventTime", std::make_shared<DataTypeDateTime>()},
{"CounterID", std::make_shared<DataTypeUInt32>()},
{"ClientIP", std::make_shared<DataTypeUInt32>()},
{"RegionID", std::make_shared<DataTypeUInt32>()},
{"UniqID", std::make_shared<DataTypeUInt64>()},
{"CounterClass", std::make_shared<DataTypeUInt8>()},
{"OS", std::make_shared<DataTypeUInt8>()},
{"UserAgent", std::make_shared<DataTypeUInt8>()},
{"URL", std::make_shared<DataTypeString>()},
{"Referer", std::make_shared<DataTypeString>()},
{"ResolutionWidth", std::make_shared<DataTypeUInt16>()},
{"ResolutionHeight", std::make_shared<DataTypeUInt16>()},
{"ResolutionDepth", std::make_shared<DataTypeUInt8>()},
{"FlashMajor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor2", std::make_shared<DataTypeString>()},
{"NetMajor", std::make_shared<DataTypeUInt8>()},
{"NetMinor", std::make_shared<DataTypeUInt8>()},
{"UserAgentMajor", std::make_shared<DataTypeUInt16>()},
{"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)},
{"CookieEnable", std::make_shared<DataTypeUInt8>()},
{"JavascriptEnable", std::make_shared<DataTypeUInt8>()},
{"IsMobile", std::make_shared<DataTypeUInt8>()},
{"MobilePhone", std::make_shared<DataTypeUInt8>()},
{"MobilePhoneModel", std::make_shared<DataTypeString>()},
{"Params", std::make_shared<DataTypeString>()},
{"IPNetworkID", std::make_shared<DataTypeUInt32>()},
{"TraficSourceID", std::make_shared<DataTypeInt8>()},
{"SearchEngineID", std::make_shared<DataTypeUInt16>()},
{"SearchPhrase", std::make_shared<DataTypeString>()},
{"AdvEngineID", std::make_shared<DataTypeUInt8>()},
{"IsArtifical", std::make_shared<DataTypeUInt8>()},
{"WindowClientWidth", std::make_shared<DataTypeUInt16>()},
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()},
{"ClientTimeZone", std::make_shared<DataTypeInt16>()},
{"ClientEventTime", std::make_shared<DataTypeDateTime>()},
{"SilverlightVersion1", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion2", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion3", std::make_shared<DataTypeUInt32>()},
{"SilverlightVersion4", std::make_shared<DataTypeUInt16>()},
{"PageCharset", std::make_shared<DataTypeString>()},
{"CodeVersion", std::make_shared<DataTypeUInt32>()},
{"IsLink", std::make_shared<DataTypeUInt8>()},
{"IsDownload", std::make_shared<DataTypeUInt8>()},
{"IsNotBounce", std::make_shared<DataTypeUInt8>()},
{"FUniqID", std::make_shared<DataTypeUInt64>()},
{"OriginalURL", std::make_shared<DataTypeString>()},
{"HID", std::make_shared<DataTypeUInt32>()},
{"IsOldCounter", std::make_shared<DataTypeUInt8>()},
{"IsEvent", std::make_shared<DataTypeUInt8>()},
{"IsParameter", std::make_shared<DataTypeUInt8>()},
{"DontCountHits", std::make_shared<DataTypeUInt8>()},
{"WithHash", std::make_shared<DataTypeUInt8>()},
{"WatchID", std::make_shared<DataTypeUInt64>()},
{"JavaEnable", std::make_shared<DataTypeUInt8>()},
{"Title", std::make_shared<DataTypeString>()},
{"EventTime", std::make_shared<DataTypeDateTime>()},
{"CounterID", std::make_shared<DataTypeUInt32>()},
{"ClientIP", std::make_shared<DataTypeUInt32>()},
{"RegionID", std::make_shared<DataTypeUInt32>()},
{"UniqID", std::make_shared<DataTypeUInt64>()},
{"CounterClass", std::make_shared<DataTypeUInt8>()},
{"OS", std::make_shared<DataTypeUInt8>()},
{"UserAgent", std::make_shared<DataTypeUInt8>()},
{"URL", std::make_shared<DataTypeString>()},
{"Referer", std::make_shared<DataTypeString>()},
{"ResolutionWidth", std::make_shared<DataTypeUInt16>()},
{"ResolutionHeight", std::make_shared<DataTypeUInt16>()},
{"ResolutionDepth", std::make_shared<DataTypeUInt8>()},
{"FlashMajor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor", std::make_shared<DataTypeUInt8>()},
{"FlashMinor2", std::make_shared<DataTypeString>()},
{"NetMajor", std::make_shared<DataTypeUInt8>()},
{"NetMinor", std::make_shared<DataTypeUInt8>()},
{"UserAgentMajor", std::make_shared<DataTypeUInt16>()},
{"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)},
{"CookieEnable", std::make_shared<DataTypeUInt8>()},
{"JavascriptEnable", std::make_shared<DataTypeUInt8>()},
{"IsMobile", std::make_shared<DataTypeUInt8>()},
{"MobilePhone", std::make_shared<DataTypeUInt8>()},
{"MobilePhoneModel", std::make_shared<DataTypeString>()},
{"Params", std::make_shared<DataTypeString>()},
{"IPNetworkID", std::make_shared<DataTypeUInt32>()},
{"TraficSourceID", std::make_shared<DataTypeInt8>()},
{"SearchEngineID", std::make_shared<DataTypeUInt16>()},
{"SearchPhrase", std::make_shared<DataTypeString>()},
{"AdvEngineID", std::make_shared<DataTypeUInt8>()},
{"IsArtifical", std::make_shared<DataTypeUInt8>()},
{"WindowClientWidth", std::make_shared<DataTypeUInt16>()},
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()},
{"ClientTimeZone", std::make_shared<DataTypeInt16>()},
{"ClientEventTime", std::make_shared<DataTypeDateTime>()},
{"SilverlightVersion1", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion2", std::make_shared<DataTypeUInt8>()},
{"SilverlightVersion3", std::make_shared<DataTypeUInt32>()},
{"SilverlightVersion4", std::make_shared<DataTypeUInt16>()},
{"PageCharset", std::make_shared<DataTypeString>()},
{"CodeVersion", std::make_shared<DataTypeUInt32>()},
{"IsLink", std::make_shared<DataTypeUInt8>()},
{"IsDownload", std::make_shared<DataTypeUInt8>()},
{"IsNotBounce", std::make_shared<DataTypeUInt8>()},
{"FUniqID", std::make_shared<DataTypeUInt64>()},
{"OriginalURL", std::make_shared<DataTypeString>()},
{"HID", std::make_shared<DataTypeUInt32>()},
{"IsOldCounter", std::make_shared<DataTypeUInt8>()},
{"IsEvent", std::make_shared<DataTypeUInt8>()},
{"IsParameter", std::make_shared<DataTypeUInt8>()},
{"DontCountHits", std::make_shared<DataTypeUInt8>()},
{"WithHash", std::make_shared<DataTypeUInt8>()},
};
using NamesAndTypesMap = std::map<String, DataTypePtr>;
@ -107,7 +107,9 @@ try
/// create an object of an existing hit log table
StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list));
StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list),
NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}, DEFAULT_MAX_COMPRESS_BLOCK_SIZE);
table->startup();
/// read from it, sort it, and write it in tsv form to the console
@ -143,7 +145,7 @@ try
QueryProcessingStage::Enum stage;
BlockInputStreamPtr in = table->read(column_names, 0, Context{}, stage, argc == 2 ? atoi(argv[1]) : 1048576)[0];
BlockInputStreamPtr in = table->read(column_names, 0, Context{}, stage, argc == 2 ? atoi(argv[1]) : 65536, 1)[0];
in = std::make_shared<PartialSortingBlockInputStream>(in, sort_columns);
in = std::make_shared<MergeSortingBlockInputStream>(in, sort_columns, DEFAULT_BLOCK_SIZE, 0, 0, "");
//in = std::make_shared<LimitBlockInputStream>(in, 10, 0);

View File

@ -25,7 +25,7 @@ using namespace DB;
void test1()
{
Context context;
StoragePtr table = StorageSystemNumbers::create("numbers");
StoragePtr table = StorageSystemNumbers::create("numbers", false);
Names column_names;
column_names.push_back("number");
@ -35,9 +35,9 @@ void test1()
QueryProcessingStage::Enum stage3;
BlockInputStreams streams;
streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage1, 1)[0], 30, 30000));
streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage2, 1)[0], 30, 2000));
streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage3, 1)[0], 30, 100));
streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage1, 1, 1)[0], 30, 30000));
streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage2, 1, 1)[0], 30, 2000));
streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage3, 1, 1)[0], 30, 100));
UnionBlockInputStream<> union_stream(streams, nullptr, 2);
@ -50,13 +50,12 @@ void test1()
out->write(block);
wb.next();
}
//copyData(union_stream, *out);
}
void test2()
{
Context context;
StoragePtr table = StorageSystemNumbers::create("numbers");
StoragePtr table = StorageSystemNumbers::create("numbers", false);
Names column_names;
column_names.push_back("number");
@ -85,15 +84,15 @@ void test2()
BlockInputStreams streams;
BlockInputStreamPtr stream1 = std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage1, 1)[0], 30, 30000);
BlockInputStreamPtr stream1 = std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage1, 1, 1)[0], 30, 30000);
stream1 = std::make_shared<BlockExtraInfoInputStream>(stream1, extra_info1);
streams.emplace_back(stream1);
BlockInputStreamPtr stream2 = std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage2, 1)[0], 30, 2000);
BlockInputStreamPtr stream2 = std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage2, 1, 1)[0], 30, 2000);
stream2 = std::make_shared<BlockExtraInfoInputStream>(stream2, extra_info2);
streams.emplace_back(stream2);
BlockInputStreamPtr stream3 = std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage3, 1)[0], 30, 100);
BlockInputStreamPtr stream3 = std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, stage3, 1, 1)[0], 30, 100);
stream3 = std::make_shared<BlockExtraInfoInputStream>(stream3, extra_info3);
streams.emplace_back(stream3);
@ -165,7 +164,6 @@ void test2()
out->write(out_block);
wb.next();
}
//copyData(union_stream, *out);
}
int main(int argc, char ** argv)

View File

@ -1,11 +1,12 @@
#include <DataStreams/verbosePrintString.h>
#include <Common/hex.h>
#include <IO/Operators.h>
namespace DB
{
void verbosePrintString(BufferBase::Position begin, BufferBase::Position end, WriteBuffer & out)
void verbosePrintString(const char * begin, const char * end, WriteBuffer & out)
{
if (end == begin)
{
@ -50,10 +51,7 @@ void verbosePrintString(BufferBase::Position begin, BufferBase::Position end, Wr
default:
{
if (*pos >= 0 && *pos < 32)
{
static const char * hex = "0123456789ABCDEF";
out << "<0x" << hex[*pos / 16] << hex[*pos % 16] << ">";
}
out << "<0x" << hexUppercase(*pos / 16) << hexUppercase(*pos % 16) << ">";
else
out << *pos;
}

View File

@ -1,12 +1,14 @@
#pragma once
#include <IO/WriteBuffer.h>
namespace DB
{
class WriteBuffer;
/** Print string in double quotes and with control characters in "<NAME>" form - for output diagnostic info to user.
*/
void verbosePrintString(BufferBase::Position begin, BufferBase::Position end, WriteBuffer & out);
void verbosePrintString(const char * begin, const char * end, WriteBuffer & out);
}

View File

@ -125,16 +125,16 @@ void DataTypeArray::serializeBinaryBulk(const IColumn & column, WriteBuffer & os
}
void DataTypeArray::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const
void DataTypeArray::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const
{
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
ColumnArray::Offsets_t & offsets = column_array.getOffsets();
IColumn & nested_column = column_array.getData();
/// Number of values correlated with `offsets` must be read.
/// Number of values corresponding with `offsets` must be read.
size_t last_offset = (offsets.empty() ? 0 : offsets.back());
if (last_offset < nested_column.size())
throw Exception("Nested column longer than last offset", ErrorCodes::LOGICAL_ERROR);
throw Exception("Nested column is longer than last offset", ErrorCodes::LOGICAL_ERROR);
size_t nested_limit = last_offset - nested_column.size();
nested->deserializeBinaryBulk(nested_column, istr, nested_limit, 0);

View File

@ -26,7 +26,7 @@
#include <Parsers/parseQuery.h>
#include <DataTypes/DataTypeEnum.h>
#include <ext/map.hpp>
#include <ext/map.h>
namespace DB

View File

@ -184,11 +184,11 @@ void DataTypeString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr,
}
else
{
/** A small heuristic to evaluate that there are a lot of empty lines in the column.
/** A small heuristic to evaluate that there are a lot of empty strings in the column.
* In this case, to save RAM, we will say that the average size of the value is small.
*/
if (istr.position() + sizeof(UInt32) <= istr.buffer().end()
&& unalignedLoad<UInt32>(istr.position()) == 0) /// The first 4 rows are in the buffer and are empty.
&& unalignedLoad<UInt32>(istr.position()) == 0) /// The first 4 strings are in the buffer and are empty.
{
avg_chars_size = 1;
}

View File

@ -4,9 +4,9 @@
#include <DataStreams/NativeBlockOutputStream.h>
#include <DataTypes/DataTypeTuple.h>
#include <ext/map.hpp>
#include <ext/enumerate.hpp>
#include <ext/range.hpp>
#include <ext/map.h>
#include <ext/enumerate.h>
#include <ext/range.h>
namespace DB

View File

@ -8,7 +8,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNull.h>
#include <Common/Exception.h>
#include <ext/size.hpp>
#include <ext/size.h>
namespace DB

View File

@ -414,6 +414,8 @@ StoragePtr DatabaseCloud::tryGetTable(const String & table_name)
definition, name, data_path, context, false,
"in zookeeper node " + zookeeper_path + "/table_definitions/" + hashToHex(table_hash));
table->startup();
local_tables_cache.emplace(table_name, table);
return table;
}

View File

@ -181,6 +181,60 @@ void DatabaseOrdinary::loadTables(Context & context, ThreadPool * thread_pool, b
task();
}
if (thread_pool)
thread_pool->wait();
/// After all tables was basically initialized, startup them.
startupTables(thread_pool);
}
void DatabaseOrdinary::startupTables(ThreadPool * thread_pool)
{
LOG_INFO(log, "Starting up tables.");
StopwatchWithLock watch;
std::atomic<size_t> tables_processed {0};
size_t total_tables = tables.size();
auto task_function = [&](Tables::iterator begin, Tables::iterator end)
{
for (Tables::iterator it = begin; it != end; ++it)
{
if ((++tables_processed) % PRINT_MESSAGE_EACH_N_TABLES == 0
|| watch.lockTestAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
{
LOG_INFO(log, std::fixed << std::setprecision(2) << tables_processed * 100.0 / total_tables << "%");
watch.restart();
}
it->second->startup();
}
};
const size_t bunch_size = TABLES_PARALLEL_LOAD_BUNCH_SIZE;
size_t num_bunches = (total_tables + bunch_size - 1) / bunch_size;
Tables::iterator begin = tables.begin();
for (size_t i = 0; i < num_bunches; ++i)
{
auto end = begin;
if (i + 1 == num_bunches)
end = tables.end();
else
std::advance(end, bunch_size);
auto task = std::bind(task_function, begin, end);
if (thread_pool)
thread_pool->schedule(task);
else
task();
begin = end;
}
if (thread_pool)
thread_pool->wait();
}

View File

@ -45,6 +45,9 @@ public:
const NamesAndTypesList & alias_columns,
const ColumnDefaults & column_defaults,
const ASTModifier & engine_modifier) override;
private:
void startupTables(ThreadPool * thread_pool);
};
}

View File

@ -24,6 +24,7 @@ String getTableDefinitionFromCreateQuery(const ASTPtr & query);
/** Create a table by its definition, without using InterpreterCreateQuery.
* (InterpreterCreateQuery has more complex functionality, and it can not be used if the database has not been created yet)
* Returns the table name and the table itself.
* You must subsequently call IStorage::startup method to use the table.
*/
std::pair<String, StoragePtr> createTableFromDefinition(
const String & definition,

View File

@ -1,6 +1,9 @@
#include <functional>
#include <sstream>
#include <memory>
#include <Columns/ColumnsNumber.h>
#include <Dictionaries/CacheDictionary.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Common/BitHelpers.h>
#include <Common/randomSeed.h>
#include <Common/HashTable/Hash.h>
@ -8,9 +11,12 @@
#include <Common/ProfilingScopedRWLock.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <ext/size.hpp>
#include <ext/range.hpp>
#include <ext/map.hpp>
#include <DataTypes/DataTypesNumber.h>
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
#include <ext/size.h>
#include <ext/range.h>
#include <ext/map.h>
namespace ProfileEvents
@ -957,4 +963,33 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a
return attributes[it->second];
}
bool CacheDictionary::isEmptyCell(const UInt64 idx) const
{
return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data
== ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
}
PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
PaddedPODArray<Key> array;
for (size_t idx = 0; idx < cells.size(); ++idx)
{
auto & cell = cells[idx];
if (!isEmptyCell(idx) && !cells[idx].isDefault())
{
array.push_back(cell.id);
}
}
return array;
}
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
}
}

View File

@ -6,7 +6,7 @@
#include <Common/ArenaWithFreeLists.h>
#include <Common/CurrentMetrics.h>
#include <Columns/ColumnString.h>
#include <ext/bit_cast.hpp>
#include <ext/bit_cast.h>
#include <Poco/RWLock.h>
#include <cmath>
#include <atomic>
@ -137,6 +137,8 @@ public:
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = Value[];
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -208,6 +210,10 @@ private:
const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated,
AbsentIdHandler && on_id_not_found) const;
PaddedPODArray<Key> getCachedIds() const;
bool isEmptyCell(const UInt64 idx) const;
UInt64 getCellIdx(const Key id) const;
void setDefaultAttributeValue(Attribute & attribute, const Key idx) const;

View File

@ -6,7 +6,7 @@
#include <Interpreters/executeQuery.h>
#include <Common/isLocalAddress.h>
#include <memory>
#include <ext/range.hpp>
#include <ext/range.h>
namespace DB

View File

@ -1,11 +1,12 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
#include <Common/BitHelpers.h>
#include <Common/randomSeed.h>
#include <Common/Stopwatch.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <ext/range.hpp>
#include <ext/range.h>
namespace ProfileEvents
@ -265,7 +266,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
const auto & cell_idx = find_result.cell_idx;
@ -457,7 +458,7 @@ void ComplexKeyCacheDictionary::getItemsNumberImpl(
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
@ -536,7 +537,7 @@ void ComplexKeyCacheDictionary::getItemsString(
/// fetch up-to-date values, discard on fail
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
const auto find_result = findCellIdx(key, now);
@ -581,7 +582,7 @@ void ComplexKeyCacheDictionary::getItemsString(
const auto now = std::chrono::system_clock::now();
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
@ -899,7 +900,7 @@ StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns &
if (key_size_is_fixed)
return placeKeysInFixedSizePool(row, key_columns);
return placeKeysInPool(row, key_columns, keys, *keys_pool);
return placeKeysInPool(row, key_columns, keys, *dict_struct.key, *keys_pool);
}
void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
@ -910,28 +911,49 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
keys_pool->free(const_cast<char *>(key.data), key.size);
}
template <typename Arena>
template <typename Pool>
StringRef ComplexKeyCacheDictionary::placeKeysInPool(
const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool)
const size_t row, const Columns & key_columns, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, Pool & pool)
{
const auto keys_size = key_columns.size();
size_t sum_keys_size{};
for (const auto i : ext::range(0, keys_size))
{
keys[i] = key_columns[i]->getDataAtWithTerminatingZero(row);
sum_keys_size += keys[i].size;
}
const auto res = pool.alloc(sum_keys_size);
auto place = res;
for (size_t j = 0; j < keys_size; ++j)
{
memcpy(place, keys[j].data, keys[j].size);
place += keys[j].size;
keys[j] = key_columns[j]->getDataAt(row);
sum_keys_size += keys[j].size;
if (key_attributes[j].underlying_type == AttributeUnderlyingType::String)
sum_keys_size += sizeof(size_t) + 1;
}
return { res, sum_keys_size };
auto place = pool.alloc(sum_keys_size);
auto key_start = place;
for (size_t j = 0; j < keys_size; ++j)
{
if (key_attributes[j].underlying_type == AttributeUnderlyingType::String)
{
auto start = key_start;
auto key_size = keys[j].size + 1;
memcpy(key_start, &key_size, sizeof(size_t));
key_start += sizeof(size_t);
memcpy(key_start, keys[j].data, keys[j].size);
key_start += keys[j].size;
*key_start = '\0';
++key_start;
keys[j].data = start;
keys[j].size += sizeof(size_t) + 1;
}
else
{
memcpy(key_start, keys[j].data, keys[j].size);
keys[j].data = key_start;
key_start += keys[j].size;
}
}
return { place, sum_keys_size };
}
StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(
@ -965,4 +987,26 @@ StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const
return { res, key.size };
}
bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const
{
return (cells[idx].key == StringRef{} && (idx != zero_cell_idx
|| cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t())));
}
BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
std::vector<StringRef> keys;
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
for (auto idx : ext::range(0, cells.size()))
if (!isEmptyCell(idx)
&& !cells[idx].isDefault())
keys.push_back(cells[idx].key);
}
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
}
}

View File

@ -9,9 +9,9 @@
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnString.h>
#include <Core/StringRef.h>
#include <ext/scope_guard.hpp>
#include <ext/bit_cast.hpp>
#include <ext/map.hpp>
#include <ext/scope_guard.h>
#include <ext/bit_cast.h>
#include <ext/map.h>
#include <Poco/RWLock.h>
#include <atomic>
#include <chrono>
@ -147,6 +147,8 @@ public:
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using MapType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value> using ContainerType = Value[];
@ -233,7 +235,8 @@ private:
template <typename Arena>
static StringRef placeKeysInPool(
const std::size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
const std::size_t row, const Columns & key_columns, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, Arena & pool);
StringRef placeKeysInFixedSizePool(
const std::size_t row, const Columns & key_columns) const;
@ -255,6 +258,8 @@ private:
return findCellIdx(key, now, hash);
};
bool isEmptyCell(const UInt64 idx) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -1,6 +1,7 @@
#include <ext/map.hpp>
#include <ext/range.hpp>
#include <ext/map.h>
#include <ext/range.h>
#include <Dictionaries/ComplexKeyHashedDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
namespace DB
@ -460,22 +461,22 @@ StringRef ComplexKeyHashedDictionary::placeKeysInPool(
{
const auto keys_size = key_columns.size();
size_t sum_keys_size{};
for (const auto i : ext::range(0, keys_size))
{
keys[i] = key_columns[i]->getDataAtWithTerminatingZero(row);
sum_keys_size += keys[i].size;
}
const auto res = pool.alloc(sum_keys_size);
auto place = res;
const char * block_start = nullptr;
for (size_t j = 0; j < keys_size; ++j)
{
memcpy(place, keys[j].data, keys[j].size);
place += keys[j].size;
keys[j] = key_columns[j]->serializeValueIntoArena(row, pool, block_start);
sum_keys_size += keys[j].size;
}
return { res, sum_keys_size };
auto key_start = block_start;
for (size_t j = 0; j < keys_size; ++j)
{
keys[j].data = key_start;
key_start += keys[j].size;
}
return { block_start, sum_keys_size };
}
template <typename T>
@ -502,4 +503,44 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
query_count.fetch_add(rows, std::memory_order_relaxed);
}
std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
{
const Attribute & attribute = attributes.front();
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: return getKeys<UInt8>(attribute); break;
case AttributeUnderlyingType::UInt16: return getKeys<UInt16>(attribute); break;
case AttributeUnderlyingType::UInt32: return getKeys<UInt32>(attribute); break;
case AttributeUnderlyingType::UInt64: return getKeys<UInt64>(attribute); break;
case AttributeUnderlyingType::Int8: return getKeys<Int8>(attribute); break;
case AttributeUnderlyingType::Int16: return getKeys<Int16>(attribute); break;
case AttributeUnderlyingType::Int32: return getKeys<Int32>(attribute); break;
case AttributeUnderlyingType::Int64: return getKeys<Int64>(attribute); break;
case AttributeUnderlyingType::Float32: return getKeys<Float32>(attribute); break;
case AttributeUnderlyingType::Float64: return getKeys<Float64>(attribute); break;
case AttributeUnderlyingType::String: return getKeys<StringRef>(attribute); break;
}
return {};
}
template <typename T>
std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & attribute) const
{
const ContainerType<T> & attr = *std::get<ContainerPtrType<T>>(attribute.maps);
std::vector<StringRef> keys;
keys.reserve(attr.size());
for (const auto & key : attr)
keys.push_back(key.first);
return keys;
}
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
}
}

View File

@ -7,7 +7,7 @@
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <atomic>
#include <memory>
#include <tuple>
@ -16,6 +16,7 @@
namespace DB
{
class ComplexKeyHashedDictionary final : public IDictionaryBase
{
public:
@ -125,6 +126,8 @@ public:
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -188,6 +191,11 @@ private:
template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
std::vector<StringRef> getKeys() const;
template <typename T>
std::vector<StringRef> getKeys(const Attribute & attribute) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -0,0 +1,416 @@
#pragma once
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <ext/range.h>
#include <common/logger_useful.h>
#include <Core/Names.h>
namespace DB
{
/*
* BlockInputStream implementation for external dictionaries
* read() returns single block consisting of the in-memory contents of the dictionaries
*/
template <class DictionaryType, class Key>
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
using DictionatyPtr = std::shared_ptr<DictionaryType const>;
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
PaddedPODArray<Key> && ids, const Names & column_names);
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
const std::vector<StringRef> & keys, const Names & column_names);
using GetColumnsFunction =
std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute>& attributes)>;
// Used to separate key columns format for storage and view.
// Calls get_key_columns_function to get key column for dictionary get fuction call
// and get_view_columns_function to get key representation.
// Now used in trie dictionary, where columns are stored as ip and mask, and are showed as string
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
const Columns & data_columns, const Names & column_names,
GetColumnsFunction && get_key_columns_function,
GetColumnsFunction && get_view_columns_function);
String getName() const override {
return "DictionaryBlockInputStream";
}
protected:
Block getBlock(size_t start, size_t size) const override;
private:
// pointer types to getXXX functions
// for single key dictionaries
template <class Type>
using DictionaryGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
using DictionaryStringGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
// for complex complex key dictionaries
template <class Type>
using GetterByKey = void (DictionaryType::*)(
const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
using StringGetterByKey = void (DictionaryType::*)(
const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
// call getXXX
// for single key dictionaries
template <class Type, class Container>
void callGetter(DictionaryGetter<Type> getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
template <class Container>
void callGetter(DictionaryStringGetter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
// for complex complex key dictionaries
template <class Type, class Container>
void callGetter(GetterByKey<Type> getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
template <class Container>
void callGetter(StringGetterByKey getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
template <template <class> class Getter, class StringGetter>
Block fillBlock(const PaddedPODArray<Key> & ids, const Columns & keys,
const DataTypes & types, ColumnsWithTypeAndName && view) const;
template <class AttributeType, class Getter>
ColumnPtr getColumnFromAttribute(Getter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
template <class Getter>
ColumnPtr getColumnFromStringAttribute(Getter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
ColumnPtr getColumnFromIds(const PaddedPODArray<Key>& ids) const;
void fillKeyColumns(const std::vector<StringRef> & keys, size_t start, size_t size,
const DictionaryStructure& dictionary_structure, ColumnsWithTypeAndName & columns) const;
DictionatyPtr dictionary;
Names column_names;
PaddedPODArray<Key> ids;
ColumnsWithTypeAndName key_columns;
Poco::Logger * logger;
Block (DictionaryBlockInputStream<DictionaryType, Key>::*fillBlockFunction)(
const PaddedPODArray<Key>& ids, const Columns& keys,
const DataTypes & types, ColumnsWithTypeAndName && view) const;
Columns data_columns;
GetColumnsFunction get_key_columns_function;
GetColumnsFunction get_view_columns_function;
};
template <class DictionaryType, class Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
PaddedPODArray<Key> && ids, const Names& column_names)
: DictionaryBlockInputStreamBase(ids.size(), max_block_size),
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)),
column_names(column_names), ids(std::move(ids)),
logger(&Poco::Logger::get("DictionaryBlockInputStream")),
fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryStringGetter>)
{
}
template <class DictionaryType, class Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
const std::vector<StringRef> & keys, const Names& column_names)
: DictionaryBlockInputStreamBase(keys.size(), max_block_size),
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), column_names(column_names),
logger(&Poco::Logger::get("DictionaryBlockInputStream")),
fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, StringGetterByKey>)
{
const DictionaryStructure& dictionaty_structure = dictionary->getStructure();
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
}
template <class DictionaryType, class Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
const Columns & data_columns, const Names & column_names,
GetColumnsFunction && get_key_columns_function,
GetColumnsFunction && get_view_columns_function)
: DictionaryBlockInputStreamBase(data_columns.front()->size(), max_block_size),
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), column_names(column_names),
logger(&Poco::Logger::get("DictionaryBlockInputStream")),
fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, StringGetterByKey>),
data_columns(data_columns),
get_key_columns_function(get_key_columns_function), get_view_columns_function(get_view_columns_function)
{
}
template <class DictionaryType, class Key>
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
{
if (!key_columns.empty())
{
Columns columns;
ColumnsWithTypeAndName view_columns;
columns.reserve(key_columns.size());
for (const auto & key_column : key_columns)
{
auto column = key_column.column->cut(start, length);
columns.emplace_back(column);
view_columns.emplace_back(column, key_column.type, key_column.name);
}
return (this->*fillBlockFunction)({}, columns, {}, std::move(view_columns));
}
else if(!ids.empty())
{
PaddedPODArray<Key> block_ids(ids.begin() + start, ids.begin() + start + length);
return (this->*fillBlockFunction)(block_ids, {}, {}, {});
}
else
{
Columns columns;
columns.reserve(data_columns.size());
for (const auto & data_column : data_columns)
columns.push_back(data_column->cut(start, length));
const DictionaryStructure& dictionaty_structure = dictionary->getStructure();
const auto & attributes = *dictionaty_structure.key;
ColumnsWithTypeAndName keys_with_type_and_name = get_key_columns_function(columns, attributes);
ColumnsWithTypeAndName view_with_type_and_name = get_view_columns_function(columns, attributes);
DataTypes types;
columns.clear();
for (const auto & key_column : keys_with_type_and_name)
{
columns.push_back(key_column.column);
types.push_back(key_column.type);
}
return (this->*fillBlockFunction)({}, columns, types, std::move(view_with_type_and_name));
}
}
template <class DictionaryType, class Key>
template <class Type, class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryGetter<Type> getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
(dictionary.*getter)(attribute.name, ids, container);
}
template <class DictionaryType, class Key>
template <class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryStringGetter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
(dictionary.*getter)(attribute.name, ids, container);
}
template <class DictionaryType, class Key>
template <class Type, class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
GetterByKey<Type> getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
(dictionary.*getter)(attribute.name, keys, data_types, container);
}
template <class DictionaryType, class Key>
template <class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
StringGetterByKey getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
(dictionary.*getter)(attribute.name, keys, data_types, container);
}
template <class DictionaryType, class Key>
template <template <class> class Getter, class StringGetter>
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
const PaddedPODArray<Key>& ids, const Columns& keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
{
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
DataTypes data_types = types;
ColumnsWithTypeAndName block_columns;
data_types.reserve(keys.size());
const DictionaryStructure& dictionaty_structure = dictionary->getStructure();
if (data_types.empty() && dictionaty_structure.key)
for (const auto key : *dictionaty_structure.key)
data_types.push_back(key.type);
for (const auto & column : view)
if (names.find(column.name) != names.end())
block_columns.push_back(column);
const DictionaryStructure& structure = dictionary->getStructure();
if (structure.id && names.find(structure.id->name) != names.end())
block_columns.emplace_back(getColumnFromIds(ids), std::make_shared<DataTypeUInt64>(), structure.id->name);
for (const auto idx : ext::range(0, structure.attributes.size()))
{
const DictionaryAttribute& attribute = structure.attributes[idx];
if (names.find(attribute.name) != names.end())
{
ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
column = getColumnFromAttribute<TYPE, Getter<TYPE>>( \
&DictionaryType::get##TYPE, ids, keys, data_types, attribute, *dictionary)
switch (attribute.underlying_type)
{
case AttributeUnderlyingType::UInt8:
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
break;
case AttributeUnderlyingType::UInt16:
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
break;
case AttributeUnderlyingType::UInt32:
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
break;
case AttributeUnderlyingType::UInt64:
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
break;
case AttributeUnderlyingType::Int8:
GET_COLUMN_FORM_ATTRIBUTE(Int8);
break;
case AttributeUnderlyingType::Int16:
GET_COLUMN_FORM_ATTRIBUTE(Int16);
break;
case AttributeUnderlyingType::Int32:
GET_COLUMN_FORM_ATTRIBUTE(Int32);
break;
case AttributeUnderlyingType::Int64:
GET_COLUMN_FORM_ATTRIBUTE(Int64);
break;
case AttributeUnderlyingType::Float32:
GET_COLUMN_FORM_ATTRIBUTE(Float32);
break;
case AttributeUnderlyingType::Float64:
GET_COLUMN_FORM_ATTRIBUTE(Float64);
break;
case AttributeUnderlyingType::String:
{
column = getColumnFromStringAttribute<StringGetter>(
&DictionaryType::getString, ids, keys, data_types, attribute, *dictionary);
break;
}
}
block_columns.emplace_back(column, attribute.type, attribute.name);
}
}
return Block(block_columns);
}
template <class DictionaryType, class Key>
template <class AttributeType, class Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
Getter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
auto size = ids.size();
if (!keys.empty())
size = keys.front()->size();
auto column_vector = std::make_unique<ColumnVector<AttributeType>>(size);
callGetter(getter, ids, keys, data_types, column_vector->getData(), attribute, dictionary);
return ColumnPtr(std::move(column_vector));
}
template <class DictionaryType, class Key>
template <class Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
Getter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
{
auto column_string = std::make_unique<ColumnString>();
auto ptr = column_string.get();
callGetter(getter, ids, keys, data_types, ptr, attribute, dictionary);
return column_string;
}
template <class DictionaryType, class Key>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key>& ids) const
{
auto column_vector = std::make_shared<ColumnVector<UInt64>>();
column_vector->getData().reserve(ids.size());
for (UInt64 id : ids)
{
column_vector->insert(id);
}
return column_vector;
}
template <class DictionaryType, class Key>
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
const std::vector<StringRef> & keys, size_t start, size_t size,
const DictionaryStructure& dictionary_structure, ColumnsWithTypeAndName & columns) const
{
for (const DictionaryAttribute & attribute : *dictionary_structure.key)
{
#define ADD_COLUMN(TYPE) columns.push_back( \
ColumnWithTypeAndName(std::make_shared<ColumnVector<TYPE>>(), attribute.type, attribute.name))
switch (attribute.underlying_type)
{
case AttributeUnderlyingType::UInt8:
ADD_COLUMN(UInt8);
break;
case AttributeUnderlyingType::UInt16:
ADD_COLUMN(UInt16);
break;
case AttributeUnderlyingType::UInt32:
ADD_COLUMN(UInt32);
break;
case AttributeUnderlyingType::UInt64:
ADD_COLUMN(UInt64);
break;
case AttributeUnderlyingType::Int8:
ADD_COLUMN(Int8);
break;
case AttributeUnderlyingType::Int16:
ADD_COLUMN(Int16);
break;
case AttributeUnderlyingType::Int32:
ADD_COLUMN(Int32);
break;
case AttributeUnderlyingType::Int64:
ADD_COLUMN(Int64);
break;
case AttributeUnderlyingType::Float32:
ADD_COLUMN(Float32);
break;
case AttributeUnderlyingType::Float64:
ADD_COLUMN(Float64);
break;
case AttributeUnderlyingType::String:
{
columns.push_back(ColumnWithTypeAndName(std::make_shared<ColumnString>(), attribute.type, attribute.name));
break;
}
}
}
for (auto idx : ext::range(start, size))
{
const auto & key = keys[idx];
auto ptr = key.data;
for (const auto & column : columns)
ptr = column.column->deserializeAndInsertFromArena(ptr);
}
}
}

View File

@ -0,0 +1,29 @@
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
namespace DB
{
DictionaryBlockInputStreamBase::DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size)
: rows_count(rows_count), max_block_size(max_block_size), next_row(0)
{
}
String DictionaryBlockInputStreamBase::getID() const
{
std::stringstream ss;
ss << static_cast<const void*>(this);
return ss.str();
}
Block DictionaryBlockInputStreamBase::readImpl()
{
if (next_row == rows_count)
return Block();
size_t block_size = std::min<size_t>(max_block_size, rows_count - next_row);
Block block = getBlock(next_row, block_size);
next_row += block_size;
return block;
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <DataStreams/IProfilingBlockInputStream.h>
namespace DB
{
class DictionaryBlockInputStreamBase : public IProfilingBlockInputStream
{
protected:
Block block;
DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size);
String getID() const override;
virtual Block getBlock(size_t start, size_t length) const = 0;
private:
const size_t rows_count;
const size_t max_block_size;
size_t next_row;
Block readImpl() override;
void readPrefixImpl() override { next_row = 0; }
};
}

View File

@ -5,7 +5,7 @@
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <numeric>
#include <vector>
#include <string>

View File

@ -1,4 +1,4 @@
#include <ext/range.hpp>
#include <ext/range.h>
#include <boost/range/join.hpp>
#include <IO/WriteBuffer.h>
#include <IO/WriteBufferFromString.h>

View File

@ -1,4 +1,4 @@
#include <ext/range.hpp>
#include <ext/range.h>
#include <Dictionaries/ExternalResultDescription.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>

View File

@ -1,5 +1,5 @@
#include <Dictionaries/FlatDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
namespace DB
{
@ -524,4 +524,26 @@ void FlatDictionary::has(const Attribute & attribute, const PaddedPODArray<Key>
query_count.fetch_add(ids_count, std::memory_order_relaxed);
}
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
{
const auto ids_count = ext::size(loaded_ids);
PaddedPODArray<Key> ids;
for (auto idx : ext::range(0, ids_count))
{
if (loaded_ids[idx]) {
ids.push_back(idx);
}
}
return ids;
}
BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<FlatDictionary, Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds() ,column_names);
}
}

View File

@ -5,8 +5,8 @@
#include <Dictionaries/DictionaryStructure.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <ext/range.hpp>
#include <ext/size.hpp>
#include <ext/range.h>
#include <ext/size.h>
#include <atomic>
#include <vector>
#include <tuple>
@ -125,6 +125,8 @@ public:
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = PaddedPODArray<Value>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -191,6 +193,8 @@ private:
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const;
PaddedPODArray<Key> getIds() const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -1,6 +1,6 @@
#include <ext/size.hpp>
#include <ext/size.h>
#include <Dictionaries/HashedDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
namespace DB
{
@ -479,4 +479,44 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename T>
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute & attribute) const
{
const HashMap<UInt64, T> & attr = *std::get<CollectionPtrType<T>>(attribute.maps);
PaddedPODArray<Key> ids;
ids.reserve(attr.size());
for (const auto & value : attr) {
ids.push_back(value.first);
}
return ids;
}
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
{
const auto & attribute = attributes.front();
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: return getIds<UInt8>(attribute); break;
case AttributeUnderlyingType::UInt16: return getIds<UInt16>(attribute); break;
case AttributeUnderlyingType::UInt32: return getIds<UInt32>(attribute); break;
case AttributeUnderlyingType::UInt64: return getIds<UInt64>(attribute); break;
case AttributeUnderlyingType::Int8: return getIds<Int8>(attribute); break;
case AttributeUnderlyingType::Int16: return getIds<Int16>(attribute); break;
case AttributeUnderlyingType::Int32: return getIds<Int32>(attribute); break;
case AttributeUnderlyingType::Int64: return getIds<Int64>(attribute); break;
case AttributeUnderlyingType::Float32: return getIds<Float32>(attribute); break;
case AttributeUnderlyingType::Float64: return getIds<Float64>(attribute); break;
case AttributeUnderlyingType::String: return getIds<StringRef>(attribute); break;
}
return PaddedPODArray<Key>();
}
BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
}
}

View File

@ -5,7 +5,7 @@
#include <Dictionaries/DictionaryStructure.h>
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnString.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <atomic>
#include <memory>
#include <tuple>
@ -123,6 +123,8 @@ public:
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using CollectionType = HashMap<UInt64, Value>;
template <typename Value> using CollectionPtrType = std::unique_ptr<CollectionType<Value>>;
@ -181,6 +183,11 @@ private:
template <typename T>
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
template <typename T>
PaddedPODArray<Key> getIds(const Attribute & attribute) const;
PaddedPODArray<Key> getIds() const;
template <typename ChildType, typename AncestorType>
void isInImpl(
const ChildType & child_ids,

View File

@ -2,6 +2,7 @@
#include <Core/Field.h>
#include <Core/StringRef.h>
#include <Core/Names.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Common/PODArray.h>
#include <memory>
@ -19,8 +20,11 @@ struct DictionaryLifetime;
struct DictionaryStructure;
class ColumnString;
class IBlockInputStream;
using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
struct IDictionaryBase
struct IDictionaryBase : public std::enable_shared_from_this<IDictionaryBase>
{
using Key = UInt64;
@ -53,6 +57,8 @@ struct IDictionaryBase
virtual bool isInjective(const std::string & attribute_name) const = 0;
virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
virtual ~IDictionaryBase() = default;
};

View File

@ -17,7 +17,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <Core/FieldVisitors.h>

View File

@ -15,7 +15,7 @@
#include <Dictionaries/MongoDBDictionarySource.h>
#include <Dictionaries/MongoDBBlockInputStream.h>
#include <Core/FieldVisitors.h>
#include <ext/enumerate.hpp>
#include <ext/enumerate.h>
namespace DB

View File

@ -4,7 +4,7 @@
#include <Dictionaries/MySQLBlockInputStream.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <vector>

View File

@ -3,7 +3,7 @@
#include <Dictionaries/IDictionarySource.h>
#include <Dictionaries/ExternalQueryBuilder.h>
#include <Dictionaries/DictionaryStructure.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <mysqlxx/PoolWithFailover.h>
#include <Poco/Util/AbstractConfiguration.h>

View File

@ -4,7 +4,7 @@
#include <Columns/ColumnString.h>
#include <common/logger_useful.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <vector>

View File

@ -0,0 +1,214 @@
#pragma once
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <ext/range.h>
namespace DB
{
/*
* BlockInputStream implementation for external dictionaries
* read() returns single block consisting of the in-memory contents of the dictionaries
*/
template <class DictionaryType, class Key>
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
using DictionatyPtr = std::shared_ptr<DictionaryType const>;
RangeDictionaryBlockInputStream(
DictionatyPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids,
PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates);
String getName() const override {
return "RangeDictionaryBlockInputStream";
}
protected:
Block getBlock(size_t start, size_t length) const override;
private:
template <class Type>
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &,
const PaddedPODArray<UInt16> &, PaddedPODArray<Type> &) const;
template <class AttributeType>
ColumnPtr getColumnFromAttribute(DictionaryGetter<AttributeType> getter,
const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
ColumnPtr getColumnFromAttributeString(const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
template <class T>
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T>& array) const;
template <class T>
void addSpecialColumn(
const std::experimental::optional<DictionarySpecialAttribute>& attribute, DataTypePtr type,
const std::string & default_name, const std::unordered_set<std::string> & column_names,
const PaddedPODArray<T> & values, ColumnsWithTypeAndName& columns) const;
Block fillBlock(const PaddedPODArray<Key> & ids,
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
DictionatyPtr dictionary;
Names column_names;
PaddedPODArray<Key> ids;
PaddedPODArray<UInt16> start_dates;
PaddedPODArray<UInt16> end_dates;
};
template <class DictionaryType, class Key>
RangeDictionaryBlockInputStream<DictionaryType, Key>::RangeDictionaryBlockInputStream(
DictionatyPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray<Key> && ids,
PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates)
: DictionaryBlockInputStreamBase(ids.size(), max_column_size),
dictionary(dictionary), column_names(column_names),
ids(std::move(ids)), start_dates(std::move(start_dates)), end_dates(std::move(end_dates))
{
}
template <class DictionaryType, class Key>
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
{
PaddedPODArray<Key> block_ids;
PaddedPODArray<UInt16> block_start_dates;
PaddedPODArray<UInt16> block_end_dates;
block_ids.reserve(length);
block_start_dates.reserve(length);
block_end_dates.reserve(length);
for (auto idx : ext::range(start, start + length))
{
block_ids.push_back(ids[idx]);
block_start_dates.push_back(block_start_dates[idx]);
block_end_dates.push_back(block_end_dates[idx]);
}
return fillBlock(block_ids, block_start_dates, block_end_dates);
}
template <class DictionaryType, class Key>
template <class AttributeType>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
DictionaryGetter<AttributeType> getter, const PaddedPODArray<Key>& ids,
const PaddedPODArray<UInt16> & dates, const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
{
auto column_vector = std::make_unique<ColumnVector<AttributeType>>(ids.size());
(dictionary.*getter)(attribute.name, ids, dates, column_vector->getData());
return ColumnPtr(std::move(column_vector));
}
template <class DictionaryType, class Key>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttributeString(
const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
{
auto column_string = std::make_unique<ColumnString>();
dictionary.getString(attribute.name, ids, dates, column_string.get());
return ColumnPtr(std::move(column_string));
}
template <class DictionaryType, class Key>
template <class T>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromPODArray(const PaddedPODArray<T>& array) const
{
auto column_vector = std::make_unique<ColumnVector<T>>();
column_vector->getData().reserve(array.size());
for (T value : array)
{
column_vector->insert(value);
}
return ColumnPtr(std::move(column_vector));
}
template <class DictionaryType, class Key>
template <class T>
void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
const std::experimental::optional<DictionarySpecialAttribute> & attribute, DataTypePtr type,
const std::string& default_name, const std::unordered_set<std::string> & column_names,
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const
{
std::string name = default_name;
if (attribute) {
name = attribute->name;
}
if (column_names.find(name) != column_names.end()) {
columns.emplace_back(getColumnFromPODArray(values), type, name);
}
}
template <class DictionaryType, class Key>
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
const PaddedPODArray<Key>& ids,
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
{
ColumnsWithTypeAndName columns;
const DictionaryStructure& structure = dictionary->getStructure();
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids, columns);
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
for (const auto idx : ext::range(0, structure.attributes.size()))
{
const DictionaryAttribute& attribute = structure.attributes[idx];
if (names.find(attribute.name) != names.end())
{
ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary)
switch (attribute.underlying_type)
{
case AttributeUnderlyingType::UInt8:
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
break;
case AttributeUnderlyingType::UInt16:
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
break;
case AttributeUnderlyingType::UInt32:
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
break;
case AttributeUnderlyingType::UInt64:
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
break;
case AttributeUnderlyingType::Int8:
GET_COLUMN_FORM_ATTRIBUTE(Int8);
break;
case AttributeUnderlyingType::Int16:
GET_COLUMN_FORM_ATTRIBUTE(Int16);
break;
case AttributeUnderlyingType::Int32:
GET_COLUMN_FORM_ATTRIBUTE(Int32);
break;
case AttributeUnderlyingType::Int64:
GET_COLUMN_FORM_ATTRIBUTE(Int64);
break;
case AttributeUnderlyingType::Float32:
GET_COLUMN_FORM_ATTRIBUTE(Float32);
break;
case AttributeUnderlyingType::Float64:
GET_COLUMN_FORM_ATTRIBUTE(Float64);
break;
case AttributeUnderlyingType::String:
column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary);
break;
}
columns.emplace_back(column, attribute.type, attribute.name);
}
}
return Block(columns);
}
}

View File

@ -1,4 +1,5 @@
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/RangeDictionaryBlockInputStream.h>
namespace DB
@ -353,4 +354,59 @@ const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttributeWith
return attribute;
}
void RangeHashedDictionary::getIdsAndDates(PaddedPODArray<Key> & ids,
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const
{
const auto & attribute = attributes.front();
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::UInt16: getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::UInt32: getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::UInt64: getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::Int8: getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::Int16: getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::Int32: getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::Int64: getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::Float32: getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::Float64: getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates); break;
case AttributeUnderlyingType::String: getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates); break;
}
}
template <typename T>
void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPODArray<Key> & ids,
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const
{
const HashMap<UInt64, Values<T>> & attr = *std::get<Ptr<T>>(attribute.maps);
ids.reserve(attr.size());
start_dates.reserve(attr.size());
end_dates.reserve(attr.size());
for (const auto & key : attr) {
ids.push_back(key.first);
for (const auto & value : key.second)
{
start_dates.push_back(value.range.first);
end_dates.push_back(value.range.second);
}
}
}
BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
PaddedPODArray<Key> ids;
PaddedPODArray<UInt16> start_dates;
PaddedPODArray<UInt16> end_dates;
getIdsAndDates(ids, start_dates, end_dates);
using BlockInputStreamType = RangeDictionaryBlockInputStream<RangeHashedDictionary, Key>;
auto dict_ptr = std::static_pointer_cast<const RangeHashedDictionary>(shared_from_this());
return std::make_shared<BlockInputStreamType>(
dict_ptr, max_block_size, column_names, std::move(ids), std::move(start_dates), std::move(end_dates));
}
}

View File

@ -5,7 +5,7 @@
#include <Dictionaries/DictionaryStructure.h>
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnString.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <atomic>
#include <memory>
#include <tuple>
@ -79,6 +79,8 @@ public:
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
ColumnString * out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
struct Range : std::pair<UInt16, UInt16>
{
@ -166,6 +168,13 @@ private:
const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
void getIdsAndDates(PaddedPODArray<Key> & ids,
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const;
template <typename T>
void getIdsAndDates(const Attribute & attribute, PaddedPODArray<Key> & ids,
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -1,8 +1,15 @@
#include <ext/map.hpp>
#include <ext/range.hpp>
#include <stack>
#include <ext/map.h>
#include <ext/range.h>
#include <Poco/Net/IPAddress.h>
#include <Poco/ByteOrder.h>
#include <Dictionaries/TrieDictionary.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnFixedString.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/FunctionsCoding.h>
#include <IO/WriteIntText.h>
#include <iostream>
namespace DB
@ -20,7 +27,7 @@ TrieDictionary::TrieDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime, bool require_nonempty)
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime),
require_nonempty(require_nonempty)
require_nonempty(require_nonempty), logger(&Poco::Logger::get("TrieDictionary"))
{
createAttributes();
trie = btrie_create();
@ -425,7 +432,7 @@ void TrieDictionary::getItemsImpl(
auto addr = first_column->getDataAt(i);
if (addr.size != 16)
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
set_value(i, slot != BTRIE_NULL ? vec[slot] : get_default(i));
}
@ -536,12 +543,101 @@ void TrieDictionary::has(const Attribute & attribute, const Columns & key_column
auto addr = first_column->getDataAt(i);
if (unlikely(addr.size != 16))
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
out[i] = (slot != BTRIE_NULL);
}
}
query_count.fetch_add(rows, std::memory_order_relaxed);}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename Getter, typename KeyType>
void TrieDictionary::trieTraverse(const btrie_t * tree, Getter && getter) const
{
KeyType key = 0;
const KeyType high_bit = ~((~key) >> 1);
btrie_node_t * node;
node = tree->root;
std::stack<btrie_node_t *> stack;
while (node)
{
stack.push(node);
node = node->left;
}
auto getBit = [&high_bit](size_t size) { return size ? (high_bit >> (size - 1)) : 0; };
while (!stack.empty())
{
node = stack.top();
stack.pop();
if (node && node->value != BTRIE_NULL)
getter(key, stack.size());
if (node && node->right)
{
stack.push(NULL);
key |= getBit(stack.size());
stack.push(node->right);
while (stack.top()->left)
stack.push(stack.top()->left);
}
else
key &= ~getBit(stack.size());
}
}
Columns TrieDictionary::getKeyColumns() const
{
auto ip_column = std::make_shared<ColumnFixedString>(ipv6_bytes_length);
auto mask_column = std::make_shared<ColumnVector<UInt8>>();
auto getter = [& ip_column, & mask_column](__uint128_t ip, size_t mask) {
UInt64 * ip_array = reinterpret_cast<UInt64 *>(&ip);
ip_array[0] = Poco::ByteOrder::fromNetwork(ip_array[0]);
ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]);
std::swap(ip_array[0], ip_array[1]);
ip_column->insertData(reinterpret_cast<const char *>(ip_array), ipv6_bytes_length);
mask_column->insert(static_cast<UInt8>(mask));
};
trieTraverse<decltype(getter), __uint128_t>(trie, std::move(getter));
return {ip_column, mask_column};
}
BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
using BlockInputStreamType = DictionaryBlockInputStream<TrieDictionary, UInt64>;
auto getKeys = [](const Columns& columns, const std::vector<DictionaryAttribute>& attributes)
{
const auto & attr = attributes.front();
return ColumnsWithTypeAndName({ColumnWithTypeAndName(columns.front(),
std::make_shared<DataTypeFixedString>(ipv6_bytes_length), attr.name)});
};
auto getView = [](const Columns& columns, const std::vector<DictionaryAttribute>& attributes)
{
auto column = std::make_shared<ColumnString>();
auto ip_column = std::static_pointer_cast<ColumnFixedString>(columns.front());
auto mask_column = std::static_pointer_cast<ColumnVector<UInt8>>(columns.back());
char buffer[48];
for (size_t row : ext::range(0, ip_column->size()))
{
UInt8 mask = mask_column->getElement(row);
char * ptr = buffer;
IPv6Format::apply(reinterpret_cast<const unsigned char *>(ip_column->getDataAt(row).data), ptr);
*(ptr - 1) = '/';
auto size = detail::writeUIntText(mask, ptr);
column->insertData(buffer, size + (ptr - buffer));
}
return ColumnsWithTypeAndName{ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), attributes.front().name)};
};
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeyColumns(), column_names,
std::move(getKeys), std::move(getView));
}
}

View File

@ -7,12 +7,12 @@
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <btrie.h>
#include <atomic>
#include <memory>
#include <tuple>
#include <common/logger_useful.h>
namespace DB
{
@ -128,6 +128,8 @@ public:
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = std::vector<Value>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -190,6 +192,11 @@ private:
template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
template <typename Getter, typename KeyType>
void trieTraverse(const btrie_t * trie, Getter && getter) const;
Columns getKeyColumns() const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
@ -210,6 +217,8 @@ private:
std::chrono::time_point<std::chrono::system_clock> creation_time;
std::exception_ptr creation_exception;
Logger * logger;
};

View File

@ -4,6 +4,7 @@
#include <memory>
#include <unordered_map>
#include <common/singleton.h>
#include <Common/Exception.h>
namespace DB
@ -13,6 +14,11 @@ class Context;
class IFunction;
using FunctionPtr = std::shared_ptr<IFunction>;
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/** Creates function by name.
* Function could use for initialization (take ownership of shared_ptr, for example)
@ -23,20 +29,23 @@ class FunctionFactory : public Singleton<FunctionFactory>
friend class StorageSystemFunctions;
private:
typedef FunctionPtr (*Creator)(const Context & context); /// Not std::function, for lower object size and less indirection.
using Creator = FunctionPtr(*)(const Context & context); /// Not std::function, for lower object size and less indirection.
std::unordered_map<std::string, Creator> functions;
public:
FunctionFactory();
FunctionPtr get(const std::string & name, const Context & context) const; /// Throws an exception if not found.
FunctionPtr tryGet(const std::string & name, const Context & context) const; /// Returns nullptr if not found.
FunctionPtr tryGet(const std::string & name, const Context & context) const; /// Returns nullptr if not found.
/// No locking, you must register all functions before usage of get, tryGet.
template <typename F> void registerFunction()
template <typename Function> void registerFunction()
{
static_assert(std::is_same<decltype(&F::create), Creator>::value, "F::create has incorrect type");
functions[F::name] = &F::create;
static_assert(std::is_same<decltype(&Function::create), Creator>::value, "Function::create has incorrect type");
if (!functions.emplace(std::string(Function::name), &Function::create).second)
throw Exception("FunctionFactory: the function name '" + std::string(Function::name) + "' is not unique",
ErrorCodes::LOGICAL_ERROR);
}
};

View File

@ -17,6 +17,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_DIVISION;
extern const int ILLEGAL_COLUMN;
}

View File

@ -15,6 +15,7 @@
#include <Interpreters/AggregationCommon.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnAggregateFunction.h>
#include <boost/iterator/counting_iterator.hpp>
#include <tuple>
#include <array>
@ -384,7 +385,7 @@ public:
void update(size_t from)
{
if (index >= size)
throw Exception{"Internal errror", ErrorCodes::LOGICAL_ERROR};
throw Exception{"Logical error: index passes to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
bool is_null;
if (src_nullable_col != nullptr)
@ -401,7 +402,7 @@ public:
void update()
{
if (index >= size)
throw Exception{"Internal errror", ErrorCodes::LOGICAL_ERROR};
throw Exception{"Logical error: index passes to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
auto & null_map_data = static_cast<ColumnUInt8 &>(*sink_null_map).getData();
null_map_data[index] = 0;
@ -892,7 +893,7 @@ bool FunctionArrayElement::executeConstConst(Block & block, const ColumnNumbers
if (!col_array)
return false;
const DB::Array & array = col_array->getData();
const Array & array = col_array->getData();
size_t array_size = array.size();
size_t real_index = 0;
@ -903,11 +904,13 @@ bool FunctionArrayElement::executeConstConst(Block & block, const ColumnNumbers
else
throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR);
Field value = col_array->getData().at(real_index);
if (value.isNull())
value = DataTypeString{}.getDefault();
Field value;
if (real_index < array_size)
value = array.at(real_index);
else
value = block.getByPosition(result).type->getDefault();
block.safeGetByPosition(result).column = block.safeGetByPosition(result).type->createConstColumn(
block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn(
block.rows(),
value);
@ -926,7 +929,7 @@ bool FunctionArrayElement::executeConst(Block & block, const ColumnNumbers & arg
if (!col_array)
return false;
const DB::Array & array = col_array->getData();
const Array & array = col_array->getData();
size_t array_size = array.size();
block.safeGetByPosition(result).column = block.safeGetByPosition(result).type->createColumn();
@ -1396,8 +1399,8 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn *
const ColumnArray::Offsets_t & offsets = array->getOffsets();
const typename ColumnVector<T>::Container_t & values = nested->getData();
typedef ClearableHashSet<T, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)> > Set;
using Set = ClearableHashSet<T, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
@ -1442,8 +1445,8 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn *
return false;
const ColumnArray::Offsets_t & offsets = array->getOffsets();
typedef ClearableHashSet<StringRef, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)> > Set;
using Set = ClearableHashSet<StringRef, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
@ -1509,8 +1512,8 @@ bool FunctionArrayUniq::execute128bit(
if (keys_bytes > 16)
return false;
typedef ClearableHashSet<UInt128, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > Set;
using Set = ClearableHashSet<UInt128, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
/// Suppose that, for a given row, each of the N columns has an array whose length is M.
/// Denote arr_i each of these arrays (1 <= i <= N). Then the following is performed:
@ -1570,8 +1573,8 @@ void FunctionArrayUniq::executeHashed(
{
size_t count = columns.size();
typedef ClearableHashSet<UInt128, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > Set;
using Set = ClearableHashSet<UInt128, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
Set set;
size_t prev_off = 0;
@ -1722,8 +1725,8 @@ bool FunctionArrayEnumerateUniq::executeNumber(const ColumnArray * array, const
const ColumnArray::Offsets_t & offsets = array->getOffsets();
const typename ColumnVector<T>::Container_t & values = nested->getData();
typedef ClearableHashMap<T, UInt32, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)> > ValuesToIndices;
using ValuesToIndices = ClearableHashMap<T, UInt32, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
@ -1767,8 +1770,8 @@ bool FunctionArrayEnumerateUniq::executeString(const ColumnArray * array, const
const ColumnArray::Offsets_t & offsets = array->getOffsets();
size_t prev_off = 0;
typedef ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)> > ValuesToIndices;
using ValuesToIndices = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
@ -1836,8 +1839,8 @@ bool FunctionArrayEnumerateUniq::execute128bit(
if (keys_bytes > 16)
return false;
typedef ClearableHashMap<UInt128, UInt32, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > ValuesToIndices;
using ValuesToIndices = ClearableHashMap<UInt128, UInt32, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
ValuesToIndices indices;
size_t prev_off = 0;
@ -1882,8 +1885,8 @@ void FunctionArrayEnumerateUniq::executeHashed(
{
size_t count = columns.size();
typedef ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > ValuesToIndices;
using ValuesToIndices = ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
ValuesToIndices indices;
size_t prev_off = 0;
@ -2354,7 +2357,7 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
IColumn::Offset_t offset{};
for (const auto i : ext::range(0, in->size()))
{
std::copy(ext::make_range_iterator(T{}), ext::make_range_iterator(in_data[i]), &out_data[offset]);
std::copy(boost::counting_iterator<T>(), boost::counting_iterator<T>(in_data[i]), &out_data[offset]);
offset += in_data[i];
out_offsets[i] = offset;
}
@ -2390,7 +2393,7 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
IColumn::Offset_t offset{};
for (const auto i : ext::range(0, in->size()))
{
std::copy(ext::make_range_iterator(T{}), ext::make_range_iterator(in_data), &out_data[offset]);
std::copy(boost::counting_iterator<T>(), boost::counting_iterator<T>(in_data), &out_data[offset]);
offset += in_data;
out_offsets[i] = offset;
}

View File

@ -18,7 +18,7 @@
#include <Functions/ObjectPool.h>
#include <Common/StringUtils.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <unordered_map>
#include <numeric>
@ -37,8 +37,8 @@ namespace ErrorCodes
/** Array functions:
*
* array(c1, c2, ...) - create an array of constants.
* arrayElement(arr, i) - get the array element by index.
* array(c1, c2, ...) - create an array.
* arrayElement(arr, i) - get the array element by index. If index is not constant and out of range - return default value of data type.
* The index begins with 1. Also, the index can be negative - then it is counted from the end of the array.
* has(arr, x) - whether there is an element x in the array.
* indexOf(arr, x) - returns the index of the element x (starting with 1), if it exists in the array, or 0 if it is not.
@ -48,8 +48,8 @@ namespace ErrorCodes
* arrayUniq(arr1, arr2, ...) - counts the number of different tuples from the elements in the corresponding positions in several arrays.
*
* arrayEnumerateUniq(arr)
* - outputs an array parallel to this, where for each element specified
* how much times this element was encountered before among elements with the same value.
* - outputs an array parallel (having same size) to this, where for each element specified
* how much times this element was encountered before (including this element) among elements with the same value.
* For example: arrayEnumerateUniq([10, 20, 10, 30]) = [1, 1, 2, 1]
* arrayEnumerateUniq(arr1, arr2...)
* - for tuples from elements in the corresponding positions in several arrays.
@ -57,6 +57,7 @@ namespace ErrorCodes
* emptyArrayToSingle(arr) - replace empty arrays with arrays of one element with a default value.
*
* arrayReduce('agg', arr1, ...) - apply the aggregate function `agg` to arrays `arr1...`
* If multiple arrays passed, then elements on corresponding positions are passed as multiple arguments to the aggregate function.
*/
@ -193,7 +194,7 @@ private:
static bool hasNull(const U & value, const PaddedPODArray<UInt8> & null_map, size_t i)
{
throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
throw Exception{"Logical error: constant column cannot have null map.", ErrorCodes::LOGICAL_ERROR};
}
/// Both function arguments are ordinary.
@ -1033,7 +1034,7 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
{
/// If one or both arguments passed to this function are nullable,
/// we create a new block that contains non-nullable parameters:
/// we create a new block that contains non-nullable arguments:
/// - if the 1st argument is a non-constant array of nullable values,
/// it is turned into a non-constant array of ordinary values + a null
/// byte map;
@ -1199,7 +1200,7 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override;
private:
/// Initially allocate a piece of memory for 512 elements.
/// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
static constexpr size_t INITIAL_SIZE_DEGREE = 9;
template <typename T>
@ -1239,7 +1240,7 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override;
private:
/// Initially allocate a piece of memory for 512 elements.
/// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
static constexpr size_t INITIAL_SIZE_DEGREE = 9;
template <typename T>
@ -1343,7 +1344,7 @@ public:
class FunctionArrayReverse : public IFunction
{
public:
static constexpr auto name = "reverse";
static constexpr auto name = "arrayReverse";
static FunctionPtr create(const Context & context);
String getName() const override;
@ -1382,7 +1383,7 @@ class IAggregateFunction;
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
/** Applies an aggregate function to array and returns its result.
* If aggregate function has multiple arguments, then this function can be applied to multiple arrays with the same size.
* If aggregate function has multiple arguments, then this function can be applied to multiple arrays of the same size.
*/
class FunctionArrayReduce : public IFunction
{
@ -1406,9 +1407,9 @@ private:
};
struct NameHas { static constexpr auto name = "has"; };
struct NameIndexOf { static constexpr auto name = "indexOf"; };
struct NameCountEqual { static constexpr auto name = "countEqual"; };
struct NameHas { static constexpr auto name = "has"; };
struct NameIndexOf { static constexpr auto name = "indexOf"; };
struct NameCountEqual { static constexpr auto name = "countEqual"; };
using FunctionHas = FunctionArrayIndex<IndexToOne, NameHas>;
using FunctionIndexOf = FunctionArrayIndex<IndexIdentity, NameIndexOf>;

View File

@ -8,7 +8,7 @@
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h>
#include <ext/range.hpp>
#include <ext/range.h>
#include <unicode/ucnv.h>
@ -22,6 +22,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int CANNOT_CREATE_CHARSET_CONVERTER;
extern const int CANNOT_CONVERT_CHARSET;
extern const int ILLEGAL_COLUMN;
}

Some files were not shown because too many files have changed in this diff Show More