From f7300b5d31ef341c6169662ef87e40dbe7272f83 Mon Sep 17 00:00:00 2001 From: Dmitrii Makarenko Date: Fri, 30 Jun 2023 11:58:12 +0000 Subject: [PATCH] [Bench] Reduce input cols number for fetch To estimate output size of result query to allocate buffer we are running `count*` before some actual queries. This estimation requires only query body argumnets without `select` arguments, so this commit changes input_cols for `count*` query. Resolves: #574 Signed-off-by: Dmitrii Makarenko --- omniscidb/QueryEngine/CMakeLists.txt | 1 + .../QueryEngine/CardinalityEstimator.cpp | 3 +- omniscidb/QueryEngine/CardinalityEstimator.h | 1 + .../Descriptors/QueryFragmentDescriptor.cpp | 4 +- omniscidb/QueryEngine/JoinFilterPushDown.cpp | 15 +-- omniscidb/QueryEngine/RelAlgExecutionUnit.cpp | 84 ++++++++++++++ omniscidb/QueryEngine/RelAlgExecutionUnit.h | 105 ++++++++++++++++-- omniscidb/QueryEngine/RelAlgExecutor.cpp | 18 +-- .../Visitors/InputColumnsCollector.h | 37 ++++++ omniscidb/QueryEngine/WorkUnitBuilder.cpp | 89 +++------------ omniscidb/QueryEngine/WorkUnitBuilder.h | 3 +- 11 files changed, 246 insertions(+), 114 deletions(-) create mode 100644 omniscidb/QueryEngine/RelAlgExecutionUnit.cpp create mode 100644 omniscidb/QueryEngine/Visitors/InputColumnsCollector.h diff --git a/omniscidb/QueryEngine/CMakeLists.txt b/omniscidb/QueryEngine/CMakeLists.txt index 727fc36afa..c57d0d942b 100644 --- a/omniscidb/QueryEngine/CMakeLists.txt +++ b/omniscidb/QueryEngine/CMakeLists.txt @@ -79,6 +79,7 @@ set(query_engine_source_files QueryExecutionSequence.cpp QueryMemoryInitializer.cpp RelAlgDagBuilder.cpp + RelAlgExecutionUnit.cpp RelAlgExecutor.cpp RelAlgTranslator.cpp RelAlgOptimizer.cpp diff --git a/omniscidb/QueryEngine/CardinalityEstimator.cpp b/omniscidb/QueryEngine/CardinalityEstimator.cpp index ef528692d7..4e28b66b11 100644 --- a/omniscidb/QueryEngine/CardinalityEstimator.cpp +++ b/omniscidb/QueryEngine/CardinalityEstimator.cpp @@ -100,9 +100,10 @@ RelAlgExecutionUnit create_ndv_execution_unit(const RelAlgExecutionUnit& ra_exe_ RelAlgExecutionUnit create_count_all_execution_unit( const RelAlgExecutionUnit& ra_exe_unit, + const SchemaProvider* schema_provider, hdk::ir::ExprPtr replacement_target) { return {ra_exe_unit.input_descs, - ra_exe_unit.input_col_descs, + schema_provider, ra_exe_unit.simple_quals, ra_exe_unit.quals, ra_exe_unit.join_quals, diff --git a/omniscidb/QueryEngine/CardinalityEstimator.h b/omniscidb/QueryEngine/CardinalityEstimator.h index 29354bcda6..283dcef3a7 100644 --- a/omniscidb/QueryEngine/CardinalityEstimator.h +++ b/omniscidb/QueryEngine/CardinalityEstimator.h @@ -66,6 +66,7 @@ RelAlgExecutionUnit create_ndv_execution_unit(const RelAlgExecutionUnit& ra_exe_ RelAlgExecutionUnit create_count_all_execution_unit( const RelAlgExecutionUnit& ra_exe_unit, + const SchemaProvider* schema_provider, hdk::ir::ExprPtr replacement_target); ResultSetPtr reduce_estimator_results( diff --git a/omniscidb/QueryEngine/Descriptors/QueryFragmentDescriptor.cpp b/omniscidb/QueryEngine/Descriptors/QueryFragmentDescriptor.cpp index 28b38cad41..98d70490b7 100644 --- a/omniscidb/QueryEngine/Descriptors/QueryFragmentDescriptor.cpp +++ b/omniscidb/QueryEngine/Descriptors/QueryFragmentDescriptor.cpp @@ -151,7 +151,9 @@ void QueryFragmentDescriptor::buildFragmentPerKernelForTable( } ExecutionKernelDescriptor execution_kernel_desc{ - device_id, {}, fragment.getNumTuples()}; + /*device_id*/ device_id, + /*fragments*/ {}, + /*outer_tuple_count*/ fragment.getNumTuples()}; if (table_desc_offset) { const auto frag_ids = executor->getTableFragmentIndices(ra_exe_unit, diff --git a/omniscidb/QueryEngine/JoinFilterPushDown.cpp b/omniscidb/QueryEngine/JoinFilterPushDown.cpp index b9fe3625f0..a77cca4ad3 100644 --- a/omniscidb/QueryEngine/JoinFilterPushDown.cpp +++ b/omniscidb/QueryEngine/JoinFilterPushDown.cpp @@ -18,6 +18,7 @@ #include "IR/ExprCollector.h" #include "IR/ExprRewriter.h" #include "RelAlgExecutor.h" +#include "Visitors/InputColumnsCollector.h" namespace { @@ -27,15 +28,6 @@ class BindFilterToOutermostVisitor : public hdk::ir::ExprRewriter { } }; -class InputColumnsCollector - : public hdk::ir::ExprCollector, - InputColumnsCollector> { - protected: - void visitColumnVar(const hdk::ir::ColumnVar* col_var) override { - result_.insert(InputColDescriptor(col_var->columnInfo(), 0)); - } -}; - } // namespace /** @@ -49,7 +41,7 @@ FilterSelectivity RelAlgExecutor::getFilterSelectivity( const std::vector& filter_expressions, const CompilationOptions& co, const ExecutionOptions& eo) { - InputColumnsCollector input_columns_collector; + UsedInputsCollector input_columns_collector; std::list quals; BindFilterToOutermostVisitor bind_filter_to_outermost; for (const auto& filter_expr : filter_expressions) { @@ -59,7 +51,8 @@ FilterSelectivity RelAlgExecutor::getFilterSelectivity( auto& input_column_descriptors = input_columns_collector.result(); std::vector input_descs; std::list> input_col_descs; - for (const auto& input_col_desc : input_column_descriptors) { + for (const auto& input_col_var : input_column_descriptors) { + auto input_col_desc = InputColDescriptor(input_col_var.columnInfo(), 0); if (input_descs.empty()) { input_descs.push_back(input_col_desc.getScanDesc()); } else { diff --git a/omniscidb/QueryEngine/RelAlgExecutionUnit.cpp b/omniscidb/QueryEngine/RelAlgExecutionUnit.cpp new file mode 100644 index 0000000000..63278082e2 --- /dev/null +++ b/omniscidb/QueryEngine/RelAlgExecutionUnit.cpp @@ -0,0 +1,84 @@ +/** + * Copyright 2023 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "RelAlgExecutionUnit.h" +#include "Visitors/InputColumnsCollector.h" + +void RelAlgExecutionUnit::calcInputColDescs(const SchemaProvider* schema_provider) { + // Scan all currently used expressions to determine used columns. + UsedInputsCollector collector; + for (auto& expr : simple_quals) { + collector.visit(expr.get()); + } + for (auto& expr : quals) { + collector.visit(expr.get()); + } + for (auto& expr : groupby_exprs) { + if (expr) { + collector.visit(expr.get()); + } + } + for (auto& join_qual : join_quals) { + for (auto& expr : join_qual.quals) { + collector.visit(expr.get()); + } + } + + for (auto& expr : target_exprs) { + collector.visit(expr); + } + + if (partition_offsets_col) { + collector.visit(partition_offsets_col.get()); + } + + std::vector> col_descs; + for (auto& col_var : collector.result()) { + col_descs.push_back(std::make_shared(col_var.columnInfo(), + col_var.rteIdx())); + } + + // For UNION we only have column variables for a single table used + // in target expressions but should mark all columns as used. + if (union_all && !col_descs.empty()) { + CHECK_EQ(col_descs.front()->getNestLevel(), 0); + CHECK_EQ(input_descs.size(), (size_t)2); + TableRef processed_table_ref(col_descs.front()->getDatabaseId(), + col_descs.front()->getTableId()); + for (auto tdesc : input_descs) { + if (tdesc.getTableRef() != processed_table_ref) { + auto columns = schema_provider->listColumns(tdesc.getTableRef()); + for (auto& col_info : columns) { + if (!col_info->is_rowid) { + col_descs.push_back(std::make_shared(col_info, 0)); + } + } + } + } + } + + std::sort( + col_descs.begin(), + col_descs.end(), + [](std::shared_ptr const& lhs, + std::shared_ptr const& rhs) { + return std::make_tuple(lhs->getNestLevel(), lhs->getColId(), lhs->getTableId()) < + std::make_tuple(rhs->getNestLevel(), rhs->getColId(), rhs->getTableId()); + }); + + input_col_descs.clear(); + input_col_descs.insert(input_col_descs.end(), col_descs.begin(), col_descs.end()); +} diff --git a/omniscidb/QueryEngine/RelAlgExecutionUnit.h b/omniscidb/QueryEngine/RelAlgExecutionUnit.h index d59e9961cc..55bfc5dce3 100644 --- a/omniscidb/QueryEngine/RelAlgExecutionUnit.h +++ b/omniscidb/QueryEngine/RelAlgExecutionUnit.h @@ -1,5 +1,5 @@ -/* - * Copyright 2017 MapD Technologies, Inc. +/** + * Copyright 2023 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,8 +23,7 @@ * Copyright (c) 2016 MapD Technologies, Inc. All rights reserved. **/ -#ifndef QUERYENGINE_RELALGEXECUTIONUNIT_H -#define QUERYENGINE_RELALGEXECUTIONUNIT_H +#pragma once #include "CostModel/CostModel.h" #include "Descriptors/InputDescriptors.h" @@ -64,14 +63,15 @@ using QueryPlanHash = size_t; // used to detect a correct cached hashtable struct HashTableBuildDag { public: - HashTableBuildDag(const JoinColumnsInfo& in_inner_cols_info, - const JoinColumnsInfo& in_outer_cols_info, - const QueryPlan& in_inner_cols_access_path, - const QueryPlan& in_outer_cols_access_path) + explicit HashTableBuildDag(const JoinColumnsInfo& in_inner_cols_info, + const JoinColumnsInfo& in_outer_cols_info, + const QueryPlan& in_inner_cols_access_path, + const QueryPlan& in_outer_cols_access_path) : inner_cols_info(in_inner_cols_info) , outer_cols_info(in_outer_cols_info) , inner_cols_access_path(in_inner_cols_access_path) , outer_cols_access_path(in_outer_cols_access_path) {} + JoinColumnsInfo inner_cols_info; JoinColumnsInfo outer_cols_info; QueryPlan inner_cols_access_path; @@ -127,7 +127,88 @@ struct JoinCondition { using JoinQualsPerNestingLevel = std::vector; -struct RelAlgExecutionUnit { +class RelAlgExecutionUnit { + public: + RelAlgExecutionUnit( + std::vector input_descs, + std::list> input_col_descs, + std::list simple_quals, + std::list quals, + const JoinQualsPerNestingLevel join_quals, + std::list groupby_exprs, + std::vector target_exprs, + const std::shared_ptr estimator, + const SortInfo sort_info, + size_t scan_limit, + QueryPlan query_plan_dag = {EMPTY_QUERY_PLAN}, + HashTableBuildDagMap hash_table_build_plan_dag = {}, + TableIdToNodeMap table_id_to_node_map = {}, + const std::optional union_all = {}, + std::optional shuffle_fn = {}, + hdk::ir::ExprPtr partition_offsets_col = nullptr, + bool partitioned_aggregation = false, + std::shared_ptr cost_model = nullptr, + std::vector templs = {}) + : input_descs(std::move(input_descs)) + , input_col_descs(std::move(input_col_descs)) + , simple_quals(std::move(simple_quals)) + , quals(std::move(quals)) + , join_quals(std::move(join_quals)) + , groupby_exprs(std::move(groupby_exprs)) + , target_exprs(std::move(target_exprs)) + , estimator(std::move(estimator)) + , sort_info(std::move(sort_info)) + , scan_limit(scan_limit) + , query_plan_dag(std::move(query_plan_dag)) + , hash_table_build_plan_dag(std::move(hash_table_build_plan_dag)) + , table_id_to_node_map(std::move(table_id_to_node_map)) + , union_all(union_all) + , shuffle_fn(std::move(shuffle_fn)) + , partition_offsets_col(std::move(partition_offsets_col)) + , partitioned_aggregation(partitioned_aggregation) + , cost_model(std::move(cost_model)) + , templs(std::move(templs)) {} + + RelAlgExecutionUnit(std::vector input_descs, + const SchemaProvider* schema_provider, + std::list simple_quals, + std::list quals, + const JoinQualsPerNestingLevel join_quals, + std::list groupby_exprs, + std::vector target_exprs, + const std::shared_ptr estimator, + const SortInfo sort_info, + size_t scan_limit, + QueryPlan query_plan_dag = {EMPTY_QUERY_PLAN}, + HashTableBuildDagMap hash_table_build_plan_dag = {}, + TableIdToNodeMap table_id_to_node_map = {}, + const std::optional union_all = {}, + std::optional shuffle_fn = {}, + hdk::ir::ExprPtr partition_offsets_col = nullptr, + bool partitioned_aggregation = false, + std::shared_ptr cost_model = nullptr, + std::vector templs = {}) + : input_descs(std::move(input_descs)) + , simple_quals(std::move(simple_quals)) + , quals(std::move(quals)) + , join_quals(std::move(join_quals)) + , groupby_exprs(std::move(groupby_exprs)) + , target_exprs(std::move(target_exprs)) + , estimator(std::move(estimator)) + , sort_info(std::move(sort_info)) + , scan_limit(scan_limit) + , query_plan_dag(std::move(query_plan_dag)) + , hash_table_build_plan_dag(std::move(hash_table_build_plan_dag)) + , table_id_to_node_map(std::move(table_id_to_node_map)) + , union_all(union_all) + , shuffle_fn(std::move(shuffle_fn)) + , partition_offsets_col(std::move(partition_offsets_col)) + , partitioned_aggregation(partitioned_aggregation) + , cost_model(std::move(cost_model)) + , templs(std::move(templs)) { + calcInputColDescs(schema_provider); + } + std::vector input_descs; std::list> input_col_descs; std::list simple_quals; @@ -158,9 +239,11 @@ struct RelAlgExecutionUnit { bool isShuffleCount() const { return shuffle_fn && !partition_offsets_col; } bool isShuffle() const { return shuffle_fn && partition_offsets_col; } + + private: + // Method used for creation input_col_descs from qualifires and expressions + void calcInputColDescs(const SchemaProvider* schema_provider); }; std::ostream& operator<<(std::ostream& os, const RelAlgExecutionUnit& ra_exe_unit); std::string ra_exec_unit_desc_for_caching(const RelAlgExecutionUnit& ra_exe_unit); - -#endif // QUERYENGINE_RELALGEXECUTIONUNIT_H diff --git a/omniscidb/QueryEngine/RelAlgExecutor.cpp b/omniscidb/QueryEngine/RelAlgExecutor.cpp index 81f4ee3c58..d89d8d976b 100644 --- a/omniscidb/QueryEngine/RelAlgExecutor.cpp +++ b/omniscidb/QueryEngine/RelAlgExecutor.cpp @@ -473,20 +473,6 @@ void RelAlgExecutor::handleNop(RaExecutionDesc& ed) { namespace { -struct ColumnRefHash { - size_t operator()(const hdk::ir::ColumnRef& col_ref) const { return col_ref.hash(); } -}; - -using ColumnRefSet = std::unordered_set; - -class UsedInputsCollector - : public hdk::ir::ExprCollector { - protected: - void visitColumnRef(const hdk::ir::ColumnRef* col_ref) override { - result_.insert(*col_ref); - } -}; - const hdk::ir::Node* get_data_sink(const hdk::ir::Node* ra_node) { if (auto join = dynamic_cast(ra_node)) { CHECK_EQ(size_t(2), join->inputCount()); @@ -1526,8 +1512,8 @@ std::optional RelAlgExecutor::getFilteredCountAll(const WorkUnit& work_u count_all_agg = builder.count(); } - const auto count_all_exe_unit = - create_count_all_execution_unit(work_unit.exe_unit, count_all_agg.expr()); + const auto count_all_exe_unit = create_count_all_execution_unit( + work_unit.exe_unit, schema_provider_.get(), count_all_agg.expr()); size_t one{1}; hdk::ResultSetTable count_all_result; try { diff --git a/omniscidb/QueryEngine/Visitors/InputColumnsCollector.h b/omniscidb/QueryEngine/Visitors/InputColumnsCollector.h new file mode 100644 index 0000000000..d92fabe196 --- /dev/null +++ b/omniscidb/QueryEngine/Visitors/InputColumnsCollector.h @@ -0,0 +1,37 @@ +/** + * Copyright 2023 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "IR/ExprCollector.h" + +#include + +struct ColumnVarHash { + size_t operator()(const hdk::ir::ColumnVar& col_var) const { return col_var.hash(); } +}; + +using ColumnVarSet = std::unordered_set; + +class UsedInputsCollector + : public hdk::ir::ExprCollector { + protected: + void visitColumnRef(const hdk::ir::ColumnRef* col_ref) override { CHECK(false); } + + void visitColumnVar(const hdk::ir::ColumnVar* col_var) override { + result_.insert(*col_var); + } +}; diff --git a/omniscidb/QueryEngine/WorkUnitBuilder.cpp b/omniscidb/QueryEngine/WorkUnitBuilder.cpp index 801f53aa2c..becc2926a2 100644 --- a/omniscidb/QueryEngine/WorkUnitBuilder.cpp +++ b/omniscidb/QueryEngine/WorkUnitBuilder.cpp @@ -14,24 +14,12 @@ #include "QueryEngine/ExpressionRewrite.h" #include "QueryEngine/FromTableReordering.h" #include "QueryEngine/QueryPlanDagExtractor.h" +#include "Visitors/InputColumnsCollector.h" namespace hdk { namespace { -struct ColumnVarHash { - size_t operator()(const ir::ColumnVar& col_var) const { return col_var.hash(); } -}; - -using ColumnVarSet = std::unordered_set; - -class UsedInputsCollector : public ir::ExprCollector { - protected: - void visitColumnRef(const ir::ColumnRef* col_ref) override { CHECK(false); } - - void visitColumnVar(const ir::ColumnVar* col_var) override { result_.insert(*col_var); } -}; - class StringGuardForL0 : public ir::ExprVisitor { public: bool isStrPresent() { return string_present_; } @@ -338,7 +326,7 @@ RelAlgExecutionUnit WorkUnitBuilder::exeUnit() const { target_exprs.push_back(expr.get()); } return {input_descs_, - input_col_descs_, + schema_provider_.get(), simple_quals_, quals_, join_quals_, @@ -375,7 +363,7 @@ void WorkUnitBuilder::build() { reorderTables(); } computeSimpleQuals(); - computeInputColDescs(); + verifyInputColDescs(); } void WorkUnitBuilder::process(const ir::Node* node) { @@ -882,16 +870,22 @@ void WorkUnitBuilder::computeInputDescs() { }); } -void WorkUnitBuilder::computeInputColDescs() { +void WorkUnitBuilder::verifyInputColDescs() { + bool is_l0 = co_.device_type == ExecutorDeviceType::GPU && executor_ && + executor_->getDataMgr()->getGpuMgr() && + executor_->getDataMgr()->getGpuMgr()->getPlatform() == GpuMgrPlatform::L0; + + if (!is_l0) { + return; + } + // Scan all currently used expressions to determine used columns. UsedInputsCollector collector; for (auto& expr : simple_quals_) { collector.visit(expr.get()); } for (auto& expr : quals_) { - if (expr->as() && co_.device_type == ExecutorDeviceType::GPU && - executor_ && executor_->getDataMgr()->getGpuMgr() && - executor_->getDataMgr()->getGpuMgr()->getPlatform() == GpuMgrPlatform::L0) { + if (expr->as()) { StringDictTransGuardForL0 strDictTransGuard; strDictTransGuard.visit(expr.get()); if (strDictTransGuard.isStrDictTranslation()) { @@ -911,61 +905,12 @@ void WorkUnitBuilder::computeInputColDescs() { } } - if (co_.device_type == ExecutorDeviceType::GPU && executor_ && - executor_->getDataMgr()->getGpuMgr() && - executor_->getDataMgr()->getGpuMgr()->getPlatform() == GpuMgrPlatform::L0) { - ColumnVarSet non_targets_touch = collector.result(); - for (const auto& col_var : non_targets_touch) { - if (col_var.columnInfo()->type->isString()) { - throw QueryMustRunOnCpu(); - } - } - } - - for (auto& expr : target_exprs_[0]) { - collector.visit(expr.get()); - } - - if (partition_offsets_col_) { - collector.visit(partition_offsets_col_.get()); - } - - std::vector> col_descs; - for (auto& col_var : collector.result()) { - col_descs.push_back(std::make_shared(col_var.columnInfo(), - col_var.rteIdx())); - } - - // For UNION we only have column variables for a single table used - // in target expressions but should mark all columns as used. - if (union_all_ && !col_descs.empty()) { - CHECK_EQ(col_descs.front()->getNestLevel(), 0); - CHECK_EQ(input_descs_.size(), (size_t)2); - TableRef processed_table_ref(col_descs.front()->getDatabaseId(), - col_descs.front()->getTableId()); - for (auto tdesc : input_descs_) { - if (tdesc.getTableRef() != processed_table_ref) { - auto columns = schema_provider_->listColumns(tdesc.getTableRef()); - for (auto& col_info : columns) { - if (!col_info->is_rowid) { - col_descs.push_back(std::make_shared(col_info, 0)); - } - } - } + auto non_targets_touch = collector.result(); + for (const auto& col_var : non_targets_touch) { + if (col_var.columnInfo()->type->isString()) { + throw QueryMustRunOnCpu(); } } - - std::sort( - col_descs.begin(), - col_descs.end(), - [](std::shared_ptr const& lhs, - std::shared_ptr const& rhs) { - return std::make_tuple(lhs->getNestLevel(), lhs->getColId(), lhs->getTableId()) < - std::make_tuple(rhs->getNestLevel(), rhs->getColId(), rhs->getTableId()); - }); - - input_col_descs_.clear(); - input_col_descs_.insert(input_col_descs_.end(), col_descs.begin(), col_descs.end()); } } // namespace hdk diff --git a/omniscidb/QueryEngine/WorkUnitBuilder.h b/omniscidb/QueryEngine/WorkUnitBuilder.h index 4223df54b4..3ca9932e8c 100644 --- a/omniscidb/QueryEngine/WorkUnitBuilder.h +++ b/omniscidb/QueryEngine/WorkUnitBuilder.h @@ -71,7 +71,7 @@ class WorkUnitBuilder { int assignNestLevels(const ir::Node* node, int start_idx = 0); void computeJoinTypes(const ir::Node* node, bool allow_join = true); void computeInputDescs(); - void computeInputColDescs(); + void verifyInputColDescs(); class InputRewriter : public ir::ExprRewriter { public: @@ -122,7 +122,6 @@ class WorkUnitBuilder { bool is_agg_ = false; std::vector input_descs_; - std::list> input_col_descs_; std::list simple_quals_; std::list quals_; JoinQualsPerNestingLevel join_quals_;