From 7a199e1e0e29b852616f783ea68559705b79bf33 Mon Sep 17 00:00:00 2001 From: William Zhang <17zhangw@gmail.com> Date: Sun, 11 Jul 2021 07:45:45 -0700 Subject: [PATCH] Index Knobs Support (#1623) --- benchmark/catalog/catalog_benchmark.cpp | 4 +- benchmark/runner/execution_runners.cpp | 105 +++++--- benchmark/storage/index_wrapper_benchmark.cpp | 3 +- benchmark/storage/recovery_benchmark.cpp | 5 +- sample_tpl/parallel-agg.tpl | 2 +- sample_tpl/parallel-join.tpl | 4 +- sample_tpl/parallel-scan.tpl | 2 +- script/self_driving/modeling/type.py | 12 +- src/catalog/index_schema.cpp | 26 +- src/catalog/postgres/builder.cpp | 82 ++++-- src/catalog/postgres/pg_core_impl.cpp | 8 +- src/execution/compiler/codegen.cpp | 6 +- .../operator/index_create_translator.cpp | 23 +- src/execution/exec/execution_context.cpp | 3 +- src/execution/sema/sema_builtin.cpp | 16 +- src/execution/sql/ddl_executors.cpp | 7 +- src/execution/sql/table_vector_iterator.cpp | 7 +- src/execution/vm/bytecode_emitter.cpp | 6 +- src/execution/vm/bytecode_generator.cpp | 8 +- src/execution/vm/vm.cpp | 3 +- src/include/catalog/index_schema.h | 245 +++++++++++++++++- src/include/catalog/postgres/pg_class.h | 16 +- src/include/execution/sql/ddl_executors.h | 1 + .../execution/sql/table_vector_iterator.h | 3 +- src/include/execution/vm/bytecode_emitter.h | 2 +- src/include/execution/vm/bytecode_handlers.h | 3 +- src/include/execution/vm/bytecodes.h | 2 +- src/include/metrics/pipeline_metric.h | 20 +- src/include/optimizer/logical_operators.h | 14 +- src/include/parser/create_statement.h | 11 +- .../self_driving/modeling/operating_unit.h | 34 ++- .../modeling/operating_unit_defs.h | 11 + .../modeling/operating_unit_recorder.h | 15 ++ src/include/storage/index/bplustree.h | 9 +- src/include/storage/index/bplustree_index.h | 18 ++ src/include/storage/index/index_builder.h | 11 + src/include/storage/sql_table.h | 5 + src/optimizer/logical_operators.cpp | 20 +- src/optimizer/physical_operators.cpp | 5 +- src/optimizer/plan_generator.cpp | 5 +- .../query_to_operator_transformer.cpp | 2 +- src/optimizer/rules/implementation_rules.cpp | 7 +- src/parser/postgresparser.cpp | 33 ++- .../modeling/operating_unit_recorder.cpp | 96 ++++++- .../modeling/operating_unit_util.cpp | 8 +- src/storage/index/bplustree_index.cpp | 20 ++ src/storage/index/index_builder.cpp | 48 ++++ src/storage/recovery/recovery_manager.cpp | 16 +- test/catalog/catalog_test.cpp | 24 +- test/execution/ddl_executors_test.cpp | 5 +- test/execution/index_create_test.cpp | 12 +- .../sql_table_vector_iterator_test.cpp | 2 +- test/include/test_util/end_to_end_test.h | 6 + test/include/test_util/storage_test_util.h | 6 +- test/include/test_util/tpcc/schemas.h | 30 ++- test/optimizer/logical_operator_test.cpp | 36 +-- test/optimizer/operator_transformer_test.cpp | 4 +- test/optimizer/physical_operator_test.cpp | 15 +- test/self_driving_e2e/model_server_test.cpp | 8 +- test/sql/create_index_options_test.cpp | 53 ++++ test/storage/bplustree_index_test.cpp | 7 +- test/storage/bwtree_index_test.cpp | 6 +- test/storage/hash_index_test.cpp | 7 +- test/storage/index_key_test.cpp | 30 ++- test/storage/recovery_test.cpp | 3 +- .../table_generator/table_generator.cpp | 4 +- .../table_generator/table_reader.cpp | 3 +- 67 files changed, 1047 insertions(+), 226 deletions(-) create mode 100644 test/sql/create_index_options_test.cpp diff --git a/benchmark/catalog/catalog_benchmark.cpp b/benchmark/catalog/catalog_benchmark.cpp index 92a28a6f68..aac3876d29 100644 --- a/benchmark/catalog/catalog_benchmark.cpp +++ b/benchmark/catalog/catalog_benchmark.cpp @@ -105,7 +105,9 @@ class CatalogBenchmark : public benchmark::Fixture { const catalog::Schema::Column &col) { std::vector key_cols{catalog::IndexSchema::Column{ col.Name(), type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_, table_oid, col.Oid())}}; - auto index_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto index_schema = + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); const auto idx_oid = accessor->CreateIndex(accessor->GetDefaultNamespace(), table_oid, index_name, index_schema); NOISEPAGE_ASSERT(idx_oid != catalog::INVALID_INDEX_OID, "index creation should not fail"); auto true_schema = accessor->GetIndexSchema(idx_oid); diff --git a/benchmark/runner/execution_runners.cpp b/benchmark/runner/execution_runners.cpp index 11d1e1eb8d..cb9677d0bd 100644 --- a/benchmark/runner/execution_runners.cpp +++ b/benchmark/runner/execution_runners.cpp @@ -36,6 +36,7 @@ #include "self_driving/modeling/operating_unit.h" #include "self_driving/modeling/operating_unit_defs.h" #include "self_driving/modeling/operating_unit_recorder.h" +#include "storage/index/bplustree.h" #include "storage/sql_table.h" #include "storage/storage_defs.h" #include "traffic_cop/traffic_cop_util.h" @@ -495,6 +496,7 @@ class ExecutionRunners : public benchmark::Fixture { execution::sql::TableGenerator table_generator(exec_ctx.get(), block_store, accessor->GetDefaultNamespace()); if (is_build) { + // This function builds a BPLUSTREE index table_generator.BuildExecutionRunnerIndex(type, tbl_cols, num_rows, num_key); } else { bool result = table_generator.DropExecutionRunnerIndex(type, tbl_cols, num_rows, num_key); @@ -584,7 +586,7 @@ class ExecutionRunners : public benchmark::Fixture { selfdriving::PipelineOperatingUnits units; selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; exec_ctx->SetPipelineOperatingUnits(common::ManagedPointer(&units)); - pipe0_vec.emplace_back(execution::translator_id_t(1), type, num_elem, 4, 1, num_elem, 1, 0, 0); + pipe0_vec.emplace_back(execution::translator_id_t(1), type, num_elem, 4, 1, num_elem, 1, 0, 0, 0, 0); units.RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); selfdriving::ExecOUFeatureVector ouvec; @@ -948,7 +950,7 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ0_OutputRunners)(benchmark::State &state auto units = std::make_unique(); selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, row_num, - tuple_size, num_col, 0, 1, 0, 0); + tuple_size, num_col, 0, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); exec_query.SetPipelineOperatingUnits(std::move(units)); @@ -978,7 +980,7 @@ void ExecutionRunners::ExecuteIndexOperation(benchmark::State *state, bool is_in auto units = std::make_unique(); std::stringstream query; - query << "CREATE INDEX idx" << i << " ON " << tbl_name << " (" << cols << ")"; + query << "CREATE INDEX idx" << i << " ON " << tbl_name << " USING BPLUSTREE (" << cols << ")"; auto equery = OptimizeSqlStatement(query.str(), std::make_unique(), std::move(units), PassthroughPlanChecker, nullptr, nullptr, &settings); BenchmarkExecQuery(1, equery.first.get(), equery.second.get(), true, &empty_params, &settings); @@ -1019,8 +1021,9 @@ void ExecutionRunners::ExecuteIndexOperation(benchmark::State *state, bool is_in : selfdriving::ExecutionOperatingUnitType::INDEX_DELETE; auto type_size = type::TypeUtil::GetTypeSize(type); auto key_size = type_size * key_num; - pipe0_vec.emplace_back(execution::translator_id_t(1), feature_type, num_rows, key_size, key_num, num_index, 1, 0, - 0); + pipe0_vec.emplace_back(execution::translator_id_t(1), feature_type, num_rows, key_size, key_num, num_index, 1, 0, 0, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); selfdriving::PipelineOperatingUnits units; units.RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); exec_ctx->SetPipelineOperatingUnits(common::ManagedPointer(&units)); @@ -1195,9 +1198,9 @@ void ExecutionRunners::ExecuteSeqScan(benchmark::State *state) { auto units = std::make_unique(); selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, row, - tuple_size, num_col, 0, 1, 0, 0); + tuple_size, num_col, 0, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); std::string query_final; @@ -1255,9 +1258,11 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ2_0_IndexScanRunners)(benchmark::State & auto units = std::make_unique(); selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, lookup_size, - tuple_size, key_num, 0, 1, 0, 0); + tuple_size, key_num, 0, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::IDX_SCAN, num_rows, - tuple_size, key_num, lookup_size, 1, 0, 0); + tuple_size, key_num, lookup_size, 1, 0, 0, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); std::vector params; @@ -1334,15 +1339,17 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ2_1_IndexJoinRunners)(benchmark::State & // We only ever emit min(outer, inner) # of tuples // Even though there are no matches, it still might be a good idea to see what the relation is pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, - std::min(inner, outer), tuple_size, key_num, 0, 1, 0, 0); + std::min(inner, outer), tuple_size, key_num, 0, 1, 0, 0, 0, 0); // Outer table scan happens pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, outer, - tuple_size, key_num, outer, 1, 0, 0); + tuple_size, key_num, outer, 1, 0, 0, 0, 0); // For each in outer, match 1 tuple in inner pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::IDX_SCAN, inner, - tuple_size, key_num, 1, 1, outer, 0); + tuple_size, key_num, 1, 1, outer, 0, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); auto cols = ConstructSQLClause(type, type::TypeId::INVALID, key_num, 0, ", ", "a", false, ""); @@ -1440,7 +1447,7 @@ void ExecutionRunners::ExecuteInsert(benchmark::State *state) { auto units = std::make_unique(); selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::INSERT, num_rows, - tuple_size, num_cols, num_rows, 1, 0, 0); + tuple_size, num_cols, num_rows, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); auto num_iters = 1 + ((num_rows <= settings.warmup_rows_limit_) ? settings.warmup_iterations_num_ : 0); @@ -1526,9 +1533,11 @@ void ExecutionRunners::ExecuteUpdate(benchmark::State *state) { auto units = std::make_unique(); selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::UPDATE, car, - tuple_size, update_keys, car, 1, 0, 0); + tuple_size, update_keys, car, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::IDX_SCAN, row, - idx_size, num_col, car, 1, 0, 0); + idx_size, num_col, car, 1, 0, 0, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); std::vector params; @@ -1603,11 +1612,15 @@ void ExecutionRunners::ExecuteDelete(benchmark::State *state) { auto units = std::make_unique(); selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::DELETE, car, tbl_size, - tbl_col, car, 1, 0, 0); + tbl_col, car, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::INDEX_DELETE, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::IDX_SCAN, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); std::vector params; @@ -1670,12 +1683,12 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ3_SortRunners)(benchmark::State &state) output_num = car; } pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, row, - tuple_size, num_col, table_car, 1, 0, 0); - pipe0_vec.emplace_back(execution::translator_id_t(1), build_ou_type, row, tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, table_car, 1, 0, 0, 0, 0); + pipe0_vec.emplace_back(execution::translator_id_t(1), build_ou_type, row, tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SORT_ITERATE, - output_num, tuple_size, num_col, car, 1, 0, 0); + output_num, tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, output_num, - tuple_size, num_col, 0, 1, 0, 0); + tuple_size, num_col, 0, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(2), std::move(pipe0_vec)); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe1_vec)); @@ -1720,15 +1733,15 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ4_HashJoinSelfRunners)(benchmark::State selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; selfdriving::ExecutionOperatingUnitFeatureVector pipe1_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::HASHJOIN_BUILD, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::HASHJOIN_PROBE, row, - tuple_size, num_col, hj_output, 1, 0, 0); + tuple_size, num_col, hj_output, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, hj_output, - tuple_size, num_col, 0, 1, 0, 0); + tuple_size, num_col, 0, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(2), std::move(pipe0_vec)); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe1_vec)); @@ -1773,15 +1786,15 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ4_HashJoinNonSelfRunners)(benchmark::Sta selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; selfdriving::ExecutionOperatingUnitFeatureVector pipe1_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, build_row, - tuple_size, num_col, build_car, 1, 0, 0); + tuple_size, num_col, build_car, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::HASHJOIN_BUILD, - build_row, tuple_size, num_col, build_car, 1, 0, 0); + build_row, tuple_size, num_col, build_car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, probe_row, - tuple_size, num_col, probe_car, 1, 0, 0); + tuple_size, num_col, probe_car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::HASHJOIN_PROBE, - probe_row, tuple_size, num_col, matched_car, 1, 0, 0); + probe_row, tuple_size, num_col, matched_car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, matched_car, - tuple_size, num_col, 0, 1, 0, 0); + tuple_size, num_col, 0, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(2), std::move(pipe0_vec)); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe1_vec)); @@ -1834,13 +1847,13 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ5_0_AggregateRunners)(benchmark::State & selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; selfdriving::ExecutionOperatingUnitFeatureVector pipe1_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::AGGREGATE_BUILD, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::AGGREGATE_ITERATE, car, - out_size, out_cols, car, 1, 0, 0); + out_size, out_cols, car, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, car, out_size, - out_cols, 0, 1, 0, 0); + out_cols, 0, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(2), std::move(pipe0_vec)); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe1_vec)); @@ -1878,13 +1891,13 @@ BENCHMARK_DEFINE_F(ExecutionRunners, SEQ5_1_AggregateRunners)(benchmark::State & selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; selfdriving::ExecutionOperatingUnitFeatureVector pipe1_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::SEQ_SCAN, row, - tuple_size, num_col, car, 1, 0, 0); + tuple_size, num_col, car, 1, 0, 0, 0, 0); pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::AGGREGATE_BUILD, row, - 0, num_col, 1, 1, 0, 0); + 0, num_col, 1, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::AGGREGATE_ITERATE, 1, - out_size, out_cols, 1, 1, 0, 0); + out_size, out_cols, 1, 1, 0, 0, 0, 0); pipe1_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::OUTPUT, 1, out_size, - out_cols, 0, 1, 0, 0); + out_cols, 0, 1, 0, 0, 0, 0); units->RecordOperatingUnit(execution::pipeline_id_t(2), std::move(pipe0_vec)); units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe1_vec)); @@ -1943,12 +1956,20 @@ void ExecutionRunners::ExecuteCreateIndex(benchmark::State *state) { auto units = std::make_unique(); selfdriving::ExecutionOperatingUnitFeatureVector pipe0_vec; pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::CREATE_INDEX, row, - tuple_size, num_col, car, 1, 0, num_threads); + tuple_size, num_col, car, 1, 0, num_threads, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); + if (num_threads != 0) { + pipe0_vec.emplace_back(execution::translator_id_t(1), selfdriving::ExecutionOperatingUnitType::CREATE_INDEX_MAIN, + row, tuple_size, num_col, car, 1, 0, num_threads, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD, + storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD); + } units->RecordOperatingUnit(execution::pipeline_id_t(1), std::move(pipe0_vec)); std::stringstream query; std::string idx_name("runner_idx"); - query << "CREATE INDEX " << idx_name << " ON " << tbl_name << " (" << cols << ")"; + query << "CREATE INDEX " << idx_name << " ON " << tbl_name << " USING BPLUSTREE (" << cols << ")"; auto equery = OptimizeSqlStatement(query.str(), std::make_unique(), std::move(units), PassthroughPlanChecker, nullptr, nullptr, &exec_settings); BenchmarkExecQuery(1, equery.first.get(), equery.second.get(), true, &empty_params, &exec_settings); diff --git a/benchmark/storage/index_wrapper_benchmark.cpp b/benchmark/storage/index_wrapper_benchmark.cpp index d4579de6fe..acdc6de858 100644 --- a/benchmark/storage/index_wrapper_benchmark.cpp +++ b/benchmark/storage/index_wrapper_benchmark.cpp @@ -105,7 +105,8 @@ class IndexBenchmark : public benchmark::Fixture { StorageTestUtil::ForceOid(&(keycols[0]), catalog::indexkeycol_oid_t(1)); // Define fields of index schema and declare index - index_schema_ = catalog::IndexSchema(keycols, type, false, false, false, true); + catalog::IndexOptions options; + index_schema_ = catalog::IndexSchema(keycols, type, false, false, false, true, options); index_ = (storage::index::IndexBuilder().SetKeySchema(index_schema_)).Build(); // Register index to garbage collector diff --git a/benchmark/storage/recovery_benchmark.cpp b/benchmark/storage/recovery_benchmark.cpp index 48b0775536..6ecaf81591 100644 --- a/benchmark/storage/recovery_benchmark.cpp +++ b/benchmark/storage/recovery_benchmark.cpp @@ -181,7 +181,10 @@ BENCHMARK_DEFINE_F(RecoveryBenchmark, IndexRecovery)(benchmark::State &state) { auto index_col = catalog::IndexSchema::Column("index_col", type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_oid, table_oid, schema.GetColumn(0).Oid())); - catalog::IndexSchema index_schema({index_col}, storage::index::IndexType::BPLUSTREE, true, false, false, true); + + catalog::IndexOptions options; + catalog::IndexSchema index_schema({index_col}, storage::index::IndexType::BPLUSTREE, true, false, false, true, + options); auto index_oid = catalog_accessor->CreateIndex(namespace_oid, table_oid, index_name + std::to_string(i), index_schema); auto *index = storage::index::IndexBuilder().SetKeySchema(index_schema).Build(); diff --git a/sample_tpl/parallel-agg.tpl b/sample_tpl/parallel-agg.tpl index ec84cca8ca..d9f75bb665 100644 --- a/sample_tpl/parallel-agg.tpl +++ b/sample_tpl/parallel-agg.tpl @@ -145,7 +145,7 @@ fun main(execCtx: *ExecutionContext) -> int { @tlsReset(tls, @sizeOf(ThreadState_1), p1_worker_initThreadState, p1_worker_tearDownThreadState, execCtx) // Parallel Scan - @iterateTableParallel("test_1", &state, tls, p1_worker) + @iterateTableParallel("test_1", &state, tls, 0, p1_worker) // ---- Pipeline 1 End ---- // diff --git a/sample_tpl/parallel-join.tpl b/sample_tpl/parallel-join.tpl index e68dafaa7c..58e0d2c5fd 100644 --- a/sample_tpl/parallel-join.tpl +++ b/sample_tpl/parallel-join.tpl @@ -109,7 +109,7 @@ fun pipeline1(execCtx: *ExecutionContext, state: *State) -> nil { var table_oid = @testCatalogLookup(execCtx, "test_1", "") var col_oids : [1]uint32 col_oids[0] = @testCatalogLookup(execCtx, "test_1", "colA") - @iterateTableParallel(table_oid, col_oids, state, execCtx, pipeline1_worker) + @iterateTableParallel(table_oid, col_oids, state, execCtx, 0, pipeline1_worker) // Parallel build the join hash table var off: uint32 = 0 @@ -127,7 +127,7 @@ fun pipeline2(execCtx: *ExecutionContext, state: *State) -> nil { var table_oid = @testCatalogLookup(execCtx, "test_1", "") var col_oids : [1]uint32 col_oids[0] = @testCatalogLookup(execCtx, "test_1", "colA") - @iterateTableParallel(table_oid, col_oids, state, execCtx, pipeline2_worker) + @iterateTableParallel(table_oid, col_oids, state, execCtx, 0, pipeline2_worker) // Collect results @tlsIterate(tls, state, pipeline2_finalize) diff --git a/sample_tpl/parallel-scan.tpl b/sample_tpl/parallel-scan.tpl index 4dcc7b70b4..7707fb920e 100644 --- a/sample_tpl/parallel-scan.tpl +++ b/sample_tpl/parallel-scan.tpl @@ -62,7 +62,7 @@ fun pipeline1(execCtx: *ExecutionContext, state: *State) -> nil { table_oid = @testCatalogLookup(execCtx, "test_1", "") var col_oids: [1]uint32 col_oids[0] = @testCatalogLookup(execCtx, "test_1", "colA") - @iterateTableParallel(table_oid, col_oids, state, execCtx, pipeline1_worker) + @iterateTableParallel(table_oid, col_oids, state, execCtx, 0, pipeline1_worker) // Collect results @tlsIterate(tls, state, pipeline1_finalize) diff --git a/script/self_driving/modeling/type.py b/script/self_driving/modeling/type.py index 1be4c91058..8cb504404c 100644 --- a/script/self_driving/modeling/type.py +++ b/script/self_driving/modeling/type.py @@ -90,13 +90,15 @@ class ExecutionFeature(enum.IntEnum): MEM_FACTOR = 10, NUM_LOOPS = 11, NUM_CONCURRENT = 12, + SPECIFIC_FEATURE0 = 13, + SPECIFIC_FEATURE1 = 14, # interval input features - TXNS_DEALLOCATED = 13, - TXNS_UNLINKED = 14, - BUFFER_UNLINKED = 15, - READONLY_UNLINKED = 16, - INTERVAL = 17, + TXNS_DEALLOCATED = 15, + TXNS_UNLINKED = 16, + BUFFER_UNLINKED = 17, + READONLY_UNLINKED = 18, + INTERVAL = 19, class ConcurrentCountingMode(enum.Enum): diff --git a/src/catalog/index_schema.cpp b/src/catalog/index_schema.cpp index b1b6845d99..9dad9d7980 100644 --- a/src/catalog/index_schema.cpp +++ b/src/catalog/index_schema.cpp @@ -4,6 +4,26 @@ namespace noisepage::catalog { +nlohmann::json IndexOptions::ToJson() const { + nlohmann::json j; + std::vector> options; + options.reserve(GetOptions().size()); + for (const auto &pair : GetOptions()) { + options.emplace_back(pair.first, pair.second->ToJson()); + } + j["knobs"] = options; + return j; +} + +void IndexOptions::FromJson(const nlohmann::json &j) { + auto options = j.at("knobs").get>>(); + for (const auto &key_json : options) { + auto deserialized = parser::DeserializeExpression(key_json.second); + AddOption(key_json.first, std::move(deserialized.result_)); + NOISEPAGE_ASSERT(deserialized.non_owned_exprs_.empty(), "There should be 0 non owned expressions"); + } +} + nlohmann::json IndexSchema::Column::ToJson() const { nlohmann::json j; j["name"] = name_; @@ -37,6 +57,7 @@ nlohmann::json IndexSchema::ToJson() const { j["primary"] = is_primary_; j["exclusion"] = is_exclusion_; j["immediate"] = is_immediate_; + j["options"] = index_options_; return j; } @@ -52,12 +73,15 @@ std::unique_ptr IndexSchema::DeserializeSchema(const nlohmann::json auto exclusion = j.at("exclusion").get(); auto immediate = j.at("immediate").get(); auto type = static_cast(j.at("type").get()); + auto index_options = j.at("options").get(); - auto schema = std::make_unique(columns, type, unique, primary, exclusion, immediate); + auto schema = + std::make_unique(columns, type, unique, primary, exclusion, immediate, std::move(index_options)); return schema; } +DEFINE_JSON_BODY_DECLARATIONS(IndexOptions); DEFINE_JSON_BODY_DECLARATIONS(IndexSchema::Column); DEFINE_JSON_BODY_DECLARATIONS(IndexSchema); diff --git a/src/catalog/postgres/builder.cpp b/src/catalog/postgres/builder.cpp index ac4bba9249..6e861ae05b 100644 --- a/src/catalog/postgres/builder.cpp +++ b/src/catalog/postgres/builder.cpp @@ -51,7 +51,8 @@ IndexSchema Builder::GetDatabaseOidIndexSchema() { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -65,7 +66,8 @@ IndexSchema Builder::GetDatabaseNameIndexSchema() { columns.back().SetOid(indexkeycol_oid_t(1)); // Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true, options); return schema; } @@ -208,6 +210,13 @@ Schema Builder::GetClassTableSchema() { columns.emplace_back("relkind", type::TypeId::TINYINT, false, parser::ConstantValueExpression(type::TypeId::TINYINT)); columns.back().SetOid(PgClass::RELKIND.oid_); + // TODO(wz2): Technically this should be a text[] from https://www.postgresql.org/docs/8.3/catalog-pg-class.html. + // However, we currently do not support array types. For now, the options supplied to CREATE INDEX are dumped + // in JSON form and stored in this column. + columns.emplace_back("reloptions", type::TypeId::VARCHAR, MAX_NAME_LENGTH, true, + parser::ConstantValueExpression(type::TypeId::VARCHAR)); + columns.back().SetOid(PgClass::RELOPTIONS.oid_); + columns.emplace_back("schema", type::TypeId::BIGINT, false, parser::ConstantValueExpression(type::TypeId::BIGINT)); columns.back().SetOid(PgClass::REL_SCHEMA.oid_); @@ -397,7 +406,8 @@ IndexSchema Builder::GetNamespaceOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -410,7 +420,8 @@ IndexSchema Builder::GetNamespaceNameIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true, options); return schema; } @@ -423,7 +434,8 @@ IndexSchema Builder::GetClassOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -440,7 +452,8 @@ IndexSchema Builder::GetClassNameIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(2)); // Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true, options); return schema; } @@ -453,7 +466,8 @@ IndexSchema Builder::GetClassNamespaceIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Not unique - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true, options); return schema; } @@ -466,7 +480,8 @@ IndexSchema Builder::GetIndexOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -479,7 +494,8 @@ IndexSchema Builder::GetIndexTableIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Not unique - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true, options); return schema; } @@ -496,7 +512,8 @@ IndexSchema Builder::GetColumnOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(2)); // Primary, must be a BPLUSTREE due to ScanAscending usage - IndexSchema schema(columns, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); return schema; } @@ -513,7 +530,8 @@ IndexSchema Builder::GetColumnNameIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(2)); // Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true, options); return schema; } @@ -526,7 +544,8 @@ IndexSchema Builder::GetTypeOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -543,7 +562,8 @@ IndexSchema Builder::GetTypeNameIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(2)); // Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true, options); return schema; } @@ -556,7 +576,8 @@ IndexSchema Builder::GetTypeNamespaceIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Not unique - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true, options); return schema; } @@ -570,7 +591,8 @@ IndexSchema Builder::GetConstraintOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -589,7 +611,8 @@ IndexSchema Builder::GetConstraintNameIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(2)); // Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true, options); return schema; } @@ -603,7 +626,8 @@ IndexSchema Builder::GetConstraintNamespaceIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Not unique - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true, options); return schema; } @@ -617,7 +641,8 @@ IndexSchema Builder::GetConstraintTableIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Not unique - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true, options); return schema; } @@ -631,7 +656,8 @@ IndexSchema Builder::GetConstraintIndexIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Not unique - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true, options); return schema; } @@ -645,7 +671,8 @@ IndexSchema Builder::GetConstraintForeignTableIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Not unique - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, false, false, false, true, options); return schema; } @@ -658,7 +685,8 @@ IndexSchema Builder::GetLanguageOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -671,7 +699,8 @@ IndexSchema Builder::GetLanguageNameIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, false, false, true, options); return schema; } @@ -813,7 +842,8 @@ IndexSchema Builder::GetProcOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(1)); // Primary - IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::HASHMAP, true, true, false, true, options); return schema; } @@ -830,7 +860,8 @@ IndexSchema Builder::GetProcNameIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(2)); // Non-Unique, not primary - IndexSchema schema(columns, storage::index::IndexType::BPLUSTREE, false, false, false, false); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::BPLUSTREE, false, false, false, false, options); return schema; } @@ -848,7 +879,8 @@ IndexSchema Builder::GetStatisticOidIndexSchema(db_oid_t db) { columns.back().SetOid(indexkeycol_oid_t(2)); // Primary - IndexSchema schema(columns, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + IndexSchema schema(columns, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); return schema; } diff --git a/src/catalog/postgres/pg_core_impl.cpp b/src/catalog/postgres/pg_core_impl.cpp index aa8cf7fab1..e07e215bdf 100644 --- a/src/catalog/postgres/pg_core_impl.cpp +++ b/src/catalog/postgres/pg_core_impl.cpp @@ -470,6 +470,7 @@ bool PgCoreImpl::CreateTableEntry(const common::ManagedPointer(PgClass::RelKind::REGULAR_TABLE)); + PgClass::RELOPTIONS.SetNull(delta, pm); PgClass::REL_SCHEMA.Set(delta, pm, nullptr); // Need to update once we've recreated the columns. PgClass::REL_PTR.SetNull(delta, pm); PgClass::REL_NEXTCOLOID.Set(delta, pm, next_col_oid); @@ -683,6 +684,9 @@ bool PgCoreImpl::CreateIndexEntry(const common::ManagedPointer(PgClass::RelKind::INDEX)); + + auto options_varlen = storage::StorageUtil::CreateVarlen(schema.GetIndexOptions().ToCatalogString()); + PgClass::RELOPTIONS.Set(delta, pm, options_varlen); PgClass::REL_SCHEMA.Set(delta, pm, nullptr); PgClass::REL_PTR.SetNull(delta, pm); // Set by execution layer after instantiation. PgClass::REL_NEXTCOLOID.SetNull(delta, pm); // Indexes don't need col_oid. @@ -801,8 +805,8 @@ bool PgCoreImpl::CreateIndexEntry(const common::ManagedPointer cols = GetColumns(txn, index_oid); - auto *new_schema = - new IndexSchema(cols, schema.Type(), schema.Unique(), schema.Primary(), schema.Exclusion(), schema.Immediate()); + auto *new_schema = new IndexSchema(cols, schema.Type(), schema.Unique(), schema.Primary(), schema.Exclusion(), + schema.Immediate(), schema.GetIndexOptions()); txn->RegisterAbortAction([=]() { delete new_schema; }); auto *const update_redo = txn->StageWrite(db_oid_, PgClass::CLASS_TABLE_OID, set_class_schema_pri_); diff --git a/src/execution/compiler/codegen.cpp b/src/execution/compiler/codegen.cpp index 2bb59830cc..eb0369a255 100644 --- a/src/execution/compiler/codegen.cpp +++ b/src/execution/compiler/codegen.cpp @@ -646,9 +646,9 @@ ast::Expr *CodeGen::TableIterClose(ast::Expr *table_iter) { ast::Expr *CodeGen::IterateTableParallel(catalog::table_oid_t table_oid, ast::Identifier col_oids, ast::Expr *query_state, ast::Expr *exec_ctx, ast::Identifier worker_name) { - ast::Expr *call = CallBuiltin( - ast::Builtin::TableIterParallel, - {Const32(table_oid.UnderlyingValue()), MakeExpr(col_oids), query_state, exec_ctx, MakeExpr(worker_name)}); + ast::Expr *call = + CallBuiltin(ast::Builtin::TableIterParallel, {Const32(table_oid.UnderlyingValue()), MakeExpr(col_oids), + query_state, exec_ctx, Const32(0), MakeExpr(worker_name)}); call->SetType(ast::BuiltinType::Get(context_, ast::BuiltinType::Nil)); return call; } diff --git a/src/execution/compiler/operator/index_create_translator.cpp b/src/execution/compiler/operator/index_create_translator.cpp index 0dcaab7765..07c032ab07 100644 --- a/src/execution/compiler/operator/index_create_translator.cpp +++ b/src/execution/compiler/operator/index_create_translator.cpp @@ -128,9 +128,18 @@ void IndexCreateTranslator::LaunchWork(FunctionBuilder *function, ast::Identifie function->Append(codegen->ExecCtxRegisterHook(exec_ctx, post, parallel_build_post_hook_fn_)); } + uint32_t num_threads_override = 0; + auto &index_options = GetPlanAs().GetSchema()->GetIndexOptions().GetOptions(); + if (index_options.find(catalog::IndexOptions::Knob::BUILD_THREADS) != index_options.end()) { + auto expr = index_options.find(catalog::IndexOptions::Knob::BUILD_THREADS)->second.get(); + auto cve = reinterpret_cast(expr); + num_threads_override = cve->Peek(); + } + ast::Expr *iter_table_parallel = codegen_->CallBuiltin( - ast::Builtin::TableIterParallel, {codegen_->Const32(table_oid_.UnderlyingValue()), global_col_oids_.Get(codegen_), - GetQueryStatePtr(), GetExecutionContext(), codegen_->MakeExpr(work_func)}); + ast::Builtin::TableIterParallel, + {codegen_->Const32(table_oid_.UnderlyingValue()), global_col_oids_.Get(codegen_), GetQueryStatePtr(), + GetExecutionContext(), codegen_->ConstU32(num_threads_override), codegen_->MakeExpr(work_func)}); iter_table_parallel->SetType(ast::BuiltinType::Get(codegen_->GetAstContext().Get(), ast::BuiltinType::Nil)); function->Append(iter_table_parallel); @@ -140,6 +149,14 @@ void IndexCreateTranslator::LaunchWork(FunctionBuilder *function, ast::Identifie } void IndexCreateTranslator::PerformPipelineWork(WorkContext *context, FunctionBuilder *function) const { + auto *codegen = GetCodeGen(); + if (IsPipelineMetricsEnabled()) { + function->Append(codegen->CallBuiltin( + ast::Builtin::ExecOUFeatureVectorFilter, + {GetPipeline()->OUFeatureVecPtr(), + codegen->Const32(static_cast(selfdriving::ExecutionOperatingUnitType::CREATE_INDEX))})); + } + const bool declare_local_tvi = !GetPipeline()->IsParallel(); if (declare_local_tvi) { DeclareTVI(function); @@ -151,8 +168,6 @@ void IndexCreateTranslator::PerformPipelineWork(WorkContext *context, FunctionBu if (declare_local_tvi) { function->Append(codegen_->TableIterClose(codegen_->MakeExpr(tvi_var_))); if (IsPipelineMetricsEnabled()) { - auto *codegen = GetCodeGen(); - // Get Memory Use auto *get_mem = codegen->CallBuiltin(ast::Builtin::StorageInterfaceGetIndexHeapSize, {local_storage_interface_.GetPtr(codegen_)}); diff --git a/src/execution/exec/execution_context.cpp b/src/execution/exec/execution_context.cpp index 2537b1b416..d69d615260 100644 --- a/src/execution/exec/execution_context.cpp +++ b/src/execution/exec/execution_context.cpp @@ -205,7 +205,8 @@ void ExecutionContext::InitializeParallelOUFeatureVector(selfdriving::ExecOUFeat vec->pipeline_features_->emplace_back(selfdriving::ExecutionOperatingUnitType::PARALLEL_SORT_TOPK_MERGE_STEP, feature); break; - case selfdriving::ExecutionOperatingUnitType::CREATE_INDEX: + case selfdriving::ExecutionOperatingUnitType::CREATE_INDEX_MAIN: + // Just copy the feature that has been setup already. vec->pipeline_features_->emplace_back(selfdriving::ExecutionOperatingUnitType::CREATE_INDEX_MAIN, feature); break; default: diff --git a/src/execution/sema/sema_builtin.cpp b/src/execution/sema/sema_builtin.cpp index cf2525a741..421e7fb89d 100644 --- a/src/execution/sema/sema_builtin.cpp +++ b/src/execution/sema/sema_builtin.cpp @@ -1330,7 +1330,7 @@ void Sema::CheckBuiltinTableIterCall(ast::CallExpr *call, ast::Builtin builtin) } void Sema::CheckBuiltinTableIterParCall(ast::CallExpr *call) { - if (!CheckArgCount(call, 5)) { + if (!CheckArgCount(call, 6)) { return; } @@ -1366,10 +1366,16 @@ void Sema::CheckBuiltinTableIterParCall(ast::CallExpr *call) { return; } - // The fifth argument is the scanner function. - auto *scan_fn_type = call_args[4]->GetType()->SafeAs(); + // The fifth argument is the override for number of threads + if (!call_args[4]->GetType()->IsIntegerType()) { + ReportIncorrectCallArg(call, 4, "Fifth argument should be an integer type."); + return; + } + + // The sixth argument is the scanner function. + auto *scan_fn_type = call_args[5]->GetType()->SafeAs(); if (scan_fn_type == nullptr) { - GetErrorReporter()->Report(call->Position(), ErrorMessages::kBadParallelScanFunction, call_args[4]->GetType()); + GetErrorReporter()->Report(call->Position(), ErrorMessages::kBadParallelScanFunction, call_args[5]->GetType()); return; } // Check the type of the scanner function parameters. See TableVectorIterator::ScanFn. @@ -1379,7 +1385,7 @@ void Sema::CheckBuiltinTableIterParCall(ast::CallExpr *call) { || !params[0].type_->IsPointerType() // QueryState, must contain execCtx. || !params[1].type_->IsPointerType() // Thread state. || !IsPointerToSpecificBuiltin(params[2].type_, tvi_kind)) { // TableVectorIterator. - GetErrorReporter()->Report(call->Position(), ErrorMessages::kBadParallelScanFunction, call_args[4]->GetType()); + GetErrorReporter()->Report(call->Position(), ErrorMessages::kBadParallelScanFunction, call_args[5]->GetType()); return; } diff --git a/src/execution/sql/ddl_executors.cpp b/src/execution/sql/ddl_executors.cpp index 5e4d5e1e79..7d9a1322fe 100644 --- a/src/execution/sql/ddl_executors.cpp +++ b/src/execution/sql/ddl_executors.cpp @@ -65,7 +65,8 @@ bool DDLExecutors::CreateTableExecutor(const common::ManagedPointerGetNamespaceOid(), unique_constraint.constraint_name_, table_oid, diff --git a/src/execution/sql/table_vector_iterator.cpp b/src/execution/sql/table_vector_iterator.cpp index 6b77276de7..a86f427c6e 100644 --- a/src/execution/sql/table_vector_iterator.cpp +++ b/src/execution/sql/table_vector_iterator.cpp @@ -136,7 +136,8 @@ class ScanTask { bool TableVectorIterator::ParallelScan(uint32_t table_oid, uint32_t *col_oids, uint32_t num_oids, void *const query_state, exec::ExecutionContext *exec_ctx, - const TableVectorIterator::ScanFn scan_fn, const uint32_t min_grain_size) { + uint32_t num_threads_override, const TableVectorIterator::ScanFn scan_fn, + const uint32_t min_grain_size) { // Lookup table const auto table = exec_ctx->GetAccessor()->GetTable(catalog::table_oid_t{table_oid}); if (table == nullptr) { @@ -149,6 +150,10 @@ bool TableVectorIterator::ParallelScan(uint32_t table_oid, uint32_t *col_oids, u // Execute parallel scan size_t num_threads = std::max(exec_ctx->GetExecutionSettings().GetNumberOfParallelExecutionThreads(), 0); + if (num_threads_override != 0) { + num_threads = num_threads_override; + } + size_t num_tasks = std::ceil(table->table_.data_table_->GetNumBlocks() * 1.0 / min_grain_size); size_t concurrent = std::min(num_threads, num_tasks); exec_ctx->SetNumConcurrentEstimate(concurrent); diff --git a/src/execution/vm/bytecode_emitter.cpp b/src/execution/vm/bytecode_emitter.cpp index 7123e35fae..7678d4610c 100644 --- a/src/execution/vm/bytecode_emitter.cpp +++ b/src/execution/vm/bytecode_emitter.cpp @@ -297,8 +297,10 @@ void BytecodeEmitter::EmitTempTableIterInit(Bytecode bytecode, LocalVar iter, Lo } void BytecodeEmitter::EmitParallelTableScan(LocalVar table_oid, LocalVar col_oids, uint32_t num_oids, - LocalVar query_state, LocalVar exec_ctx, FunctionId scan_fn) { - EmitAll(Bytecode::ParallelScanTable, table_oid, col_oids, num_oids, query_state, exec_ctx, scan_fn); + LocalVar query_state, LocalVar exec_ctx, LocalVar num_threads_override, + FunctionId scan_fn) { + EmitAll(Bytecode::ParallelScanTable, table_oid, col_oids, num_oids, query_state, exec_ctx, num_threads_override, + scan_fn); } void BytecodeEmitter::EmitRegisterHook(LocalVar exec_ctx, LocalVar hook_idx, FunctionId hook_fn) { diff --git a/src/execution/vm/bytecode_generator.cpp b/src/execution/vm/bytecode_generator.cpp index 1bd8b02bbf..07d8948d2e 100644 --- a/src/execution/vm/bytecode_generator.cpp +++ b/src/execution/vm/bytecode_generator.cpp @@ -708,11 +708,13 @@ void BytecodeGenerator::VisitBuiltinTableIterParallelCall(ast::CallExpr *call) { LocalVar query_state = VisitExpressionForRValue(call->Arguments()[2]); // The fourth argument should be the execution context. LocalVar exec_ctx = VisitExpressionForRValue(call->Arguments()[3]); - // The fifth argument is the scan function as an identifier. - const auto scan_fn_name = call->Arguments()[4]->As()->Name(); + // The fifth argument is the number of threads for override + LocalVar num_threads_override = VisitExpressionForRValue(call->Arguments()[4]); + // The sixth argument is the scan function as an identifier. + const auto scan_fn_name = call->Arguments()[5]->As()->Name(); // Emit the bytecode. GetEmitter()->EmitParallelTableScan(table_oid, col_oids, static_cast(arr_type->GetLength()), query_state, - exec_ctx, LookupFuncIdByName(scan_fn_name.GetData())); + exec_ctx, num_threads_override, LookupFuncIdByName(scan_fn_name.GetData())); } void BytecodeGenerator::VisitBuiltinVPICall(ast::CallExpr *call, ast::Builtin builtin) { diff --git a/src/execution/vm/vm.cpp b/src/execution/vm/vm.cpp index 888e73b710..77028fb9a1 100644 --- a/src/execution/vm/vm.cpp +++ b/src/execution/vm/vm.cpp @@ -684,10 +684,11 @@ void VM::Interpret(const uint8_t *ip, Frame *frame) { // NOLINT auto num_oids = READ_UIMM4(); auto query_state = frame->LocalAt(READ_LOCAL_ID()); auto *exec_context = frame->LocalAt(READ_LOCAL_ID()); + auto num_threads_override = frame->LocalAt(READ_LOCAL_ID()); auto scan_fn_id = READ_FUNC_ID(); auto scan_fn = reinterpret_cast(module_->GetRawFunctionImpl(scan_fn_id)); - OpParallelScanTable(table_oid, col_oids, num_oids, query_state, exec_context, scan_fn); + OpParallelScanTable(table_oid, col_oids, num_oids, query_state, exec_context, num_threads_override, scan_fn); DISPATCH_NEXT(); } diff --git a/src/include/catalog/index_schema.h b/src/include/catalog/index_schema.h index d27d292950..0196c15c5c 100644 --- a/src/include/catalog/index_schema.h +++ b/src/include/catalog/index_schema.h @@ -1,9 +1,13 @@ #pragma once +#include #include +#include #include +#include #include #include +#include #include #include @@ -12,6 +16,7 @@ #include "common/macros.h" #include "parser/expression/abstract_expression.h" #include "parser/expression/column_value_expression.h" +#include "parser/expression/constant_value_expression.h" #include "storage/index/index_defs.h" #include "type/type_id.h" #include "type/type_util.h" @@ -33,6 +38,224 @@ class Builder; class PgCoreImpl; } // namespace postgres +/** + * A class used to represent valid knobs and options that have + * been passed to the CREATE INDEX SQL statement. + */ +class IndexOptions { + public: + /** + * Specific knobs that can be specified + */ + enum Knob { + /** Number of threads to use for building the index */ + BUILD_THREADS, + + /** B+Tree inner node split threshold */ + BPLUSTREE_INNER_NODE_UPPER_THRESHOLD, + /** B+Tree Inner node merge threshold */ + BPLUSTREE_INNER_NODE_LOWER_THRESHOLD, + + UNKNOWN + }; + + /** + * Converts a string input into a specific Knob enum. + * If no valid conversion can be found, Knob::UNKNOWN is returned. + * @param option string to convert + * @return converted enum or UNKNOWN + */ + static IndexOptions::Knob ConvertToOptionKnob(std::string option) { + Knob knob = UNKNOWN; + std::transform(option.begin(), option.end(), option.begin(), ::toupper); + if (option == "BUILD_THREADS") { + knob = BUILD_THREADS; + } else if (option == "BPLUSTREE_INNER_NODE_UPPER_THRESHOLD") { + knob = BPLUSTREE_INNER_NODE_UPPER_THRESHOLD; + } else if (option == "BPLUSTREE_INNER_NODE_LOWER_THRESHOLD") { + knob = BPLUSTREE_INNER_NODE_LOWER_THRESHOLD; + } + return knob; + } + + /** + * Converts a knob into its string representation. + * @param val knob to convert to string + * @return string representation of the enum + */ + static std::string ConvertOptionKnobToString(IndexOptions::Knob val) { + switch (val) { + case BUILD_THREADS: + return "BUILD_THREADS"; + case BPLUSTREE_INNER_NODE_UPPER_THRESHOLD: + return "BPLUSTREE_INNER_NODE_UPPER_THRESHOLD"; + case BPLUSTREE_INNER_NODE_LOWER_THRESHOLD: + return "BPLUSTREE_INNER_NODE_LOWER_THRESHOLD"; + case UNKNOWN: + default: + return "UNKNOWN"; + } + } + + /** + * Returns the expected type of the knob's value + * @param val knob to heck + * @return type of the knob's value + */ + static type::TypeId ExpectedTypeForKnob(Knob val) { + switch (val) { + case BUILD_THREADS: + return type::TypeId::INTEGER; + case BPLUSTREE_INNER_NODE_UPPER_THRESHOLD: + return type::TypeId::INTEGER; + case BPLUSTREE_INNER_NODE_LOWER_THRESHOLD: + return type::TypeId::INTEGER; + case UNKNOWN: + default: + return type::TypeId::INVALID; + } + } + + /** + * Adds an option to be tracked in the IndexOptions data structure + * @param option Knob to insert (much be unique in tracking) + * @param value Value of the knob + */ + void AddOption(Knob option, std::unique_ptr value) { + NOISEPAGE_ASSERT(options_.find(option) == options_.end(), "Adding duplicate option to INDEX options"); + options_[option] = std::move(value); + } + + /** Default constructor */ + IndexOptions() = default; + + /** + * Constructor by reference + * @param other IndexOptions to copy from + */ + IndexOptions(const IndexOptions &other) { + for (const auto &pair : other.options_) { + AddOption(pair.first, pair.second->Copy()); + } + } + + /** + * Constructor by copy assignment + * @param other IndexOptions to copy from + */ + IndexOptions &operator=(const IndexOptions &other) { + for (auto &option : other.GetOptions()) { + AddOption(option.first, option.second->Copy()); + } + + return *this; + } + + /** + * Constructor by move assignment + * @param other IndexOptions to move from + */ + IndexOptions &operator=(IndexOptions &&other) noexcept { + options_ = std::move(other.options_); + return *this; + } + + /** + * Checks for equality with another IndexOptions + * @param other IndexOptions to check equality + * @return equal to other or not + */ + bool operator==(const IndexOptions &other) const { + std::unordered_set current_options; + std::unordered_set other_options; + for (const auto &pair : options_) current_options.insert(pair.first); + for (const auto &pair : other.options_) other_options.insert(pair.first); + if (current_options != other_options) return false; + + for (const auto &pair : options_) { + auto it = other.options_.find(pair.first); + if ((*pair.second) != (*it->second)) return false; + } + + return true; + } + + /** + * Checks for inequality with another IndexOptions + * @param other IndexOptions to check inequality against + * @return not equal or equal + */ + bool operator!=(const IndexOptions &other) const { return !(*this == other); } + + /** @return hash */ + common::hash_t Hash() const { + common::hash_t hash = 0; + for (const auto &pair : options_) { + hash = common::HashUtil::CombineHashes(hash, common::HashUtil::Hash(pair.first)); + hash = common::HashUtil::CombineHashes(hash, pair.second->Hash()); + } + return hash; + } + + /** @return the options stored */ + const std::map> &GetOptions() const { return options_; } + + /** Converts the IndexOptions to a nlohmann::json object */ + nlohmann::json ToJson() const; + /** Loads the IndexOptions with the data of a nlohmann::json object */ + void FromJson(const nlohmann::json &j); + + /** + * Serializes the IndexOptions into a string to be stored in the catalog + * Function serializes options in format of "knob1=value1 knob2=value2". + * @return string for storing in catalog + */ + std::string ToCatalogString() const { + std::stringstream sstream; + for (auto &option : options_) { + auto cve = reinterpret_cast(option.second.get()); + sstream << ConvertOptionKnobToString(option.first) << "=" << cve->ToString() << " "; + } + + return sstream.str(); + } + + /** + * Instantiates the IndexOptions from a string stashed in the catalog. + * Function expects options to be of form "knob1=value1 knob2=value2..." + * @return options string from the catalog + */ + void FromCatalogString(const std::string &options) { + std::stringstream sstream(options); + std::vector tokens; + { + while (sstream.good()) { + std::string token; + std::getline(sstream, token, ' '); + tokens.push_back(token); + } + } + + for (auto &token : tokens) { + auto pos = token.find('='); + if (pos != std::string::npos) { + auto option = token.substr(0, pos); + auto set_val = token.substr(pos, token.size()); + Knob val = ConvertToOptionKnob(option); + NOISEPAGE_ASSERT(val != UNKNOWN, "Invalid IndexOptions::Knob serialized"); + + auto type = ExpectedTypeForKnob(val); + options_[val] = std::make_unique( + parser::ConstantValueExpression::FromString(set_val, type)); + } + } + } + + private: + /** Map that stores knobs and the knob values */ + std::map> options_; +}; + /** * A schema for an index. It contains the definitions for the columns in the * key as well as additional metdata. @@ -267,15 +490,18 @@ class IndexSchema { * @param is_primary indicating whether this will be the index for a primary key * @param is_exclusion indicating whether this index is for exclusion constraints * @param is_immediate indicating that the uniqueness check fails at insertion time + * @param index_options that are options for building the index */ IndexSchema(std::vector columns, const storage::index::IndexType type, const bool is_unique, - const bool is_primary, const bool is_exclusion, const bool is_immediate) + const bool is_primary, const bool is_exclusion, const bool is_immediate, + const IndexOptions &index_options) : columns_(std::move(columns)), type_(type), is_unique_(is_unique), is_primary_(is_primary), is_exclusion_(is_exclusion), - is_immediate_(is_immediate) { + is_immediate_(is_immediate), + index_options_(index_options) { NOISEPAGE_ASSERT((is_primary && is_unique) || (!is_primary), "is_primary requires is_unique to be true as well."); ExtractIndexedColOids(); } @@ -366,8 +592,13 @@ class IndexSchema { } /** - * @warning Calling this function will traverse the entire expression tree for each column, which may be expensive for - * large expressions. Thus, it should only be called once during object construction. + * @return index options + */ + const IndexOptions &GetIndexOptions() const { return index_options_; } + + /** + * @warning Calling this function will traverse the entire expression tree for each column, which may be expensive + * for large expressions. Thus, it should only be called once during object construction. * @return col oids in index keys, ordered by index key */ void ExtractIndexedColOids() { @@ -411,6 +642,7 @@ class IndexSchema { hash = common::HashUtil::CombineHashes(hash, common::HashUtil::Hash(is_primary_)); hash = common::HashUtil::CombineHashes(hash, common::HashUtil::Hash(is_exclusion_)); hash = common::HashUtil::CombineHashes(hash, common::HashUtil::Hash(is_immediate_)); + hash = common::HashUtil::CombineHashes(hash, index_options_.Hash()); return hash; } @@ -427,7 +659,8 @@ class IndexSchema { if (is_immediate_ != rhs.is_immediate_) return false; // TODO(Ling): Does column order matter for compare equal? if (indexed_oids_ != rhs.indexed_oids_) return false; - return columns_ == rhs.columns_; + if (columns_ != rhs.columns_) return false; + return index_options_ == rhs.index_options_; } /** @@ -451,8 +684,10 @@ class IndexSchema { bool is_primary_; bool is_exclusion_; bool is_immediate_; + IndexOptions index_options_; }; +DEFINE_JSON_HEADER_DECLARATIONS(IndexOptions); DEFINE_JSON_HEADER_DECLARATIONS(IndexSchema::Column); DEFINE_JSON_HEADER_DECLARATIONS(IndexSchema); diff --git a/src/include/catalog/postgres/pg_class.h b/src/include/catalog/postgres/pg_class.h index 8a430fbc61..f1fd51ab69 100644 --- a/src/include/catalog/postgres/pg_class.h +++ b/src/include/catalog/postgres/pg_class.h @@ -54,17 +54,19 @@ class PgClass { static constexpr CatalogColumnDef RELOID{col_oid_t{1}}; // INTEGER (pkey) static constexpr CatalogColumnDef RELNAME{col_oid_t{2}}; // VARCHAR static constexpr CatalogColumnDef RELNAMESPACE{ - col_oid_t{3}}; // INTEGER (fkey: pg_namespace) - static constexpr CatalogColumnDef RELKIND{col_oid_t{4}}; // CHAR - static constexpr CatalogColumnDef REL_SCHEMA{col_oid_t{5}}; // BIGINT (assumes 64-bit pointers) + col_oid_t{3}}; // INTEGER (fkey: pg_namespace) + static constexpr CatalogColumnDef RELKIND{col_oid_t{4}}; // CHAR + static constexpr CatalogColumnDef RELOPTIONS{col_oid_t{5}}; // VARCHAR + static constexpr CatalogColumnDef REL_SCHEMA{col_oid_t{6}}; // BIGINT (assumes 64-bit pointers) static constexpr CatalogColumnDef REL_PTR{ - col_oid_t{6}}; // BIGINT (assumes 64-bit pointers) - static constexpr CatalogColumnDef REL_NEXTCOLOID{col_oid_t{7}}; // INTEGER + col_oid_t{7}}; // BIGINT (assumes 64-bit pointers) + static constexpr CatalogColumnDef REL_NEXTCOLOID{col_oid_t{8}}; // INTEGER - static constexpr uint8_t NUM_PG_CLASS_COLS = 7; + static constexpr uint8_t NUM_PG_CLASS_COLS = 8; static constexpr std::array PG_CLASS_ALL_COL_OIDS = { - RELOID.oid_, RELNAME.oid_, RELNAMESPACE.oid_, RELKIND.oid_, REL_SCHEMA.oid_, REL_PTR.oid_, REL_NEXTCOLOID.oid_}; + RELOID.oid_, RELNAME.oid_, RELNAMESPACE.oid_, RELKIND.oid_, + RELOPTIONS.oid_, REL_SCHEMA.oid_, REL_PTR.oid_, REL_NEXTCOLOID.oid_}; }; } // namespace noisepage::catalog::postgres diff --git a/src/include/execution/sql/ddl_executors.h b/src/include/execution/sql/ddl_executors.h index 85bfafa3c8..21931f7a70 100644 --- a/src/include/execution/sql/ddl_executors.h +++ b/src/include/execution/sql/ddl_executors.h @@ -4,6 +4,7 @@ #include "catalog/catalog_defs.h" #include "common/managed_pointer.h" +#include "storage/index/index_builder.h" namespace noisepage::planner { class CreateDatabasePlanNode; class CreateNamespacePlanNode; diff --git a/src/include/execution/sql/table_vector_iterator.h b/src/include/execution/sql/table_vector_iterator.h index 247bb88ad1..e3f8cc5c70 100644 --- a/src/include/execution/sql/table_vector_iterator.h +++ b/src/include/execution/sql/table_vector_iterator.h @@ -123,11 +123,12 @@ class EXPORT TableVectorIterator { * ThreadStateContainer for all thread states, where it is assumed that the * container has been configured for size, construction, and destruction * before this invocation. + * @param num_threads_override If non-zero, specifies the number of threads to use * @param scan_fn The callback function invoked for vectors of table input. * @param min_grain_size The minimum number of blocks to give a scan task. */ static bool ParallelScan(uint32_t table_oid, uint32_t *col_oids, uint32_t num_oids, void *query_state, - exec::ExecutionContext *exec_ctx, ScanFn scan_fn, + exec::ExecutionContext *exec_ctx, uint32_t num_threads_override, ScanFn scan_fn, uint32_t min_grain_size = K_MIN_BLOCK_RANGE_SIZE); private: diff --git a/src/include/execution/vm/bytecode_emitter.h b/src/include/execution/vm/bytecode_emitter.h index 96140ce863..f5d532b519 100644 --- a/src/include/execution/vm/bytecode_emitter.h +++ b/src/include/execution/vm/bytecode_emitter.h @@ -324,7 +324,7 @@ class BytecodeEmitter { /** Emit a parallel table scan. */ void EmitParallelTableScan(LocalVar table_oid, LocalVar col_oids, uint32_t num_oids, LocalVar query_state, - LocalVar exec_ctx, FunctionId scan_fn); + LocalVar exec_ctx, LocalVar num_threads_override, FunctionId scan_fn); /** Emit a register hook function. */ void EmitRegisterHook(LocalVar exec_ctx, LocalVar hook_idx, FunctionId hook_fn); diff --git a/src/include/execution/vm/bytecode_handlers.h b/src/include/execution/vm/bytecode_handlers.h index 67d042969d..aa05cc3f5c 100644 --- a/src/include/execution/vm/bytecode_handlers.h +++ b/src/include/execution/vm/bytecode_handlers.h @@ -322,9 +322,10 @@ VM_OP_HOT void OpTableVectorIteratorGetVPI(noisepage::execution::sql::VectorProj VM_OP_HOT void OpParallelScanTable(uint32_t table_oid, uint32_t *col_oids, uint32_t num_oids, void *const query_state, noisepage::execution::exec::ExecutionContext *exec_ctx, + uint32_t num_threads_override, const noisepage::execution::sql::TableVectorIterator::ScanFn scanner) { noisepage::execution::sql::TableVectorIterator::ParallelScan(table_oid, col_oids, num_oids, query_state, exec_ctx, - scanner); + num_threads_override, scanner); } // --------------------------------------------------------- diff --git a/src/include/execution/vm/bytecodes.h b/src/include/execution/vm/bytecodes.h index 982e7ae09c..51fda63006 100644 --- a/src/include/execution/vm/bytecodes.h +++ b/src/include/execution/vm/bytecodes.h @@ -136,7 +136,7 @@ namespace noisepage::execution::vm { F(TableVectorIteratorGetVPINumTuples, OperandType::Local, OperandType::Local) \ F(TableVectorIteratorGetVPI, OperandType::Local, OperandType::Local) \ F(ParallelScanTable, OperandType::Local, OperandType::Local, OperandType::UImm4, OperandType::Local, \ - OperandType::Local, OperandType::FunctionId) \ + OperandType::Local, OperandType::Local, OperandType::FunctionId) \ \ F(CteScanInit, OperandType::Local, OperandType::Local, OperandType::Local, OperandType::Local, OperandType::Local, \ OperandType::UImm4) \ diff --git a/src/include/metrics/pipeline_metric.h b/src/include/metrics/pipeline_metric.h index ab326c7da9..629b0700db 100644 --- a/src/include/metrics/pipeline_metric.h +++ b/src/include/metrics/pipeline_metric.h @@ -68,6 +68,8 @@ class PipelineMetricRawData : public AbstractRawData { outfile << data.GetMemFactorsVectorString() << ", "; outfile << data.GetNumLoopsVectorString() << ", "; outfile << data.GetNumConcurrentVectorString() << ", "; + outfile << data.GetSpecificFeature0VectorString() << ", "; + outfile << data.GetSpecificFeature1VectorString() << ", "; data.resource_metrics_.ToCSV(outfile); outfile << std::endl; @@ -86,7 +88,7 @@ class PipelineMetricRawData : public AbstractRawData { */ static constexpr std::array FEATURE_COLUMNS = { "query_id, pipeline_id, num_features, features, cpu_freq, exec_mode, num_rows, key_sizes, num_keys, " - "est_cardinalities, mem_factor, num_loops, num_concurrent"}; + "est_cardinalities, mem_factor, num_loops, num_concurrent, specific_feature0, specific_feature1"}; private: friend class PipelineMetric; @@ -188,6 +190,22 @@ class PipelineMetricRawData : public AbstractRawData { return ConcatVectorToString(num_concurrent); } + std::string GetSpecificFeature0VectorString() { + std::vector spec; + for (auto &feature : features_) { + spec.emplace_back(feature.GetSpecificFeature0()); + } + return ConcatVectorToString(spec); + } + + std::string GetSpecificFeature1VectorString() { + std::vector spec; + for (auto &feature : features_) { + spec.emplace_back(feature.GetSpecificFeature1()); + } + return ConcatVectorToString(spec); + } + const execution::query_id_t query_id_; const execution::pipeline_id_t pipeline_id_; const uint8_t execution_mode_; diff --git a/src/include/optimizer/logical_operators.h b/src/include/optimizer/logical_operators.h index 534fa5be80..14331e7336 100644 --- a/src/include/optimizer/logical_operators.h +++ b/src/include/optimizer/logical_operators.h @@ -1280,12 +1280,14 @@ class LogicalCreateIndex : public OperatorNodeContents { * @param unique If the index to be created should be unique * @param index_name Name of the index * @param index_attrs Attributes of the index + * @param index_options Index options * @return */ static Operator Make(catalog::db_oid_t database_oid, catalog::namespace_oid_t namespace_oid, catalog::table_oid_t table_oid, parser::IndexType index_type, bool unique, std::string index_name, - std::vector> index_attrs); + std::vector> index_attrs, + catalog::IndexOptions index_options); /** * Copy @@ -1331,6 +1333,11 @@ class LogicalCreateIndex : public OperatorNodeContents { */ const std::vector> &GetIndexAttr() const { return index_attrs_; } + /** + * @return index options + */ + const catalog::IndexOptions &GetIndexOptions() const { return index_options_; } + private: /** * OID of the database @@ -1366,6 +1373,11 @@ class LogicalCreateIndex : public OperatorNodeContents { * Index attributes */ std::vector> index_attrs_; + + /** + * Index options + */ + catalog::IndexOptions index_options_; }; /** diff --git a/src/include/parser/create_statement.h b/src/include/parser/create_statement.h index d684629756..1975233e66 100644 --- a/src/include/parser/create_statement.h +++ b/src/include/parser/create_statement.h @@ -12,6 +12,7 @@ #include "parser/parser_defs.h" #include "parser/select_statement.h" #include "parser/sql_statement.h" +#include "storage/index/index_builder.h" namespace noisepage { namespace parser { @@ -411,15 +412,17 @@ class CreateStatement : public TableRefStatement { * @param unique true if index should be unique, false otherwise * @param index_name index name * @param index_attrs index attributes + * @param index_options index options */ CreateStatement(std::unique_ptr table_info, IndexType index_type, bool unique, std::string index_name, - std::vector index_attrs) + std::vector index_attrs, const catalog::IndexOptions &index_options) : TableRefStatement(StatementType::CREATE, std::move(table_info)), create_type_(kIndex), index_type_(index_type), unique_index_(unique), index_name_(std::move(index_name)), - index_attrs_(std::move(index_attrs)) {} + index_attrs_(std::move(index_attrs)), + index_options_(index_options) {} /** * CREATE SCHEMA @@ -504,6 +507,9 @@ class CreateStatement : public TableRefStatement { /** @return index attributes for [CREATE INDEX] */ const std::vector &GetIndexAttributes() const { return index_attrs_; } + /** @return move index options for [CREATE INDEX] */ + catalog::IndexOptions &&MoveIndexOptions() { return std::move(index_options_); } + /** @return true if "IF NOT EXISTS" for [CREATE SCHEMA], false otherwise */ bool IsIfNotExists() { return if_not_exists_; } @@ -544,6 +550,7 @@ class CreateStatement : public TableRefStatement { const bool unique_index_ = false; const std::string index_name_; const std::vector index_attrs_; + catalog::IndexOptions index_options_; // CREATE SCHEMA const bool if_not_exists_ = false; diff --git a/src/include/self_driving/modeling/operating_unit.h b/src/include/self_driving/modeling/operating_unit.h index f0f825be10..3ad69e03f3 100644 --- a/src/include/self_driving/modeling/operating_unit.h +++ b/src/include/self_driving/modeling/operating_unit.h @@ -99,10 +99,13 @@ class ExecutionOperatingUnitFeature { * @param mem_factor Memory adjustment factor * @param num_loops Number of loops * @param num_concurrent Number of concurrent tasks (including current one) + * @param specific_feature0 A feature specific feature value + * @param specific_feature1 A (second) feature specific feature value */ ExecutionOperatingUnitFeature(execution::translator_id_t translator_id, ExecutionOperatingUnitType feature, size_t num_rows, size_t key_size, size_t num_keys, size_t cardinality, - double mem_factor, size_t num_loops, size_t num_concurrent) + double mem_factor, size_t num_loops, size_t num_concurrent, size_t specific_feature0, + size_t specific_feature1) : translator_id_(translator_id), feature_id_(feature_id_counter++), feature_(feature), @@ -112,11 +115,12 @@ class ExecutionOperatingUnitFeature { cardinality_(cardinality), mem_factors_({mem_factor}), num_loops_(num_loops), - num_concurrent_(num_concurrent) {} + num_concurrent_(num_concurrent), + specific_feature0_(specific_feature0), + specific_feature1_(specific_feature1) {} /** * Constructor for ExecutionOperatingUnitFeature from an existing feature - * @note Does not copy num_rows, cardinality * * @param feature Newly created OU type * @param other Existing OU to copy information from @@ -125,16 +129,18 @@ class ExecutionOperatingUnitFeature { : translator_id_(other.translator_id_), feature_id_(other.feature_id_), feature_(feature), - num_rows_(0), + num_rows_(other.num_rows_), key_size_(other.key_size_), num_keys_(other.num_keys_), - cardinality_(0), + cardinality_(other.cardinality_), mem_factors_(other.mem_factors_), num_loops_(other.num_loops_), - num_concurrent_(other.num_concurrent_) {} + num_concurrent_(other.num_concurrent_), + specific_feature0_(other.specific_feature0_), + specific_feature1_(other.specific_feature1_) {} /** - * Returns a vector of doubles consisting of 7 features starting with num_rows + * Returns a vector of doubles consisting of 9 features starting with num_rows */ void GetAllAttributes(std::vector *all_attributes) const { all_attributes->push_back(num_rows_); @@ -144,6 +150,8 @@ class ExecutionOperatingUnitFeature { all_attributes->push_back(GetMemFactor()); all_attributes->push_back(num_loops_); all_attributes->push_back(num_concurrent_); + all_attributes->push_back(specific_feature0_); + all_attributes->push_back(specific_feature1_); } /** @return The ID of the translator for this ExecutionOperatingUnitFeature. */ @@ -250,6 +258,16 @@ class ExecutionOperatingUnitFeature { */ size_t GetNumLoops() const { return num_loops_; } + /** + * @return feature specific value 0 + */ + size_t GetSpecificFeature0() const { return specific_feature0_; } + + /** + * @return feature specific value 1 + */ + size_t GetSpecificFeature1() const { return specific_feature1_; } + private: /** * Set the estimated number of output tuples @@ -297,6 +315,8 @@ class ExecutionOperatingUnitFeature { std::vector mem_factors_; size_t num_loops_; size_t num_concurrent_; + size_t specific_feature0_; + size_t specific_feature1_; }; /** diff --git a/src/include/self_driving/modeling/operating_unit_defs.h b/src/include/self_driving/modeling/operating_unit_defs.h index 76be97317c..f8c1ff9d39 100644 --- a/src/include/self_driving/modeling/operating_unit_defs.h +++ b/src/include/self_driving/modeling/operating_unit_defs.h @@ -67,6 +67,9 @@ enum class ExecutionOperatingUnitType : uint32_t { * IDX_SCAN * num_rows: size of index * cardinality: size of scan + * + * specific_feature0: upper size threshold for b+tree + * specific_feature1: lower size threshold for b+tree */ IDX_SCAN, @@ -120,6 +123,9 @@ enum class ExecutionOperatingUnitType : uint32_t { * num_rows: index size * cardinality (training): batch number of indexes * cardinality (inference): number of index inserts or deletes + * + * specific_feature0: upper size threshold for b+tree + * specific_feature1: lower size threshold for b+tree */ INDEX_INSERT, INDEX_DELETE, @@ -130,6 +136,11 @@ enum class ExecutionOperatingUnitType : uint32_t { PARALLEL_SORT_MERGE_STEP, PARALLEL_SORT_TOPK_STEP, PARALLEL_SORT_TOPK_MERGE_STEP, + + /** + * specific_feature0: upper size threshold for b+tree + * specific_feature1: lower size threshold for b+tree + */ CREATE_INDEX, CREATE_INDEX_MAIN, diff --git a/src/include/self_driving/modeling/operating_unit_recorder.h b/src/include/self_driving/modeling/operating_unit_recorder.h index 9f604a9771..9f898eac97 100644 --- a/src/include/self_driving/modeling/operating_unit_recorder.h +++ b/src/include/self_driving/modeling/operating_unit_recorder.h @@ -121,6 +121,21 @@ class OperatingUnitRecorder : planner::PlanVisitor { template void RecordIndexOperations(const std::vector &index_oids, catalog::table_oid_t table_oid); + /** + * Derive index specific features from the index schema + * @param schema Schema to derive features from + * @return pair of index specific features + */ + std::pair DeriveIndexSpecificFeatures(const catalog::IndexSchema &schema); + + /** + * Derive a valid value for the num_concurrent for a create index + * @param schema Schema of index being built + * @param tbl_oid Table OID of table being used + * @return estimate of num_concurrent + */ + size_t DeriveIndexBuildThreads(const catalog::IndexSchema &schema, catalog::table_oid_t tbl_oid); + template void RecordAggregateTranslator(common::ManagedPointer translator, const planner::AggregatePlanNode *plan); diff --git a/src/include/storage/index/bplustree.h b/src/include/storage/index/bplustree.h index 488e78d695..a9f6bfaa01 100644 --- a/src/include/storage/index/bplustree.h +++ b/src/include/storage/index/bplustree.h @@ -71,9 +71,9 @@ class BPlusTreeBase { protected: /** upper size threshold for inner node split [FAN_OUT] */ - int inner_node_size_upper_threshold_ = 128; + int inner_node_size_upper_threshold_ = DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD; /** lower size threshold for inner node removal [Ceil(FAN_OUT / 2) - 1] */ - int inner_node_size_lower_threshold_ = 63; + int inner_node_size_lower_threshold_ = DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD; /** upper size threshold for leaf node split [FAN_OUT] */ int leaf_node_size_upper_threshold_ = 128; @@ -81,6 +81,11 @@ class BPlusTreeBase { int leaf_node_size_lower_threshold_ = 64; public: + /** upper size threshold for inner node split [FAN_OUT] */ + static int constexpr DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD = 128; + /** lower size threshold for inner node removal [Ceil(FAN_OUT / 2) - 1] */ + static int constexpr DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD = 63; + /** * Constructor */ diff --git a/src/include/storage/index/bplustree_index.h b/src/include/storage/index/bplustree_index.h index a6c7fb043f..7eebb39405 100644 --- a/src/include/storage/index/bplustree_index.h +++ b/src/include/storage/index/bplustree_index.h @@ -43,6 +43,24 @@ class BPlusTreeIndex final : public Index { */ IndexType Type() const final { return IndexType::BPLUSTREE; } + /** + * Sets the B+Tree's inner node upper threshold (split) + * @param threshold Threshold to use for inner split + */ + void SetInnerNodeSizeUpperThreshold(int threshold); + + /** + * Sets the B+Tree's inner node lower threshold (merge) + * @param threshold Threshold to use for inner merge + */ + void SetInnerNodeSizeLowerThreshold(int threshold); + + /** @return inner node upper threshold (split) */ + int GetInnerNodeSizeUpperThreshold() const; + + /** @return inner node lower threshold (merge) */ + int GetInnerNodeSizeLowerThreshold() const; + /** * @return approximate number of bytes allocated on the heap for this index data structure */ diff --git a/src/include/storage/index/index_builder.h b/src/include/storage/index/index_builder.h index 04a644cfea..c8cefd5fd6 100644 --- a/src/include/storage/index/index_builder.h +++ b/src/include/storage/index/index_builder.h @@ -1,7 +1,15 @@ #pragma once +#include +#include +#include + #include "catalog/index_schema.h" +namespace noisepage::parser { +class AbstractExpression; +} + namespace noisepage::storage::index { class Index; @@ -29,6 +37,9 @@ class IndexBuilder { IndexBuilder &SetKeySchema(const catalog::IndexSchema &key_schema); private: + template + void ApplyIndexOptions(Index *index) const; + Index *BuildBwTreeIntsKey(IndexMetadata metadata) const; Index *BuildBwTreeGenericKey(IndexMetadata metadata) const; diff --git a/src/include/storage/sql_table.h b/src/include/storage/sql_table.h index f2d87dc065..160ba6c641 100644 --- a/src/include/storage/sql_table.h +++ b/src/include/storage/sql_table.h @@ -63,6 +63,11 @@ class SqlTable { */ ~SqlTable() { delete table_.data_table_; } + /** + * @return number of blocks + */ + size_t GetNumBlocks() { return table_.data_table_->GetNumBlocks(); } + /** * Materializes a single tuple from the given slot, as visible at the timestamp of the calling txn. * diff --git a/src/optimizer/logical_operators.cpp b/src/optimizer/logical_operators.cpp index 6376028a2b..37e326575c 100644 --- a/src/optimizer/logical_operators.cpp +++ b/src/optimizer/logical_operators.cpp @@ -847,12 +847,24 @@ bool LogicalCreateFunction::operator==(const BaseOperatorNodeContents &r) { //===--------------------------------------------------------------------===// // LogicalCreateIndex //===--------------------------------------------------------------------===// -BaseOperatorNodeContents *LogicalCreateIndex::Copy() const { return new LogicalCreateIndex(*this); } +BaseOperatorNodeContents *LogicalCreateIndex::Copy() const { + auto *op = new LogicalCreateIndex(); + op->database_oid_ = database_oid_; + op->namespace_oid_ = namespace_oid_; + op->table_oid_ = table_oid_; + op->index_type_ = index_type_; + op->unique_index_ = unique_index_; + op->index_name_ = index_name_; + op->index_attrs_ = index_attrs_; + op->index_options_ = catalog::IndexOptions(index_options_); + return op; +} Operator LogicalCreateIndex::Make(catalog::db_oid_t database_oid, catalog::namespace_oid_t namespace_oid, catalog::table_oid_t table_oid, parser::IndexType index_type, bool unique, std::string index_name, - std::vector> index_attrs) { + std::vector> index_attrs, + catalog::IndexOptions index_options) { auto *op = new LogicalCreateIndex(); op->database_oid_ = database_oid; op->namespace_oid_ = namespace_oid; @@ -861,6 +873,7 @@ Operator LogicalCreateIndex::Make(catalog::db_oid_t database_oid, catalog::names op->unique_index_ = unique; op->index_name_ = std::move(index_name); op->index_attrs_ = std::move(index_attrs); + op->index_options_ = std::move(index_options); return Operator(common::ManagedPointer(op)); } @@ -875,6 +888,7 @@ common::hash_t LogicalCreateIndex::Hash() const { for (const auto &attr : index_attrs_) { hash = common::HashUtil::CombineHashes(hash, attr->Hash()); } + hash = common::HashUtil::CombineHashes(hash, index_options_.Hash()); return hash; } @@ -891,7 +905,7 @@ bool LogicalCreateIndex::operator==(const BaseOperatorNodeContents &r) { for (size_t i = 0; i < index_attrs_.size(); i++) { if (*(index_attrs_[i]) != *(node.index_attrs_[i])) return false; } - return (true); + return index_options_ == node.index_options_; } //===--------------------------------------------------------------------===// diff --git a/src/optimizer/physical_operators.cpp b/src/optimizer/physical_operators.cpp index be4cefa18b..7c8bd22c1e 100644 --- a/src/optimizer/physical_operators.cpp +++ b/src/optimizer/physical_operators.cpp @@ -973,8 +973,9 @@ BaseOperatorNodeContents *CreateIndex::Copy() const { for (auto &col : schema_->GetColumns()) { columns.emplace_back(col); } - auto schema = std::make_unique(std::move(columns), schema_->Type(), schema_->Unique(), - schema_->Primary(), schema_->Exclusion(), schema_->Immediate()); + auto schema = + std::make_unique(std::move(columns), schema_->Type(), schema_->Unique(), schema_->Primary(), + schema_->Exclusion(), schema_->Immediate(), schema_->GetIndexOptions()); auto op = new CreateIndex(); op->namespace_oid_ = namespace_oid_; diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp index 59ba923d79..0fc7686c9a 100644 --- a/src/optimizer/plan_generator.cpp +++ b/src/optimizer/plan_generator.cpp @@ -989,8 +989,9 @@ void PlanGenerator::Visit(const CreateIndex *create_index) { for (const auto &col : schema->GetColumns()) { cols.emplace_back(col); } - auto idx_schema = std::make_unique(std::move(cols), schema->Type(), schema->Unique(), - schema->Primary(), schema->Exclusion(), schema->Immediate()); + auto idx_schema = + std::make_unique(std::move(cols), schema->Type(), schema->Unique(), schema->Primary(), + schema->Exclusion(), schema->Immediate(), schema->GetIndexOptions()); auto out_schema = std::make_unique(); output_plan_ = planner::CreateIndexPlanNode::Builder() diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp index 2719bc0422..2d9a03fd22 100644 --- a/src/optimizer/query_to_operator_transformer.cpp +++ b/src/optimizer/query_to_operator_transformer.cpp @@ -484,7 +484,7 @@ void QueryToOperatorTransformer::Visit(common::ManagedPointer( LogicalCreateIndex::Make(db_oid_, accessor_->GetDefaultNamespace(), accessor_->GetTableOid(op->GetTableName()), op->GetIndexType(), op->IsUniqueIndex(), - op->GetIndexName(), std::move(entries)) + op->GetIndexName(), std::move(entries), op->MoveIndexOptions()) .RegisterWithTxnContext(txn_context), std::vector>{}, txn_context); break; diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index fabca64fa5..74916a944a 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -928,9 +928,10 @@ void LogicalCreateIndexToPhysicalCreateIndex::Transform( } auto schema = std::make_unique(std::move(cols), idx_type, ci_op->IsUnique(), - false, // is_primary - false, // is_exclusion - false); // is_immediate + false, // is_primary + false, // is_exclusion + false, // is_immediate + ci_op->GetIndexOptions()); auto op = std::make_unique( CreateIndex::Make(ci_op->GetNamespaceOid(), ci_op->GetTableOid(), ci_op->GetIndexName(), std::move(schema)) diff --git a/src/parser/postgresparser.cpp b/src/parser/postgresparser.cpp index 4b186e3642..7f6050e5dc 100644 --- a/src/parser/postgresparser.cpp +++ b/src/parser/postgresparser.cpp @@ -234,6 +234,11 @@ std::unique_ptr PostgresParser::ExprTransform(ParseResult *p expr = AExprTransform(parse_result, reinterpret_cast(node)); break; } + case T_Integer: { + expr = std::make_unique( + type::TypeId::INTEGER, execution::sql::Integer(reinterpret_cast(node)->val_.ival_)); + break; + } default: { PARSER_LOG_DEBUG("ExprTransform: type {} unsupported", node->type); throw PARSER_EXCEPTION("ExprTransform: unsupported type"); @@ -1395,8 +1400,34 @@ std::unique_ptr PostgresParser::CreateIndexTransform(ParseResult * throw NOT_IMPLEMENTED_EXCEPTION("CreateIndexTransform error"); } + catalog::IndexOptions options; + if (root->options_ != nullptr) { + for (auto cell = root->options_->head; cell != nullptr; cell = cell->next) { + auto *arg = reinterpret_cast(cell->data.ptr_value); + char *name = arg->defname_; + NOISEPAGE_ASSERT(name && arg->arg_, "Invalid DefElem for CREATE INDEX option"); + auto option = catalog::IndexOptions::ConvertToOptionKnob(name); + if (option == catalog::IndexOptions::Knob::UNKNOWN) { + // We found an unsupported option, so skip. + PARSER_LOG_DEBUG("CreateIndexTransform: received unknown option {}", option); + continue; + } + + std::unique_ptr val = ExprTransform(parse_result, arg->arg_, name); + if (!val || val->GetExpressionType() != parser::ExpressionType::VALUE_CONSTANT) { + throw PARSER_EXCEPTION("CreateIndexTransform: non-scalar value for option"); + } + + if (val->GetReturnValueType() != catalog::IndexOptions::ExpectedTypeForKnob(option)) { + throw PARSER_EXCEPTION("CreateIndexTransform: incorrect type for option"); + } + + options.AddOption(option, std::move(val)); + } + } + return std::make_unique(std::move(table_info), index_type, unique, index_name, - std::move(index_attrs)); + std::move(index_attrs), std::move(options)); } // Postgres.CreateSchemaStmt -> noisepage.CreateStatement diff --git a/src/self_driving/modeling/operating_unit_recorder.cpp b/src/self_driving/modeling/operating_unit_recorder.cpp index 6553c9dc39..71dd10f681 100644 --- a/src/self_driving/modeling/operating_unit_recorder.cpp +++ b/src/self_driving/modeling/operating_unit_recorder.cpp @@ -7,11 +7,13 @@ #include "execution/ast/context.h" #include "execution/compiler/operator/hash_aggregation_translator.h" #include "execution/compiler/operator/hash_join_translator.h" +#include "execution/compiler/operator/index_create_translator.h" #include "execution/compiler/operator/operator_translator.h" #include "execution/compiler/operator/sort_translator.h" #include "execution/compiler/operator/static_aggregation_translator.h" #include "execution/sql/aggregators.h" #include "execution/sql/hash_table_entry.h" +#include "execution/sql/table_vector_iterator.h" #include "optimizer/index_util.h" #include "parser/expression/constant_value_expression.h" #include "parser/expression/function_expression.h" @@ -51,12 +53,57 @@ #include "self_driving/modeling/operating_unit.h" #include "self_driving/modeling/operating_unit_util.h" #include "storage/block_layout.h" +#include "storage/index/bplustree.h" #include "storage/index/index.h" #include "storage/sql_table.h" #include "type/type_id.h" namespace noisepage::selfdriving { +size_t OperatingUnitRecorder::DeriveIndexBuildThreads(const catalog::IndexSchema &schema, + catalog::table_oid_t tbl_oid) { + size_t num_threads = 1; + auto &options = schema.GetIndexOptions().GetOptions(); + if (options.find(catalog::IndexOptions::Knob::BUILD_THREADS) != options.end()) { + auto expr = options.find(catalog::IndexOptions::Knob::BUILD_THREADS)->second.get(); + auto cve = reinterpret_cast(expr); + num_threads = cve->Peek(); + } + + auto sql_table = accessor_->GetTable(tbl_oid); + size_t num_tasks = + std::ceil(sql_table->GetNumBlocks() * 1.0 / execution::sql::TableVectorIterator::K_MIN_BLOCK_RANGE_SIZE); + return std::min(num_tasks, num_threads); +} + +std::pair OperatingUnitRecorder::DeriveIndexSpecificFeatures(const catalog::IndexSchema &schema) { + if (schema.Type() != storage::index::IndexType::BPLUSTREE) { + return std::make_pair(0, 0); + } + + // For BPLUSTREE, feature0 is the upper threshold and feature1 is the lower threshold + size_t feature0 = 0; + size_t feature1 = 0; + auto &options = schema.GetIndexOptions().GetOptions(); + if (options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_UPPER_THRESHOLD) != options.end()) { + auto expr = options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_UPPER_THRESHOLD)->second.get(); + auto cve = reinterpret_cast(expr); + feature0 = cve->Peek(); + } else { + feature0 = storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_UPPER_THRESHOLD; + } + + if (options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_LOWER_THRESHOLD) != options.end()) { + auto expr = options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_LOWER_THRESHOLD)->second.get(); + auto cve = reinterpret_cast(expr); + feature1 = cve->Peek(); + } else { + feature1 = storage::index::BPlusTreeBase::DEFAULT_INNER_NODE_SIZE_LOWER_THRESHOLD; + } + + return std::make_pair(feature0, feature1); +} + template void OperatingUnitRecorder::RecordIndexOperations(const std::vector &index_oids, catalog::table_oid_t table_oid) { @@ -76,6 +123,7 @@ void OperatingUnitRecorder::RecordIndexOperations(const std::vectorGetIndexSchema(oid); + auto features = DeriveIndexSpecificFeatures(index_schema); // TODO(lin): Use the table size instead of the index size as the estiamte (since there may be "what-if" indexes // that we don't populate). We probably need to use the stats if the pilot is not running on the primary. @@ -88,8 +136,9 @@ void OperatingUnitRecorder::RecordIndexOperations(const std::vectorGetTranslatorId(), type, - num_rows, key_size, num_keys, car, 1, 0, 0)); + pipeline_features_.emplace( + type, ExecutionOperatingUnitFeature(current_translator_->GetTranslatorId(), type, num_rows, key_size, num_keys, + car, 1, 0, 0, features.first, features.second)); } } @@ -213,6 +262,9 @@ void OperatingUnitRecorder::AggregateFeatures(selfdriving::ExecutionOperatingUni size_t num_loops = 0; size_t num_concurrent = 0; // the number of concurrently executing threads (issue #1241) + size_t specific_feature0 = 0; + size_t specific_feature1 = 0; + // Dummy default values in case we don't have stats size_t current_plan_cardinality = 1; size_t table_num_rows = 1; @@ -280,11 +332,14 @@ void OperatingUnitRecorder::AggregateFeatures(selfdriving::ExecutionOperatingUni else num_rows = table_num_rows; + const auto &idx_schema = accessor_->GetIndexSchema(index_oid); + std::tie(specific_feature0, specific_feature1) = DeriveIndexSpecificFeatures(idx_schema); + std::vector mapped_cols; std::unordered_map lookup; UNUSED_ATTRIBUTE bool status = optimizer::IndexUtil::ConvertIndexKeyOidToColOid( - accessor_.Get(), table_oid, accessor_->GetIndexSchema(index_oid), &lookup, &mapped_cols); + accessor_.Get(), table_oid, idx_schema, &lookup, &mapped_cols); NOISEPAGE_ASSERT(status, "Failed to get index key oids in operating unit recorder"); if (plan_meta_data_ != nullptr) { @@ -308,6 +363,9 @@ void OperatingUnitRecorder::AggregateFeatures(selfdriving::ExecutionOperatingUni num_loops = plan_meta_data_->GetPlanNodeMetaData(c_plan->GetPlanNodeId()).GetCardinality(); } + const auto &idx_schema = accessor_->GetIndexSchema(index_join_plan->GetIndexOid()); + std::tie(specific_feature0, specific_feature1) = DeriveIndexSpecificFeatures(idx_schema); + // FIXME(lin): Right now we do not populate the cardinality or selectivity stats for the inner index scan of // INDEXNLJOIN. We directly get the size from the index and assume the inner index scan only returns 1 tuple. num_rows = index_join_plan->GetIndexSize(); @@ -324,6 +382,13 @@ void OperatingUnitRecorder::AggregateFeatures(selfdriving::ExecutionOperatingUni // the SORT_TOPK_BUILD OU. We need to refactor the interface to pass in this information correctly. cardinality = 1; } break; + case ExecutionOperatingUnitType::CREATE_INDEX_MAIN: { + // This OU does not record a num_concurrent. For this part, we only ensure that the + // num_rows and cardinality are set accordingly. This should be sufficient to allow + // downstream consumer to use this feature for prediction. + num_rows = table_num_rows; + cardinality = table_num_rows; + } break; case ExecutionOperatingUnitType::CREATE_INDEX: { num_rows = table_num_rows; cardinality = table_num_rows; @@ -335,6 +400,20 @@ void OperatingUnitRecorder::AggregateFeatures(selfdriving::ExecutionOperatingUni num_rows = atoi(idx_name.c_str() + mrpos + sizeof("minirunners__") - 1); cardinality = num_rows; } + + const auto &idx_schema = reinterpret_cast(plan)->GetSchema(); + std::tie(specific_feature0, specific_feature1) = DeriveIndexSpecificFeatures(*idx_schema); + + size_t num_threads = DeriveIndexBuildThreads( + *idx_schema, reinterpret_cast(plan)->GetTableOid()); + + // Adjust the CREATE_INDEX OU by the concurrent estimate. DeriveIndexBuildThreads takes + // into consideration the number of threads available and the number of blocks the + // table has. The num_rows/cardinality is scaled accordingly for now and num_concurrent + // is set to the estimate. + num_rows /= num_threads; + cardinality /= num_threads; + num_concurrent = num_threads; } break; default: num_rows = current_plan_cardinality; @@ -373,8 +452,9 @@ void OperatingUnitRecorder::AggregateFeatures(selfdriving::ExecutionOperatingUni } } - auto feature = ExecutionOperatingUnitFeature(translator->GetTranslatorId(), type, num_rows, key_size, num_keys, - cardinality, mem_factor, num_loops, num_concurrent); + auto feature = + ExecutionOperatingUnitFeature(translator->GetTranslatorId(), type, num_rows, key_size, num_keys, cardinality, + mem_factor, num_loops, num_concurrent, specific_feature0, specific_feature1); pipeline_features_.emplace(type, std::move(feature)); } @@ -431,6 +511,12 @@ void OperatingUnitRecorder::Visit(const planner::CreateIndexPlanNode *plan) { } AggregateFeatures(plan_feature_type_, key_size, num_keys, plan, 1, 1); + + auto translator = current_translator_.CastManagedPointerTo(); + if (translator->GetPipeline()->IsParallel()) { + // Only want to record CREATE_INDEX_MAIN if the operator executes in parallel + AggregateFeatures(ExecutionOperatingUnitType::CREATE_INDEX_MAIN, key_size, num_keys, plan, 1, 1); + } } void OperatingUnitRecorder::Visit(const planner::SeqScanPlanNode *plan) { diff --git a/src/self_driving/modeling/operating_unit_util.cpp b/src/self_driving/modeling/operating_unit_util.cpp index 415f6fd04d..88dd7cf467 100644 --- a/src/self_driving/modeling/operating_unit_util.cpp +++ b/src/self_driving/modeling/operating_unit_util.cpp @@ -13,7 +13,11 @@ bool OperatingUnitUtil::IsOperatingUnitTypeBlocking(ExecutionOperatingUnitType f case ExecutionOperatingUnitType::SORT_BUILD: case ExecutionOperatingUnitType::SORT_TOPK_BUILD: case ExecutionOperatingUnitType::AGGREGATE_BUILD: - case ExecutionOperatingUnitType::CREATE_INDEX: + + // Since the OperatingUnitRecorder wil now generate a CREATE_INDEX_MAIN OU, + // we treat the CREATE_INDEX_MAIN OU as the blocking OU since the caller + // will extract it. + case ExecutionOperatingUnitType::CREATE_INDEX_MAIN: return true; default: return false; @@ -32,8 +36,6 @@ ExecutionOperatingUnitType OperatingUnitUtil::GetNonParallelType(ExecutionOperat case ExecutionOperatingUnitType::PARALLEL_SORT_TOPK_STEP: case ExecutionOperatingUnitType::PARALLEL_SORT_TOPK_MERGE_STEP: return ExecutionOperatingUnitType::SORT_TOPK_BUILD; - case ExecutionOperatingUnitType::CREATE_INDEX_MAIN: - return ExecutionOperatingUnitType::CREATE_INDEX; default: return ExecutionOperatingUnitType::INVALID; } diff --git a/src/storage/index/bplustree_index.cpp b/src/storage/index/bplustree_index.cpp index cf72abc132..cf1a2a233a 100644 --- a/src/storage/index/bplustree_index.cpp +++ b/src/storage/index/bplustree_index.cpp @@ -12,6 +12,26 @@ template BPlusTreeIndex::BPlusTreeIndex(IndexMetadata &&metadata) : Index(std::move(metadata)), bplustree_{new BPlusTree} {} +template +void BPlusTreeIndex::SetInnerNodeSizeUpperThreshold(int threshold) { + bplustree_->SetInnerNodeSizeUpperThreshold(threshold); +} + +template +void BPlusTreeIndex::SetInnerNodeSizeLowerThreshold(int threshold) { + bplustree_->SetInnerNodeSizeLowerThreshold(threshold); +} + +template +int BPlusTreeIndex::GetInnerNodeSizeUpperThreshold() const { + return bplustree_->GetInnerNodeSizeUpperThreshold(); +} + +template +int BPlusTreeIndex::GetInnerNodeSizeLowerThreshold() const { + return bplustree_->GetInnerNodeSizeLowerThreshold(); +} + template size_t BPlusTreeIndex::EstimateHeapUsage() const { return bplustree_->EstimateHeapUsage(); diff --git a/src/storage/index/index_builder.cpp b/src/storage/index/index_builder.cpp index 253181103a..797d96ced6 100644 --- a/src/storage/index/index_builder.cpp +++ b/src/storage/index/index_builder.cpp @@ -4,6 +4,7 @@ #include #include "catalog/catalog_defs.h" +#include "parser/expression/constant_value_expression.h" #include "storage/index/bplustree_index.h" #include "storage/index/bwtree_index.h" #include "storage/index/compact_ints_key.h" @@ -63,12 +64,16 @@ Index *IndexBuilder::BuildBwTreeIntsKey(IndexMetadata metadata) const { Index *index = nullptr; if (key_size <= 8) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 16) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 24) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 32) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } NOISEPAGE_ASSERT(index != nullptr, "Failed to create an IntsKey index."); return index; @@ -86,12 +91,16 @@ Index *IndexBuilder::BuildBwTreeGenericKey(IndexMetadata metadata) const { if (key_size <= 64) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 128) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 256) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 512) { index = new BwTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } NOISEPAGE_ASSERT(index != nullptr, "Failed to create an GenericKey index."); return index; @@ -104,12 +113,16 @@ Index *IndexBuilder::BuildBPlusTreeIntsKey(IndexMetadata &&metadata) const { Index *index = nullptr; if (key_size <= 8) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 16) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 24) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 32) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } NOISEPAGE_ASSERT(index != nullptr, "Failed to create an IntsKey index."); return index; @@ -127,12 +140,16 @@ Index *IndexBuilder::BuildBPlusTreeGenericKey(IndexMetadata metadata) const { if (key_size <= 64) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 128) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 256) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 512) { index = new BPlusTreeIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } NOISEPAGE_ASSERT(index != nullptr, "Failed to create an GenericKey index."); return index; @@ -145,16 +162,22 @@ Index *IndexBuilder::BuildHashIntsKey(IndexMetadata metadata) const { Index *index = nullptr; if (key_size <= 8) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 16) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 32) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 64) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 128) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 256) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } NOISEPAGE_ASSERT(index != nullptr, "Failed to create an IntsKey index."); return index; @@ -171,12 +194,37 @@ Index *IndexBuilder::BuildHashGenericKey(IndexMetadata metadata) const { if (key_size <= 64) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 128) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } else if (key_size <= 256) { index = new HashIndex>(std::move(metadata)); + ApplyIndexOptions>(index); } NOISEPAGE_ASSERT(index != nullptr, "Failed to create an IntsKey index."); return index; } + +template +void IndexBuilder::ApplyIndexOptions(Index *index) const { + auto &options = key_schema_.GetIndexOptions().GetOptions(); + + // This seems to be a known bug: https://bugzilla.mozilla.org/show_bug.cgi?id=1605181 + // NOLINTNEXTLINE + if constexpr (type == storage::index::IndexType::BPLUSTREE) { + if (options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_UPPER_THRESHOLD) != options.end()) { + auto expr = options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_UPPER_THRESHOLD)->second.get(); + auto cve = reinterpret_cast(expr); + reinterpret_cast *>(index)->SetInnerNodeSizeUpperThreshold(cve->Peek()); + } + + if (options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_LOWER_THRESHOLD) != options.end()) { + auto expr = options.find(catalog::IndexOptions::Knob::BPLUSTREE_INNER_NODE_LOWER_THRESHOLD)->second.get(); + auto cve = reinterpret_cast(expr); + reinterpret_cast *>(index)->SetInnerNodeSizeLowerThreshold(cve->Peek()); + } + } +} + } // namespace noisepage::storage::index diff --git a/src/storage/recovery/recovery_manager.cpp b/src/storage/recovery/recovery_manager.cpp index d60a27dec8..22c81fcb73 100644 --- a/src/storage/recovery/recovery_manager.cpp +++ b/src/storage/recovery/recovery_manager.cpp @@ -674,7 +674,8 @@ uint32_t RecoveryManager::ProcessSpecialCasePGClassRecord( // catalog tables, and we can now recreate the object // Step 1: Get the class oid and kind for the object we're updating std::vector col_oids = {catalog::postgres::PgClass::RELOID.oid_, - catalog::postgres::PgClass::RELKIND.oid_}; + catalog::postgres::PgClass::RELKIND.oid_, + catalog::postgres::PgClass::RELOPTIONS.oid_}; auto pr_init = pg_class_ptr->InitializerForProjectedRow(col_oids); auto pr_map = pg_class_ptr->ProjectionMapForOids(col_oids); auto *buffer = common::AllocationUtil::AllocateAligned(pr_init.ProjectedRowSize()); @@ -684,6 +685,8 @@ uint32_t RecoveryManager::ProcessSpecialCasePGClassRecord( *(reinterpret_cast(pr->AccessWithNullCheck(pr_map[catalog::postgres::PgClass::RELOID.oid_]))); auto class_kind = *(reinterpret_cast( pr->AccessWithNullCheck(pr_map[catalog::postgres::PgClass::RELKIND.oid_]))); + auto options = reinterpret_cast( + pr->AccessWithNullCheck(pr_map[catalog::postgres::PgClass::RELOPTIONS.oid_])); switch (class_kind) { case (catalog::postgres::PgClass::RelKind::REGULAR_TABLE): { @@ -739,6 +742,13 @@ uint32_t RecoveryManager::ProcessSpecialCasePGClassRecord( catalog::postgres::PgIndex::IND_TYPE.oid_}; auto pg_index_pr_init = db_catalog->pg_core_.indexes_->InitializerForProjectedRow(col_oids); auto pg_index_pr_map = db_catalog->pg_core_.indexes_->ProjectionMapForOids(col_oids); + + // Get the index options before we free the buffer + catalog::IndexOptions idx_options; + if (options != nullptr) { + idx_options.FromCatalogString(std::string(options->StringView())); + } + delete[] buffer; // Delete old buffer, it won't be large enough for this PR buffer = common::AllocationUtil::AllocateAligned(pg_index_pr_init.ProjectedRowSize()); pr = pg_index_pr_init.InitializeRow(buffer); @@ -757,8 +767,8 @@ uint32_t RecoveryManager::ProcessSpecialCasePGClassRecord( pr->AccessWithNullCheck(pg_index_pr_map[catalog::postgres::PgIndex::IND_TYPE.oid_]))); // Step 4: Create and set IndexSchema in catalog - auto *index_schema = - new catalog::IndexSchema(index_cols, index_type, is_unique, is_primary, is_exclusion, is_immediate); + auto *index_schema = new catalog::IndexSchema(index_cols, index_type, is_unique, is_primary, is_exclusion, + is_immediate, idx_options); result = db_catalog->SetIndexSchemaPointer(common::ManagedPointer(txn), catalog::index_oid_t(class_oid), index_schema); NOISEPAGE_ASSERT(result, diff --git a/test/catalog/catalog_test.cpp b/test/catalog/catalog_test.cpp index 76aa7ff294..e507f79340 100644 --- a/test/catalog/catalog_test.cpp +++ b/test/catalog/catalog_test.cpp @@ -324,7 +324,9 @@ TEST_F(CatalogTests, UserIndexTest) { // Create the index std::vector key_cols{catalog::IndexSchema::Column{ "id", type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_, table_oid, schema.GetColumn("id").Oid())}}; - auto index_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto index_schema = + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); std::string index_name = "test_table_index_mabobberwithareallylongnamethatstillneedsmore"; auto idx_oid = accessor->CreateIndex(accessor->GetDefaultNamespace(), table_oid, index_name, index_schema); EXPECT_NE(idx_oid, catalog::INVALID_INDEX_OID); @@ -380,7 +382,9 @@ TEST_F(CatalogTests, CascadingDropTableTest) { EXPECT_NE(accessor, nullptr); std::vector key_cols{catalog::IndexSchema::Column{ "id", type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_, table_oid, schema.GetColumn("id").Oid())}}; - auto index_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto index_schema = + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); auto idx_oid = accessor->CreateIndex(accessor->GetDefaultNamespace(), table_oid, "test_index", index_schema); EXPECT_NE(idx_oid, catalog::INVALID_INDEX_OID); auto true_schema = accessor->GetIndexSchema(idx_oid); @@ -444,7 +448,9 @@ TEST_F(CatalogTests, CascadingDropNamespaceTest) { EXPECT_NE(accessor, nullptr); std::vector key_cols{catalog::IndexSchema::Column{ "id", type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_, table_oid, schema.GetColumn("id").Oid())}}; - auto index_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto index_schema = + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); auto idx_oid = accessor->CreateIndex(ns_oid, table_oid, "test_index", index_schema); EXPECT_NE(idx_oid, catalog::INVALID_INDEX_OID); auto true_schema = accessor->GetIndexSchema(idx_oid); @@ -509,7 +515,9 @@ TEST_F(CatalogTests, CascadingDropNamespaceWithIndexOnOtherNamespaceTest) { EXPECT_NE(accessor, nullptr); std::vector key_cols{catalog::IndexSchema::Column{ "id", type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_, table_oid, schema.GetColumn("id").Oid())}}; - auto index_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto index_schema = + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); auto idx_oid = accessor->CreateIndex(ns_oid, table_oid, "test_index", index_schema); EXPECT_NE(idx_oid, catalog::INVALID_INDEX_OID); auto true_schema = accessor->GetIndexSchema(idx_oid); @@ -690,7 +698,9 @@ TEST_F(CatalogTests, GetIndexesTest) { // Create the index std::vector key_cols{catalog::IndexSchema::Column{ "id", type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_, table_oid, schema.GetColumn("id").Oid())}}; - auto index_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto index_schema = + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); auto idx_oid = accessor->CreateIndex(accessor->GetDefaultNamespace(), table_oid, "test_table_idx", index_schema); EXPECT_NE(idx_oid, catalog::INVALID_INDEX_OID); auto true_schema = accessor->GetIndexSchema(idx_oid); @@ -737,7 +747,9 @@ TEST_F(CatalogTests, GetIndexObjectsTest) { std::vector key_cols{ catalog::IndexSchema::Column{"id", type::TypeId::INTEGER, false, parser::ColumnValueExpression(db_, table_oid, schema.GetColumn("id").Oid())}}; - auto index_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto index_schema = + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); auto idx_oid = accessor->CreateIndex(accessor->GetDefaultNamespace(), table_oid, "test_table_idx" + std::to_string(i), index_schema); EXPECT_NE(idx_oid, catalog::INVALID_INDEX_OID); diff --git a/test/execution/ddl_executors_test.cpp b/test/execution/ddl_executors_test.cpp index dbd6acafbb..76ff3fc777 100644 --- a/test/execution/ddl_executors_test.cpp +++ b/test/execution/ddl_executors_test.cpp @@ -45,8 +45,9 @@ class DDLExecutorsTests : public TerrierTest { parser::ColumnValueExpression(CatalogTestUtil::TEST_DB_OID, CatalogTestUtil::TEST_TABLE_OID, catalog::col_oid_t(1))); StorageTestUtil::ForceOid(&(keycols[0]), catalog::indexkeycol_oid_t(1)); - index_schema_ = - std::make_unique(keycols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + index_schema_ = std::make_unique(keycols, storage::index::IndexType::BPLUSTREE, true, true, + false, true, options); txn_ = txn_manager_->BeginTransaction(); accessor_ = catalog_->GetAccessor(common::ManagedPointer(txn_), db_, DISABLED); diff --git a/test/execution/index_create_test.cpp b/test/execution/index_create_test.cpp index d2ea6f563c..2f1a8c0c94 100644 --- a/test/execution/index_create_test.cpp +++ b/test/execution/index_create_test.cpp @@ -106,7 +106,9 @@ TEST_F(IndexCreateTest, SimpleIndexCreate) { parser::ColumnValueExpression col_expr(table_oid, table_col.Oid(), table_col.Type()); index_cols.emplace_back("index_colA", type::TypeId::INTEGER, false, col_expr); - catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false}; + catalog::IndexOptions options; + catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false, + options}; CreateIndex(table_oid, "indexA", std::make_unique(tmp_index_schema)); @@ -126,7 +128,9 @@ TEST_F(IndexCreateTest, SimpleIndexCreate2) { parser::ColumnValueExpression col_expr(table_oid, table_col.Oid(), table_col.Type()); index_cols.emplace_back("index_colE", type::TypeId::INTEGER, false, col_expr); - catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false}; + catalog::IndexOptions options; + catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false, + options}; CreateIndex(table_oid, "indexE", std::make_unique(tmp_index_schema)); @@ -150,7 +154,9 @@ TEST_F(IndexCreateTest, MultiColumnIndexCreate) { parser::ColumnValueExpression col_expr_b(table_oid, table_col_b.Oid(), table_col_b.Type()); index_cols.emplace_back("index_colB", type::TypeId::INTEGER, false, col_expr_b); - catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false}; + catalog::IndexOptions options; + catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false, + options}; CreateIndex(table_oid, "indexAB", std::make_unique(tmp_index_schema)); diff --git a/test/execution/sql_table_vector_iterator_test.cpp b/test/execution/sql_table_vector_iterator_test.cpp index 62ecc660df..9309f5ce21 100644 --- a/test/execution/sql_table_vector_iterator_test.cpp +++ b/test/execution/sql_table_vector_iterator_test.cpp @@ -159,7 +159,7 @@ TEST_F(TableVectorIteratorTest, ParallelScanTest) { auto table_oid = exec_ctx_->GetAccessor()->GetTableOid(NSOid(), "test_1"); std::array col_oids{1, 2, 3, 4}; TableVectorIterator::ParallelScan(table_oid.UnderlyingValue(), col_oids.data(), col_oids.size(), nullptr, - exec_ctx_.get(), scanner); + exec_ctx_.get(), 0, scanner); // Count total aggregate tuple count seen by all threads uint32_t aggregate_tuple_count = 0; diff --git a/test/include/test_util/end_to_end_test.h b/test/include/test_util/end_to_end_test.h index 5816bedb7a..c641b331e8 100644 --- a/test/include/test_util/end_to_end_test.h +++ b/test/include/test_util/end_to_end_test.h @@ -8,6 +8,7 @@ #include "binder/bind_node_visitor.h" #include "execution/compiler/compilation_context.h" #include "execution/compiler/output_checker.h" +#include "execution/sql/ddl_executors.h" #include "execution/sql_test.h" #include "gtest/gtest.h" #include "optimizer/cost_model/abstract_cost_model.h" @@ -64,6 +65,11 @@ class EndToEndTest : public execution::SqlBasedTest { auto table_oid = analyze_plan->GetTableOid(); std::vector col_oids = analyze_plan->GetColumnOids(); test_txn_->RegisterCommitAction([=]() { stats_storage_->MarkStatsStale(db_oid, table_oid, col_oids); }); + } else if (out_plan->GetPlanNodeType() == planner::PlanNodeType::CREATE_INDEX) { + execution::sql::DDLExecutors::CreateIndexExecutor( + common::ManagedPointer( + reinterpret_cast(out_plan.get())), + common::ManagedPointer(accessor)); } // Execute diff --git a/test/include/test_util/storage_test_util.h b/test/include/test_util/storage_test_util.h index 18dc8e3c9f..b7987d6c2e 100644 --- a/test/include/test_util/storage_test_util.h +++ b/test/include/test_util/storage_test_util.h @@ -516,7 +516,8 @@ class StorageTestUtil { ForceOid(&(key_cols.back()), key_oid); } - return catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options); } /** @@ -555,7 +556,8 @@ class StorageTestUtil { bytes_used = static_cast(bytes_used + type::TypeUtil::GetTypeSize(type)); } - return catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options); } private: diff --git a/test/include/test_util/tpcc/schemas.h b/test/include/test_util/tpcc/schemas.h index 9f19d0c585..bc10a10d52 100644 --- a/test/include/test_util/tpcc/schemas.h +++ b/test/include/test_util/tpcc/schemas.h @@ -83,7 +83,8 @@ class Schemas { NOISEPAGE_ASSERT(warehouse_key_schema.size() == NUM_WAREHOUSE_PRIMARY_INDEX_COLS, "Wrong number of columns for Warehouse primary index schema."); - return catalog::IndexSchema(warehouse_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(warehouse_key_schema, index_type, true, true, false, true, options); } /** @@ -157,7 +158,8 @@ class Schemas { NOISEPAGE_ASSERT(district_key_schema.size() == NUM_DISTRICT_PRIMARY_INDEX_COLS, "Wrong number of columns for District primary index schema."); - return catalog::IndexSchema(district_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(district_key_schema, index_type, true, true, false, true, options); } /** @@ -264,7 +266,8 @@ class Schemas { NOISEPAGE_ASSERT(customer_key_schema.size() == NUM_CUSTOMER_PRIMARY_INDEX_COLS, "Wrong number of columns for Customer primary index schema."); - return catalog::IndexSchema(customer_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(customer_key_schema, index_type, true, true, false, true, options); } /** @@ -293,7 +296,8 @@ class Schemas { NOISEPAGE_ASSERT(customer_secondary_key_schema.size() == NUM_CUSTOMER_SECONDARY_INDEX_COLS, "Wrong number of columns for Customer secondary index schema."); - return catalog::IndexSchema(customer_secondary_key_schema, index_type, false, false, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(customer_secondary_key_schema, index_type, false, false, false, true, options); } /** @@ -385,7 +389,8 @@ class Schemas { NOISEPAGE_ASSERT(new_order_key_schema.size() == NUM_NEW_ORDER_PRIMARY_INDEX_COLS, "Wrong number of columns for New Order primary index schema."); - return catalog::IndexSchema(new_order_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(new_order_key_schema, index_type, true, true, false, true, options); } /** @@ -452,7 +457,8 @@ class Schemas { NOISEPAGE_ASSERT(order_key_schema.size() == NUM_ORDER_PRIMARY_INDEX_COLS, "Wrong number of columns for Order primary index schema."); - return catalog::IndexSchema(order_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(order_key_schema, index_type, true, true, false, true, options); } /** @@ -484,7 +490,8 @@ class Schemas { NOISEPAGE_ASSERT(order_secondary_key_schema.size() == NUM_ORDER_SECONDARY_INDEX_COLS, "Wrong number of columns for Order secondary index schema."); - return catalog::IndexSchema(order_secondary_key_schema, index_type, true, false, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(order_secondary_key_schema, index_type, true, false, false, true, options); } /** @@ -561,7 +568,8 @@ class Schemas { NOISEPAGE_ASSERT(order_line_key_schema.size() == NUM_ORDER_LINE_PRIMARY_INDEX_COLS, "Wrong number of columns for Order Line key schema."); - return catalog::IndexSchema(order_line_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(order_line_key_schema, index_type, true, true, false, true, options); } /** @@ -612,7 +620,8 @@ class Schemas { NOISEPAGE_ASSERT(item_key_schema.size() == NUM_ITEM_PRIMARY_INDEX_COLS, "Wrong number of columns for Item primary index schema."); - return catalog::IndexSchema(item_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(item_key_schema, index_type, true, true, false, true, options); } /** @@ -703,7 +712,8 @@ class Schemas { NOISEPAGE_ASSERT(stock_key_schema.size() == NUM_STOCK_PRIMARY_INDEX_COLS, "Wrong number of columns for Stock primary index schema."); - return catalog::IndexSchema(stock_key_schema, index_type, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(stock_key_schema, index_type, true, true, false, true, options); } private: diff --git a/test/optimizer/logical_operator_test.cpp b/test/optimizer/logical_operator_test.cpp index 4d3794ddea..ea8f9c77b2 100644 --- a/test/optimizer/logical_operator_test.cpp +++ b/test/optimizer/logical_operator_test.cpp @@ -1585,6 +1585,7 @@ TEST(OperatorTests, LogicalCreateIndexTest) { // Due to the deferred action framework being used to manage memory, we need to // simulate a transaction to prevent leaks + catalog::IndexOptions options; auto timestamp_manager = transaction::TimestampManager(); auto deferred_action_manager = transaction::DeferredActionManager(common::ManagedPointer(×tamp_manager)); auto buffer_pool = storage::RecordBufferSegmentPool(100, 2); @@ -1596,7 +1597,7 @@ TEST(OperatorTests, LogicalCreateIndexTest) { Operator op1 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), parser::IndexType::BPLUSTREE, true, "index_1", - std::vector>{}) + std::vector>{}, options) .RegisterWithTxnContext(txn_context); EXPECT_EQ(op1.GetOpType(), OpType::LOGICALCREATEINDEX); @@ -1612,7 +1613,7 @@ TEST(OperatorTests, LogicalCreateIndexTest) { Operator op2 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), parser::IndexType::BPLUSTREE, true, "index_1", - std::vector>{}) + std::vector>{}, options) .RegisterWithTxnContext(txn_context); EXPECT_TRUE(op1 == op2); EXPECT_EQ(op1.Hash(), op2.Hash()); @@ -1623,17 +1624,19 @@ TEST(OperatorTests, LogicalCreateIndexTest) { common::ManagedPointer( new parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(9)))}; auto raw_values_copy = raw_values; - Operator op3 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), - parser::IndexType::BPLUSTREE, true, "index_1", std::move(raw_values_copy)) - .RegisterWithTxnContext(txn_context); + Operator op3 = + LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), + parser::IndexType::BPLUSTREE, true, "index_1", std::move(raw_values_copy), options) + .RegisterWithTxnContext(txn_context); EXPECT_EQ(op3.GetContentsAs()->GetIndexAttr(), raw_values); EXPECT_FALSE(op3 == op1); EXPECT_NE(op1.Hash(), op3.Hash()); auto raw_values_copy2 = raw_values; - Operator op4 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), - parser::IndexType::BPLUSTREE, true, "index_1", std::move(raw_values_copy2)) - .RegisterWithTxnContext(txn_context); + Operator op4 = + LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), + parser::IndexType::BPLUSTREE, true, "index_1", std::move(raw_values_copy2), options) + .RegisterWithTxnContext(txn_context); EXPECT_EQ(op4.GetContentsAs()->GetIndexAttr(), raw_values); EXPECT_TRUE(op3 == op4); EXPECT_EQ(op4.Hash(), op3.Hash()); @@ -1644,44 +1647,45 @@ TEST(OperatorTests, LogicalCreateIndexTest) { common::ManagedPointer( new parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(9)))}; auto raw_values_copy3 = raw_values_2; - Operator op10 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), - parser::IndexType::BPLUSTREE, true, "index_1", std::move(raw_values_copy3)) - .RegisterWithTxnContext(txn_context); + Operator op10 = + LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), + parser::IndexType::BPLUSTREE, true, "index_1", std::move(raw_values_copy3), options) + .RegisterWithTxnContext(txn_context); EXPECT_EQ(op10.GetContentsAs()->GetIndexAttr(), raw_values_2); EXPECT_FALSE(op3 == op10); EXPECT_NE(op10.Hash(), op3.Hash()); Operator op5 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(2), catalog::table_oid_t(1), parser::IndexType::BPLUSTREE, true, "index_1", - std::vector>{}) + std::vector>{}, options) .RegisterWithTxnContext(txn_context); EXPECT_FALSE(op1 == op5); EXPECT_NE(op1.Hash(), op5.Hash()); Operator op6 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(2), parser::IndexType::BPLUSTREE, true, "index_1", - std::vector>{}) + std::vector>{}, options) .RegisterWithTxnContext(txn_context); EXPECT_FALSE(op1 == op6); EXPECT_NE(op1.Hash(), op6.Hash()); Operator op7 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), parser::IndexType::HASH, true, "index_1", - std::vector>{}) + std::vector>{}, options) .RegisterWithTxnContext(txn_context); EXPECT_FALSE(op1 == op7); EXPECT_NE(op1.Hash(), op7.Hash()); Operator op8 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), parser::IndexType::BPLUSTREE, false, "index_1", - std::vector>{}) + std::vector>{}, options) .RegisterWithTxnContext(txn_context); EXPECT_FALSE(op1 == op8); EXPECT_NE(op1.Hash(), op8.Hash()); Operator op9 = LogicalCreateIndex::Make(catalog::db_oid_t(1), catalog::namespace_oid_t(1), catalog::table_oid_t(1), parser::IndexType::BPLUSTREE, true, "index_2", - std::vector>{}) + std::vector>{}, options) .RegisterWithTxnContext(txn_context); EXPECT_FALSE(op1 == op9); EXPECT_NE(op1.Hash(), op9.Hash()); diff --git a/test/optimizer/operator_transformer_test.cpp b/test/optimizer/operator_transformer_test.cpp index f4b9a4e76f..492a4f6553 100644 --- a/test/optimizer/operator_transformer_test.cpp +++ b/test/optimizer/operator_transformer_test.cpp @@ -126,7 +126,9 @@ class OperatorTransformerTest : public TerrierTest { auto col = catalog::IndexSchema::Column( "a1", type::TypeId::INTEGER, true, parser::ColumnValueExpression(db_oid_, table_a_oid_, accessor_->GetSchema(table_a_oid_).GetColumn("a1").Oid())); - auto idx_schema = catalog::IndexSchema({col}, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + auto idx_schema = + catalog::IndexSchema({col}, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); a_index_oid_ = accessor_->CreateIndex(accessor_->GetDefaultNamespace(), table_a_oid_, "a_index", idx_schema); storage::index::IndexBuilder index_builder; index_builder.SetKeySchema(accessor_->GetIndexSchema(a_index_oid_)); diff --git a/test/optimizer/physical_operator_test.cpp b/test/optimizer/physical_operator_test.cpp index 8127d62dd0..c77aa2f471 100644 --- a/test/optimizer/physical_operator_test.cpp +++ b/test/optimizer/physical_operator_test.cpp @@ -1677,11 +1677,12 @@ TEST(OperatorTests, CreateIndexTest) { transaction::TransactionContext *txn_context = txn_manager.BeginTransaction(); + catalog::IndexOptions options; auto idx_schema = std::make_unique( std::vector{catalog::IndexSchema::Column( "col_1", type::TypeId::TINYINT, true, parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(1)))}, - storage::index::IndexType::BPLUSTREE, true, true, true, true); + storage::index::IndexType::BPLUSTREE, true, true, true, true, options); Operator op1 = CreateIndex::Make(catalog::namespace_oid_t(1), catalog::table_oid_t(1), "index_1", std::move(idx_schema)) @@ -1696,14 +1697,14 @@ TEST(OperatorTests, CreateIndexTest) { std::vector{catalog::IndexSchema::Column( "col_1", type::TypeId::TINYINT, true, parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(1)))}, - storage::index::IndexType::BPLUSTREE, true, true, true, true); + storage::index::IndexType::BPLUSTREE, true, true, true, true, options); EXPECT_EQ(*op1.GetContentsAs()->GetSchema(), *idx_schema_dup); auto idx_schema_2 = std::make_unique( std::vector{catalog::IndexSchema::Column( "col_1", type::TypeId::TINYINT, true, parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(1)))}, - storage::index::IndexType::BPLUSTREE, true, true, true, true); + storage::index::IndexType::BPLUSTREE, true, true, true, true, options); Operator op2 = CreateIndex::Make(catalog::namespace_oid_t(1), catalog::table_oid_t(1), "index_1", std::move(idx_schema_2)) .RegisterWithTxnContext(txn_context); @@ -1714,7 +1715,7 @@ TEST(OperatorTests, CreateIndexTest) { std::vector{catalog::IndexSchema::Column( "col_1", type::TypeId::TINYINT, true, parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(1)))}, - storage::index::IndexType::BPLUSTREE, true, true, true, true); + storage::index::IndexType::BPLUSTREE, true, true, true, true, options); Operator op3 = CreateIndex::Make(catalog::namespace_oid_t(2), catalog::table_oid_t(1), "index_1", std::move(idx_schema_3)) .RegisterWithTxnContext(txn_context); @@ -1725,7 +1726,7 @@ TEST(OperatorTests, CreateIndexTest) { std::vector{catalog::IndexSchema::Column( "col_1", type::TypeId::TINYINT, true, parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(1)))}, - storage::index::IndexType::BPLUSTREE, true, true, true, true); + storage::index::IndexType::BPLUSTREE, true, true, true, true, options); Operator op4 = CreateIndex::Make(catalog::namespace_oid_t(1), catalog::table_oid_t(1), "index_2", std::move(idx_schema_4)) .RegisterWithTxnContext(txn_context); @@ -1736,7 +1737,7 @@ TEST(OperatorTests, CreateIndexTest) { std::vector{catalog::IndexSchema::Column( "col_1", type::TypeId::INTEGER, true, parser::ConstantValueExpression(type::TypeId::INTEGER, execution::sql::Integer(1)))}, - storage::index::IndexType::BPLUSTREE, true, true, true, true); + storage::index::IndexType::BPLUSTREE, true, true, true, true, options); Operator op5 = CreateIndex::Make(catalog::namespace_oid_t(1), catalog::table_oid_t(1), "index_1", std::move(idx_schema_5)) .RegisterWithTxnContext(txn_context); @@ -1751,7 +1752,7 @@ TEST(OperatorTests, CreateIndexTest) { catalog::IndexSchema::Column( "col_2", type::TypeId::TINYINT, true, parser::ConstantValueExpression(type::TypeId::INTEGER, execution::sql::Integer(1)))}, - storage::index::IndexType::BPLUSTREE, true, true, true, true); + storage::index::IndexType::BPLUSTREE, true, true, true, true, options); Operator op6 = CreateIndex::Make(catalog::namespace_oid_t(1), catalog::table_oid_t(1), "index_1", std::move(idx_schema_6)) .RegisterWithTxnContext(txn_context); diff --git a/test/self_driving_e2e/model_server_test.cpp b/test/self_driving_e2e/model_server_test.cpp index da000ceabf..6cae28d89b 100644 --- a/test/self_driving_e2e/model_server_test.cpp +++ b/test/self_driving_e2e/model_server_test.cpp @@ -80,10 +80,10 @@ TEST_F(ModelServerTest, OUAndInterferenceModelTest) { // ------------------------------------------------------- std::vector> features{ - {0, 0, 10000, 4, 1, 10000, 1, 0, 0}, - {0, 0, 10000, 4, 1, 10000, 1, 0, 0}, - {0, 0, 10000, 4, 1, 10000, 1, 0, 0}, - {0, 0, 10000, 4, 1, 10000, 1, 0, 0}, + {0, 0, 10000, 4, 1, 10000, 1, 0, 0, 0, 0}, + {0, 0, 10000, 4, 1, 10000, 1, 0, 0, 0, 0}, + {0, 0, 10000, 4, 1, 10000, 1, 0, 0, 0, 0}, + {0, 0, 10000, 4, 1, 10000, 1, 0, 0, 0, 0}, }; // Perform a training of the opunit models with {lr, rf} as training methods. diff --git a/test/sql/create_index_options_test.cpp b/test/sql/create_index_options_test.cpp new file mode 100644 index 0000000000..cddd2cc64e --- /dev/null +++ b/test/sql/create_index_options_test.cpp @@ -0,0 +1,53 @@ + +#include "execution/compiler/output_checker.h" +#include "gtest/gtest.h" +#include "spdlog/fmt/fmt.h" +#include "storage/index/bplustree_index.h" +#include "storage/index/index.h" +#include "test_util/end_to_end_test.h" +#include "test_util/test_harness.h" + +namespace noisepage::test { + +class CreateIndexOptionsTest : public EndToEndTest { + public: + void SetUp() override { + EndToEndTest::SetUp(); + auto exec_ctx = MakeExecCtx(); + GenerateTestTables(exec_ctx.get()); + } +}; + +// NOLINTNEXTLINE +TEST_F(CreateIndexOptionsTest, BPlusTreeOptions) { + RunQuery("CREATE INDEX test_2_idx_upper ON test_2 (col1) WITH (BPLUSTREE_INNER_NODE_UPPER_THRESHOLD = 256)"); + RunQuery("CREATE INDEX test_2_idx_lower ON test_2 (col1) WITH (BPLUSTREE_INNER_NODE_LOWER_THRESHOLD = 4)"); + RunQuery( + "CREATE INDEX test_2_idx_both ON test_2 (col1) WITH (BPLUSTREE_INNER_NODE_UPPER_THRESHOLD = 256, " + "BPLUSTREE_INNER_NODE_LOWER_THRESHOLD = 4)"); + + auto test_2_idx_upper = accessor_->GetIndex(accessor_->GetIndexOid("test_2_idx_upper")); + auto test_2_idx_lower = accessor_->GetIndex(accessor_->GetIndexOid("test_2_idx_lower")); + auto test_2_idx_both = accessor_->GetIndex(accessor_->GetIndexOid("test_2_idx_both")); + ASSERT_TRUE(test_2_idx_upper); + ASSERT_TRUE(test_2_idx_lower); + ASSERT_TRUE(test_2_idx_both); + + ASSERT_EQ(test_2_idx_upper->Type(), storage::index::IndexType::BPLUSTREE); + ASSERT_EQ(test_2_idx_lower->Type(), storage::index::IndexType::BPLUSTREE); + ASSERT_EQ(test_2_idx_both->Type(), storage::index::IndexType::BPLUSTREE); + + auto test_2_idx_upper_bpt_index = + test_2_idx_upper.CastManagedPointerTo>>(); + auto test_2_idx_lower_bpt_index = + test_2_idx_lower.CastManagedPointerTo>>(); + auto test_2_idx_both_bpt_index = + test_2_idx_both.CastManagedPointerTo>>(); + + ASSERT_EQ(test_2_idx_upper_bpt_index->GetInnerNodeSizeUpperThreshold(), 256); + ASSERT_EQ(test_2_idx_lower_bpt_index->GetInnerNodeSizeLowerThreshold(), 4); + ASSERT_EQ(test_2_idx_both_bpt_index->GetInnerNodeSizeUpperThreshold(), 256); + ASSERT_EQ(test_2_idx_both_bpt_index->GetInnerNodeSizeLowerThreshold(), 4); +} + +} // namespace noisepage::test diff --git a/test/storage/bplustree_index_test.cpp b/test/storage/bplustree_index_test.cpp index 28335ff3c3..4b5f001c5d 100644 --- a/test/storage/bplustree_index_test.cpp +++ b/test/storage/bplustree_index_test.cpp @@ -62,8 +62,11 @@ class BPlusTreeIndexTests : public TerrierTest { parser::ColumnValueExpression(CatalogTestUtil::TEST_DB_OID, CatalogTestUtil::TEST_TABLE_OID, catalog::col_oid_t(1))); StorageTestUtil::ForceOid(&(keycols[0]), catalog::indexkeycol_oid_t(1)); - unique_schema_ = catalog::IndexSchema(keycols, storage::index::IndexType::BPLUSTREE, true, true, false, true); - default_schema_ = catalog::IndexSchema(keycols, storage::index::IndexType::BPLUSTREE, false, false, false, true); + catalog::IndexOptions options; + unique_schema_ = + catalog::IndexSchema(keycols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); + default_schema_ = + catalog::IndexSchema(keycols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options); unique_index_ = (IndexBuilder().SetKeySchema(unique_schema_)).Build(); default_index_ = (IndexBuilder().SetKeySchema(default_schema_)).Build(); diff --git a/test/storage/bwtree_index_test.cpp b/test/storage/bwtree_index_test.cpp index a196033bc7..079542dea0 100644 --- a/test/storage/bwtree_index_test.cpp +++ b/test/storage/bwtree_index_test.cpp @@ -69,8 +69,10 @@ class BwTreeIndexTests : public TerrierTest { parser::ColumnValueExpression(CatalogTestUtil::TEST_DB_OID, CatalogTestUtil::TEST_TABLE_OID, catalog::col_oid_t(1))); StorageTestUtil::ForceOid(&(keycols[0]), catalog::indexkeycol_oid_t(1)); - unique_schema_ = catalog::IndexSchema(keycols, storage::index::IndexType::BWTREE, true, true, false, true); - default_schema_ = catalog::IndexSchema(keycols, storage::index::IndexType::BWTREE, false, false, false, true); + catalog::IndexOptions options; + unique_schema_ = catalog::IndexSchema(keycols, storage::index::IndexType::BWTREE, true, true, false, true, options); + default_schema_ = + catalog::IndexSchema(keycols, storage::index::IndexType::BWTREE, false, false, false, true, options); unique_index_ = (IndexBuilder().SetKeySchema(unique_schema_)).Build(); default_index_ = (IndexBuilder().SetKeySchema(default_schema_)).Build(); diff --git a/test/storage/hash_index_test.cpp b/test/storage/hash_index_test.cpp index 9866376b35..74057f7865 100644 --- a/test/storage/hash_index_test.cpp +++ b/test/storage/hash_index_test.cpp @@ -66,8 +66,11 @@ class HashIndexTests : public TerrierTest { parser::ColumnValueExpression(CatalogTestUtil::TEST_DB_OID, CatalogTestUtil::TEST_TABLE_OID, catalog::col_oid_t(1))); StorageTestUtil::ForceOid(&(keycols[0]), catalog::indexkeycol_oid_t(1)); - unique_schema_ = catalog::IndexSchema(keycols, storage::index::IndexType::HASHMAP, true, true, false, true); - default_schema_ = catalog::IndexSchema(keycols, storage::index::IndexType::HASHMAP, false, false, false, true); + catalog::IndexOptions options; + unique_schema_ = + catalog::IndexSchema(keycols, storage::index::IndexType::HASHMAP, true, true, false, true, options); + default_schema_ = + catalog::IndexSchema(keycols, storage::index::IndexType::HASHMAP, false, false, false, true, options); unique_index_ = (IndexBuilder().SetKeySchema(unique_schema_)).Build(); default_index_ = (IndexBuilder().SetKeySchema(default_schema_)).Build(); diff --git a/test/storage/index_key_test.cpp b/test/storage/index_key_test.cpp index d225bf74bb..e805659fa9 100644 --- a/test/storage/index_key_test.cpp +++ b/test/storage/index_key_test.cpp @@ -439,7 +439,9 @@ TEST_F(IndexKeyTests, IndexMetadataCompactIntsKeyTest) { key_cols.emplace_back("", type::TypeId::SMALLINT, false, parser::ConstantValueExpression(type::TypeId::SMALLINT)); StorageTestUtil::ForceOid(&(key_cols.back()), oid++); - IndexMetadata metadata(catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true)); + catalog::IndexOptions options; + IndexMetadata metadata( + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options)); // identical key schema const auto &metadata_key_schema = metadata.GetSchema().GetColumns(); @@ -534,7 +536,9 @@ TEST_F(IndexKeyTests, IndexMetadataGenericKeyNoMustInlineVarlenTest) { key_cols.emplace_back("", type::TypeId::VARCHAR, 12, false, parser::ConstantValueExpression(type::TypeId::VARCHAR)); StorageTestUtil::ForceOid(&(key_cols.back()), oid++); - IndexMetadata metadata(catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true)); + catalog::IndexOptions options; + IndexMetadata metadata( + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options)); // identical key schema const auto &metadata_key_schema = metadata.GetSchema().GetColumns(); @@ -624,7 +628,9 @@ TEST_F(IndexKeyTests, IndexMetadataGenericKeyMustInlineVarlenTest) { key_cols.emplace_back("", type::TypeId::VARCHAR, 90, false, parser::ConstantValueExpression(type::TypeId::VARCHAR)); StorageTestUtil::ForceOid(&(key_cols.back()), oid++); - IndexMetadata metadata(catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true)); + catalog::IndexOptions options; + IndexMetadata metadata( + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options)); // identical key schema const auto &metadata_key_schema = metadata.GetSchema().GetColumns(); @@ -815,7 +821,8 @@ void CompactIntsKeyBasicTest(type::TypeId type_id, Random *const generator) { key_cols.emplace_back("", type_id, false, parser::ConstantValueExpression(type_id)); StorageTestUtil::ForceOid(&(key_cols.back()), catalog::indexkeycol_oid_t(i)); } - key_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true); + catalog::IndexOptions options; + key_schema = catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options); const IndexMetadata metadata(key_schema); const auto &initializer = metadata.GetProjectedRowInitializer(); @@ -876,8 +883,9 @@ void NumericComparisons(const type::TypeId type_id, const bool nullable) { key_cols.emplace_back("", type_id, nullable, parser::ConstantValueExpression(type_id)); StorageTestUtil::ForceOid(&(key_cols.back()), catalog::indexkeycol_oid_t(0)); + catalog::IndexOptions options; const IndexMetadata metadata( - catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true)); + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options)); const auto &initializer = metadata.GetProjectedRowInitializer(); auto *const pr_buffer = common::AllocationUtil::AllocateAligned(initializer.ProjectedRowSize()); @@ -967,8 +975,9 @@ void UnorderedNumericComparisons(const type::TypeId type_id, const bool nullable key_cols.emplace_back("", type_id, nullable, parser::ConstantValueExpression(type_id)); StorageTestUtil::ForceOid(&(key_cols.back()), catalog::indexkeycol_oid_t(0)); + catalog::IndexOptions options; const IndexMetadata metadata( - catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true)); + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options)); const auto &initializer = metadata.GetProjectedRowInitializer(); auto *const pr_buffer = common::AllocationUtil::AllocateAligned(initializer.ProjectedRowSize()); @@ -1044,8 +1053,9 @@ TEST_F(IndexKeyTests, GenericKeyInlineVarlenComparisons) { key_cols.emplace_back("", type::TypeId::VARCHAR, 12, true, parser::ConstantValueExpression(type::TypeId::VARCHAR)); StorageTestUtil::ForceOid(&(key_cols.back()), catalog::indexkeycol_oid_t(0)); + catalog::IndexOptions options; const IndexMetadata metadata( - catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true)); + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options)); const auto &initializer = metadata.GetProjectedRowInitializer(); auto *const pr_buffer = common::AllocationUtil::AllocateAligned(initializer.ProjectedRowSize()); @@ -1152,8 +1162,9 @@ TEST_F(IndexKeyTests, GenericKeyNonInlineVarlenComparisons) { key_cols.emplace_back("", type::TypeId::VARCHAR, 20, true, parser::ConstantValueExpression(type::TypeId::VARCHAR)); StorageTestUtil::ForceOid(&(key_cols.back()), catalog::indexkeycol_oid_t(0)); + catalog::IndexOptions options; const IndexMetadata metadata( - catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true)); + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options)); const auto &initializer = metadata.GetProjectedRowInitializer(); auto *const pr_buffer = common::AllocationUtil::AllocateAligned(initializer.ProjectedRowSize()); @@ -1267,8 +1278,9 @@ TEST_F(IndexKeyTests, GenericKeyBuilderVarlenSizeEdgeCaseTest) { std::vector key_cols; key_cols.emplace_back("", type::TypeId::VARCHAR, 64, false, parser::ConstantValueExpression(type::TypeId::VARCHAR)); StorageTestUtil::ForceOid(&(key_cols.back()), catalog::indexkeycol_oid_t(15445)); + catalog::IndexOptions options; const auto key_schema = - catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true); + catalog::IndexSchema(key_cols, storage::index::IndexType::BPLUSTREE, false, false, false, true, options); IndexBuilder builder; builder.SetKeySchema(key_schema); diff --git a/test/storage/recovery_test.cpp b/test/storage/recovery_test.cpp index 38471fc9b5..8a4e0a069a 100644 --- a/test/storage/recovery_test.cpp +++ b/test/storage/recovery_test.cpp @@ -86,7 +86,8 @@ class RecoveryTests : public TerrierTest { "", type::TypeId::INTEGER, false, parser::ColumnValueExpression(catalog::db_oid_t(0), catalog::table_oid_t(0), catalog::col_oid_t(1))); StorageTestUtil::ForceOid(&(keycols[0]), catalog::indexkeycol_oid_t(1)); - return catalog::IndexSchema(keycols, storage::index::IndexType::BPLUSTREE, true, true, false, true); + catalog::IndexOptions options; + return catalog::IndexSchema(keycols, storage::index::IndexType::BPLUSTREE, true, true, false, true, options); } catalog::db_oid_t CreateDatabase(transaction::TransactionContext *txn, diff --git a/util/execution/table_generator/table_generator.cpp b/util/execution/table_generator/table_generator.cpp index 8ed79b7693..c06748963e 100644 --- a/util/execution/table_generator/table_generator.cpp +++ b/util/execution/table_generator/table_generator.cpp @@ -318,7 +318,9 @@ void TableGenerator::CreateIndex(IndexInsertMeta *index_meta) { index_cols.emplace_back(col_meta.name_, col_meta.type_, 100, col_meta.nullable_, col_expr); } } - catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false}; + catalog::IndexOptions options; + catalog::IndexSchema tmp_index_schema{index_cols, storage::index::IndexType::BPLUSTREE, false, false, false, false, + options}; // Create Index auto index_oid = exec_ctx_->GetAccessor()->CreateIndex(ns_oid_, table_oid, index_meta->index_name_, tmp_index_schema); auto &index_schema = exec_ctx_->GetAccessor()->GetIndexSchema(index_oid); diff --git a/util/execution/table_generator/table_reader.cpp b/util/execution/table_generator/table_reader.cpp index 56849f8d44..392ac3d72e 100644 --- a/util/execution/table_generator/table_reader.cpp +++ b/util/execution/table_generator/table_reader.cpp @@ -87,8 +87,9 @@ void TableReader::CreateIndexes(TableInfo *info, catalog::table_oid_t table_oid) storage::index::IndexBuilder index_builder; for (auto &index_info : info->indexes_) { // Create index in catalog + catalog::IndexOptions options; catalog::IndexSchema tmp_schema{ - index_info->cols_, storage::index::IndexType::BPLUSTREE, false, false, false, false}; + index_info->cols_, storage::index::IndexType::BPLUSTREE, false, false, false, false, options}; auto index_oid = exec_ctx_->GetAccessor()->CreateIndex(ns_oid_, table_oid, index_info->index_name_, tmp_schema); auto &schema = exec_ctx_->GetAccessor()->GetIndexSchema(index_oid);