From 42fe13dd961fc58ae2532c5e73c61b5de989c2c2 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Mon, 24 Jun 2024 13:49:19 +0200 Subject: [PATCH 1/2] add progressbar support for index creation --- src/hnsw/hnsw_index_physical_create.cpp | 26 +++++++++++++++++++ .../hnsw/hnsw_index_physical_create.hpp | 2 ++ 2 files changed, 28 insertions(+) diff --git a/src/hnsw/hnsw_index_physical_create.cpp b/src/hnsw/hnsw_index_physical_create.cpp index 2532ec5..e3d6dda 100644 --- a/src/hnsw/hnsw_index_physical_create.cpp +++ b/src/hnsw/hnsw_index_physical_create.cpp @@ -43,6 +43,11 @@ class CreateHNSWIndexGlobalState final : public GlobalSinkState { // Parallel scan state ColumnDataParallelScanState scan_state; + + // Track which phase we're in + atomic is_building = {false}; + atomic loaded_count = {0}; + atomic built_count = {0}; }; unique_ptr PhysicalCreateHNSWIndex::GetGlobalSinkState(ClientContext &context) const { @@ -90,7 +95,9 @@ SinkResultType PhysicalCreateHNSWIndex::Sink(ExecutionContext &context, DataChun OperatorSinkInput &input) const { auto &lstate = input.local_state.Cast(); + auto &gstate = input.global_state.Cast(); lstate.collection->Append(lstate.append_state, chunk); + gstate.loaded_count += chunk.size(); return SinkResultType::NEED_MORE_INPUT; } @@ -178,6 +185,9 @@ class HNSWIndexConstructTask final : public ExecutorTask { } } + // Update the built count + gstate.built_count += count; + if (mode == TaskExecutionMode::PROCESS_PARTIAL) { // yield! return TaskExecutionResult::TASK_NOT_FINISHED; @@ -273,6 +283,9 @@ SinkFinalizeType PhysicalCreateHNSWIndex::Finalize(Pipeline &pipeline, Event &ev auto &gstate = input.global_state.Cast(); auto &collection = gstate.collection; + // Move on to the next phase + gstate.is_building = true; + // Reserve the index size auto &index = gstate.global_index->index; index.reserve(collection->Count()); @@ -287,4 +300,17 @@ SinkFinalizeType PhysicalCreateHNSWIndex::Finalize(Pipeline &pipeline, Event &ev return SinkFinalizeType::READY; } +double PhysicalCreateHNSWIndex::GetSinkProgress(ClientContext &context, GlobalSinkState &gstate, + double source_progress) const { + // The "source_progress" is not relevant for CREATE INDEX statements + const auto &state = gstate.Cast(); + // First half of the progress is appending to the collection + if (!state.is_building) { + return 50.0 * + MinValue(1.0, static_cast(state.loaded_count) / static_cast(estimated_cardinality)); + } + // Second half is actually building the index + return 50.0 + (50.0 * static_cast(state.built_count) / static_cast(state.loaded_count)); +} + } // namespace duckdb \ No newline at end of file diff --git a/src/include/hnsw/hnsw_index_physical_create.hpp b/src/include/hnsw/hnsw_index_physical_create.hpp index 75c1ccc..b3de5b8 100644 --- a/src/include/hnsw/hnsw_index_physical_create.hpp +++ b/src/include/hnsw/hnsw_index_physical_create.hpp @@ -51,6 +51,8 @@ class PhysicalCreateHNSWIndex : public PhysicalOperator { bool ParallelSink() const override { return true; } + + double GetSinkProgress(ClientContext &context, GlobalSinkState &gstate, double source_progress) const override; }; } // namespace duckdb \ No newline at end of file From 72a5ee8f6d52ccc5040e6fa0028e081f7754fdba Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 26 Jun 2024 14:06:39 +0200 Subject: [PATCH 2/2] update duckdb --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index 8e757df..a5e12fe 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 8e757df15d1ad685afd0e05bb2e000741e0c2663 +Subproject commit a5e12fee059bfd374597f32a61986f7c2eaeb2e7