Skip to content

Commit

Permalink
add cardinality estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed Jun 27, 2024
1 parent 57bbfb4 commit bd2b9ae
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 1 deletion.
19 changes: 18 additions & 1 deletion src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ string url_decode(string input) {
return result;
}

static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, const ffi::Stats *, const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) {
static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, const ffi::Stats * stats, const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) {
auto context = (DeltaSnapshot *) engine_context;
auto path_string = context->GetPath();
StringUtil::RTrim(path_string, "/");
Expand All @@ -61,6 +61,7 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
// Initialize the file metadata
context->metadata.back()->delta_snapshot_version = context->version;
context->metadata.back()->file_number = context->resolved_files.size() - 1;
context->metadata.back()->cardinality = stats->num_records;

// Fetch the deletion vector
auto selection_vector_res = ffi::selection_vector_from_dv(dv_info, context->extern_engine.get(), context->global_state.get());
Expand Down Expand Up @@ -292,6 +293,22 @@ idx_t DeltaSnapshot::GetTotalFileCount() {
return resolved_files.size();
}

//! Estimate the number of tuples in this snapshot by summing the per-file record counts
//! stored in `metadata`.
//!
//! \param context Client context; not used by this implementation.
//! \return NodeStatistics constructed with the summed record count for both arguments;
//!         NodeStatistics(0, 0) when the snapshot contains no files; nullptr when at least one
//!         file has no known record count, i.e. no estimate is available.
//!         NOTE(review): callers are presumed to handle a null result by falling back to their
//!         own estimate - confirm against the MultiFileList contract of the targeted DuckDB.
unique_ptr<NodeStatistics> DeltaSnapshot::GetCardinality(ClientContext &context) {
	// This also ensures all files are expanded
	auto total_file_count = DeltaSnapshot::GetTotalFileCount();

	if (total_file_count == 0) {
		return make_uniq<NodeStatistics>(0, 0);
	}

	idx_t total_tuple_count = 0;
	for (auto &metadatum : metadata) {
		// DeltaFileMetaData::cardinality defaults to INVALID_INDEX. Adding that sentinel would
		// wrap the sum and report a bogus estimate, so treat the total as unknown instead.
		if (metadatum->cardinality == DConstants::INVALID_INDEX) {
			return nullptr;
		}
		total_tuple_count += metadatum->cardinality;
	}

	return make_uniq<NodeStatistics>(total_tuple_count, total_tuple_count);
}

//! Create a fresh DeltaMultiFileReader and hand it back through the MultiFileReader base type.
unique_ptr<MultiFileReader> DeltaMultiFileReader::CreateInstance() {
	// make_uniq already yields a temporary, so no std::move is needed to convert it to the
	// base-class pointer on return.
	return make_uniq<DeltaMultiFileReader>();
}
Expand Down
3 changes: 3 additions & 0 deletions src/include/functions/delta_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ struct DeltaFileMetaData {

idx_t delta_snapshot_version = DConstants::INVALID_INDEX;
idx_t file_number = DConstants::INVALID_INDEX;
idx_t cardinality = DConstants::INVALID_INDEX;
ffi::KernelBoolSlice selection_vector = {nullptr, 0};
case_insensitive_map_t<string> partition_map;
};
Expand All @@ -49,6 +50,8 @@ struct DeltaSnapshot : public MultiFileList {
FileExpandResult GetExpandResult() override;
idx_t GetTotalFileCount() override;

unique_ptr<NodeStatistics> GetCardinality(ClientContext &context) override;

Check failure on line 53 in src/include/functions/delta_scan.hpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

only virtual member functions can be marked 'override'

Check failure on line 53 in src/include/functions/delta_scan.hpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_amd64, x86_64, x64-osx)

only virtual member functions can be marked 'override'

Check failure on line 53 in src/include/functions/delta_scan.hpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Linux (linux_amd64_gcc4, quay.io/pypa/manylinux2014_x86_64, x64-linux)

‘duckdb::unique_ptr<duckdb::NodeStatistics> duckdb::DeltaSnapshot::GetCardinality(duckdb::ClientContext&)’ marked ‘override’, but does not override

Check failure on line 53 in src/include/functions/delta_scan.hpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Linux (linux_amd64_gcc4, quay.io/pypa/manylinux2014_x86_64, x64-linux)

‘duckdb::unique_ptr<duckdb::NodeStatistics> duckdb::DeltaSnapshot::GetCardinality(duckdb::ClientContext&)’ marked ‘override’, but does not override

Check failure on line 53 in src/include/functions/delta_scan.hpp

View workflow job for this annotation

GitHub Actions / Generated Tests (Linux)

‘duckdb::unique_ptr<duckdb::NodeStatistics> duckdb::DeltaSnapshot::GetCardinality(duckdb::ClientContext&)’ marked ‘override’, but does not override

Check failure on line 53 in src/include/functions/delta_scan.hpp

View workflow job for this annotation

GitHub Actions / Generated Tests (Linux)

‘duckdb::unique_ptr<duckdb::NodeStatistics> duckdb::DeltaSnapshot::GetCardinality(duckdb::ClientContext&)’ marked ‘override’, but does not override

protected:
//! Get the i-th expanded file
string GetFile(idx_t i) override;
Expand Down
19 changes: 19 additions & 0 deletions test/sql/dat/basic_append.test
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
----
5

# The estimated cardinality (EC) in the plan should match the row count of 5 returned above
query II
EXPLAIN FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
----
physical_plan <REGEX>:.*EC: 5 .*

query I
SELECT count(number)
FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
Expand Down Expand Up @@ -78,3 +84,16 @@ SELECT a_float, number, letter
FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
WHERE number > 6
----

# Filters are reflected in cardinality estimation: filtering out all files shows 0 EC
query II
EXPLAIN FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
WHERE number > 6
----
physical_plan <REGEX>:.*EC: 0 .*

query II
EXPLAIN FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
WHERE number > 4
----
physical_plan <REGEX>:.*EC: 1 .*
5 changes: 5 additions & 0 deletions test/sql/delta_kernel_rs/basic_partitioned.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ e 5 5.5
a 1 1.1
b 2 2.2
c 3 3.3

query II
EXPLAIN FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/basic_partitioned')
----
physical_plan <REGEX>:.*EC: 6 .*

0 comments on commit bd2b9ae

Please sign in to comment.