Skip to content

Commit

Permalink
bump delta to 181232a45562, enable cardinalty estimation, fix varchar…
Browse files Browse the repository at this point in the history
… pushdown
  • Loading branch information
samansmink committed Jun 15, 2024
1 parent ef1dd70 commit 251d3bb
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 32 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ endif()
# Add rust_example as a CMake target
ExternalProject_Add(
${KERNEL_NAME}
GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
GIT_TAG c901665b98b2fed5ff1c713a9666eba9d16ea281
GIT_REPOSITORY "https://github.com/nicklan/delta-kernel-rs"
GIT_TAG 181232a45562ca78be763c2f5fb46b88a2463b5c
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
BUILD_IN_SOURCE 1
Expand Down
3 changes: 2 additions & 1 deletion scripts/generate_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate

## CREATE
## CONFIGURE USAGE OF DELETION VECTORS
spark.sql(f"ALTER TABLE test_table_{name} SET TBLPROPERTIES ('delta.enableDeletionVectors' = true);")
if (delete_predicate):
spark.sql(f"ALTER TABLE test_table_{name} SET TBLPROPERTIES ('delta.enableDeletionVectors' = true);")

## ADDING DELETES
deltaTable = DeltaTable.forPath(spark, delta_table_path)
Expand Down
60 changes: 34 additions & 26 deletions src/delta_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ static bool CanHandleFilter(TableFilter *filter) {
switch (filter->filter_type) {
case TableFilterType::CONSTANT_COMPARISON:
return true;
case TableFilterType::IS_NULL:
return true;
case TableFilterType::IS_NOT_NULL:
return true;
case TableFilterType::CONJUNCTION_AND: {
auto &conjunction = static_cast<const ConjunctionAndFilter&>(*filter);
bool can_handle = true;
Expand Down Expand Up @@ -258,28 +262,28 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co
case LogicalType::BIGINT:
right = visit_expression_literal_long(state, BigIntValue::Get(value));
break;
// case LogicalType::INTEGER:
// right = visit_expression_literal_int(state, IntegerValue::Get(value));
// break;
// case LogicalType::SMALLINT:
// right = visit_expression_literal_short(state, SmallIntValue::Get(value));
// break;
// case LogicalType::TINYINT:
// right = visit_expression_literal_byte(state, TinyIntValue::Get(value));
// break;
// case LogicalType::FLOAT:
// right = visit_expression_literal_float(state, FloatValue::Get(value));
// break;
// case LogicalType::DOUBLE:
// right = visit_expression_literal_double(state, DoubleValue::Get(value));
// break;
// case LogicalType::BOOLEAN:
// right = visit_expression_literal_bool(state, BooleanValue::Get(value));
// break;
case LogicalType::INTEGER:
right = visit_expression_literal_int(state, IntegerValue::Get(value));
break;
case LogicalType::SMALLINT:
right = visit_expression_literal_short(state, SmallIntValue::Get(value));
break;
case LogicalType::TINYINT:
right = visit_expression_literal_byte(state, TinyIntValue::Get(value));
break;
case LogicalType::FLOAT:
right = visit_expression_literal_float(state, FloatValue::Get(value));
break;
case LogicalType::DOUBLE:
right = visit_expression_literal_double(state, DoubleValue::Get(value));
break;
case LogicalType::BOOLEAN:
right = visit_expression_literal_bool(state, BooleanValue::Get(value));
break;
case LogicalType::VARCHAR: {
// WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str())
auto str = StringValue::Get(value);
auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(str), DuckDBEngineError::AllocateError);
right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string");
break;
}
Expand Down Expand Up @@ -315,16 +319,20 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc
return 0;
}
auto &child_filter = *it++;

if (child_filter->filter_type == TableFilterType::IS_NULL || child_filter->filter_type == TableFilterType::IS_NOT_NULL) {
return 0;
}
return VisitFilter(col_name, *child_filter, state);
};
auto eit = EngineIteratorFromCallable(get_next);
return visit_expression_and(state, &eit);
}

uintptr_t PredicateVisitor::VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
auto maybe_left = ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
uintptr_t left = KernelUtils::UnpackResult(maybe_left, "VisitIsNull failed to visit_expression_column");
return ffi::visit_expression_is_null(state, left);
auto maybe_inner = ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
uintptr_t inner = KernelUtils::UnpackResult(maybe_inner, "VisitIsNull failed to visit_expression_column");
return ffi::visit_expression_is_null(state, inner);
}

uintptr_t PredicateVisitor::VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
Expand All @@ -337,10 +345,10 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte
return VisitConstantFilter(col_name, static_cast<const ConstantFilter&>(filter), state);
case TableFilterType::CONJUNCTION_AND:
return VisitAndFilter(col_name, static_cast<const ConjunctionAndFilter&>(filter), state);
// case TableFilterType::IS_NULL:
// return VisitIsNull(col_name, state);
// case TableFilterType::IS_NOT_NULL:
// return VisitIsNotNull(col_name, state);
case TableFilterType::IS_NULL:
return VisitIsNull(col_name, state);
case TableFilterType::IS_NOT_NULL:
return VisitIsNotNull(col_name, state);
default:
return ~0;
}
Expand Down
3 changes: 0 additions & 3 deletions src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
StringUtil::RTrim(path_string, "/");
path_string += "/" + KernelUtils::FromDeltaString(path);

printf("Got File %s\n", path_string.c_str());

// First we append the file to our resolved files
context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string));
context->metadata.emplace_back(make_uniq<DeltaFileMetaData>());
Expand Down Expand Up @@ -589,7 +587,6 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance
function.deserialize = nullptr;
function.statistics = nullptr;
function.table_scan_progress = nullptr;
function.cardinality = nullptr;
function.get_bind_info = nullptr;

// Schema param is just confusing here
Expand Down

0 comments on commit 251d3bb

Please sign in to comment.