Skip to content

Commit

Permalink
bump delta to c901665b98b
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed Jun 13, 2024
1 parent a6f85ef commit ef1dd70
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 11 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ endif()
ExternalProject_Add(
${KERNEL_NAME}
GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
GIT_TAG 823367e4dc13b627914412ee2ca7933a1c7b822a
GIT_TAG c901665b98b2fed5ff1c713a9666eba9d16ea281
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
BUILD_IN_SOURCE 1
Expand Down
5 changes: 0 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@ test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests
# Include the Makefile from extension-ci-tools
include extension-ci-tools/makefiles/duckdb_extension.Makefile

# reldebug: configure and build the ./duckdb/ source tree into build/reldebug
# with CMAKE_BUILD_TYPE=RelWithDebInfo. The three steps are chained with `&&`,
# so a failing step stops the ones after it.
# GENERATOR, BUILD_FLAGS and EXT_RELEASE_FLAGS are not defined in this view —
# presumably they come from the included extension-ci-tools Makefile; confirm.
reldebug:
mkdir -p build/reldebug && \
cmake $(GENERATOR) $(BUILD_FLAGS) $(EXT_RELEASE_FLAGS) -DCMAKE_BUILD_TYPE=RelWithDebInfo -S ./duckdb/ -B build/reldebug && \
cmake --build build/reldebug --config RelWithDebInfo

# Generate some test data to test with
generate-data:
python3 -m pip install delta-spark duckdb pandas deltalake pyspark delta
Expand Down
8 changes: 7 additions & 1 deletion scripts/generate_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,14 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate
for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]:
generate_test_data_pyspark(f"tpch_sf0_01_{table}", f'tpch_sf0_01/{table}', f'{TMP_PATH}/tpch_sf0_01_export/{table}.parquet')

## TPCH SF1 full dataset
# Open a new DuckDB connection, call dbgen with sf=1 and export the resulting
# database as parquet files under {TMP_PATH}/tpch_sf1_export.
con = duckdb.connect()
con.query(f"call dbgen(sf=1); EXPORT DATABASE '{TMP_PATH}/tpch_sf1_export' (FORMAT parquet)")
# For each of the eight TPC-H tables, hand the exported parquet file to
# generate_test_data_pyspark as input_path, with name tpch_sf1_<table> and
# current_path tpch_sf1/<table>.
for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]:
generate_test_data_pyspark(f"tpch_sf1_{table}", f'tpch_sf1/{table}', f'{TMP_PATH}/tpch_sf1_export/{table}.parquet')

## TPCDS SF0.01 full dataset
# Open a new DuckDB connection, call dsdgen with sf=0.01 and export the resulting
# database as parquet files under {TMP_PATH}/tpcds_sf0_01_export.
con = duckdb.connect()
con.query(f"call dsdgen(sf=0.01); EXPORT DATABASE '{TMP_PATH}/tpcds_sf0_01_export' (FORMAT parquet)")
# For each listed TPC-DS table, hand the exported parquet file to
# generate_test_data_pyspark as input_path, with name tpcds_sf0_01_<table> and
# current_path tpcds_sf0_01/<table>.
for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]:
# NOTE(review): the call below appears twice in this view — this looks like the
# removed/added pair of a diff (e.g. an end-of-file newline change) rather than
# two real calls; confirm against the actual file.
generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet')
generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet')
36 changes: 33 additions & 3 deletions src/delta_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "duckdb.hpp"
#include "duckdb/main/extension_util.hpp"
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
#include <duckdb/planner/filter/null_filter.hpp>

namespace duckdb {

Expand Down Expand Up @@ -257,16 +258,31 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co
case LogicalType::BIGINT:
right = visit_expression_literal_long(state, BigIntValue::Get(value));
break;


// case LogicalType::INTEGER:
// right = visit_expression_literal_int(state, IntegerValue::Get(value));
// break;
// case LogicalType::SMALLINT:
// right = visit_expression_literal_short(state, SmallIntValue::Get(value));
// break;
// case LogicalType::TINYINT:
// right = visit_expression_literal_byte(state, TinyIntValue::Get(value));
// break;
// case LogicalType::FLOAT:
// right = visit_expression_literal_float(state, FloatValue::Get(value));
// break;
// case LogicalType::DOUBLE:
// right = visit_expression_literal_double(state, DoubleValue::Get(value));
// break;
// case LogicalType::BOOLEAN:
// right = visit_expression_literal_bool(state, BooleanValue::Get(value));
// break;
case LogicalType::VARCHAR: {
    // Build the right-hand side literal from the filter's string constant.
    // WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str()),
    // so the constant is held in a named local that stays alive across the FFI call.
    auto str = StringValue::Get(value);
    // The literal must be the constant's value (`str`), not the column name: passing `col_name` here
    // would make the predicate compare the column against its own name.
    auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(str), DuckDBEngineError::AllocateError);
    right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string");
    break;
}

default:
break; // unsupported type
}
Expand Down Expand Up @@ -305,12 +321,26 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc
return visit_expression_and(state, &eit);
}

// Builds an IS NULL kernel expression over the column `col_name`.
//
// The column reference is created through the FFI visitor first; its result is unpacked via
// KernelUtils::UnpackResult (which receives the message describing the failing step) and the
// resulting expression id is then wrapped with visit_expression_is_null.
// Returns the id produced by ffi::visit_expression_is_null.
uintptr_t PredicateVisitor::VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
    const auto column_slice = KernelUtils::ToDeltaString(col_name);
    auto column_result = ffi::visit_expression_column(state, column_slice, DuckDBEngineError::AllocateError);
    const uintptr_t column_expr =
        KernelUtils::UnpackResult(column_result, "VisitIsNull failed to visit_expression_column");
    return ffi::visit_expression_is_null(state, column_expr);
}

// Builds an IS NOT NULL kernel expression over the column `col_name` by negating the
// expression produced by VisitIsNull with visit_expression_not.
// Returns the id produced by ffi::visit_expression_not.
uintptr_t PredicateVisitor::VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
    const uintptr_t is_null_expr = VisitIsNull(col_name, state);
    return ffi::visit_expression_not(state, is_null_expr);
}

uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState* state) {
switch (filter.filter_type) {
case TableFilterType::CONSTANT_COMPARISON:
return VisitConstantFilter(col_name, static_cast<const ConstantFilter&>(filter), state);
case TableFilterType::CONJUNCTION_AND:
return VisitAndFilter(col_name, static_cast<const ConjunctionAndFilter&>(filter), state);
// case TableFilterType::IS_NULL:
// return VisitIsNull(col_name, state);
// case TableFilterType::IS_NOT_NULL:
// return VisitIsNotNull(col_name, state);
default:
return ~0;
}
Expand Down
2 changes: 2 additions & 0 deletions src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
StringUtil::RTrim(path_string, "/");
path_string += "/" + KernelUtils::FromDeltaString(path);

printf("Got File %s\n", path_string.c_str());

// First we append the file to our resolved files
context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string));
context->metadata.emplace_back(make_uniq<DeltaFileMetaData>());
Expand Down
5 changes: 5 additions & 0 deletions src/include/delta_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/common/enum_util.hpp"
#include <iostream>
#include <duckdb/planner/filter/null_filter.hpp>

// TODO: clean up this file as we go

Expand Down Expand Up @@ -140,6 +141,10 @@ class PredicateVisitor : public ffi::EnginePredicate {

uintptr_t VisitConstantFilter(const string &col_name, const ConstantFilter &filter, ffi::KernelExpressionVisitorState* state);
uintptr_t VisitAndFilter(const string &col_name, const ConjunctionAndFilter &filter, ffi::KernelExpressionVisitorState* state);

uintptr_t VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState* state);
uintptr_t VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState* state);

uintptr_t VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState* state);
};

Expand Down

0 comments on commit ef1dd70

Please sign in to comment.