diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py
index e7bf588..eaf9d30 100644
--- a/scripts/generate_test_data.py
+++ b/scripts/generate_test_data.py
@@ -116,6 +116,11 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate
 query = "CREATE table test_table AS SELECT {'i':i, 'j':i+1} as value, i%2 as part from range(0,10) tbl(i);"
 generate_test_data_delta_rs("simple_partitioned_with_structs", query, "part");
 
+## One partitioned table per type we can file skip on; value and part both hold range(0,2) cast to that type
+for data_type in ["bool", "int", "tinyint", "smallint", "bigint", "float", "double", "varchar"]:
+    query = f"CREATE table test_table as select i::{data_type} as value, i::{data_type} as part from range(0,2) tbl(i)"
+    generate_test_data_delta_rs(f"test_file_skipping/{data_type}", query, "part");
+
 ## Simple table with deletion vector
 con = duckdb.connect()
 con.query(f"COPY (SELECT i as id, ('val' || i::VARCHAR) as value FROM range(0,1000000) tbl(i))TO '{TMP_PATH}/simple_sf1_with_dv.parquet'")
diff --git a/test/sql/generated/file_skipping_all_types.test b/test/sql/generated/file_skipping_all_types.test
new file mode 100644
index 0000000..e4348e8
--- /dev/null
+++ b/test/sql/generated/file_skipping_all_types.test
@@ -0,0 +1,44 @@
+# name: test/sql/generated/file_skipping_all_types.test
+# description: Test filter pushdown succeeds on all partition column types we can push down
+# group: [delta_generated]
+
+require parquet
+
+require delta
+
+require-env GENERATED_DATA_AVAILABLE
+
+# TODO: this doesn't appear to skip files yet
+# TODO: add tests once https://github.com/duckdb/duckdb/pull/12488 is available
+
+query I
+select value
+from delta_scan('./data/generated/test_file_skipping/bool/delta_lake')
+where part != false
+order by value
+----
+true
+
+foreach type bool int tinyint smallint bigint varchar
+
+query I
+select value
+from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
+where part != 0
+order by value
+----
+1
+
+endloop
+
+foreach type float double
+
+query I
+select value
+from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
+where part > 0.5
+order by value
+----
+1.0
+
+endloop