Skip to content

Commit

Permalink
wip
Browse files (browse the repository at this point in the history)
  • Loading branch information
samansmink committed Oct 18, 2024
1 parent 48d98dc commit bff9325
Show file tree
Hide file tree
Showing 15 changed files with 79 additions and 57 deletions.
4 changes: 3 additions & 1 deletion benchmark/micro/snapshot_performance/delta_scan.benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ require delta

require parquet

require httpfs

run
SELECT COUNT(*) FROM delta_scan('data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake')
SELECT COUNT(*) FROM delta_scan('s3://test-bucket-ceiveran/delta_benchmarking/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake')

result I
6001215
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ require delta

require parquet

require httpfs

run
SELECT COUNT(*) FROM delta_scan('data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake') where l_orderkey is not null
SELECT COUNT(*) FROM delta_scan('s3://test-bucket-ceiveran/delta_benchmarking/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake') where l_orderkey is not null

result I
6001215
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ require delta

require parquet

require httpfs

load
ATTACH 'data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_no_pin (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_no_pin (TYPE delta);

run
SELECT COUNT(*) FROM lineitem_no_pin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ require delta

require parquet

require httpfs

load
ATTACH 'data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_no_pin (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_no_pin (TYPE delta);

run
SELECT COUNT(*) FROM lineitem_no_pin where l_orderkey is not null
Expand Down
4 changes: 3 additions & 1 deletion benchmark/micro/snapshot_performance/snapshot_pin.benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ require delta

require parquet

require httpfs

load
ATTACH 'data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_pin (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_pin (TYPE delta, PIN_SNAPSHOT);

run
SELECT COUNT(*) FROM lineitem_pin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ require delta

require parquet

require httpfs

load
ATTACH 'data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_pin (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_pin (TYPE delta, PIN_SNAPSHOT);

run
SELECT COUNT(*) FROM lineitem_pin where l_orderkey is not null
Expand Down
2 changes: 1 addition & 1 deletion benchmark/tpcds/sf1/local/delta/load.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SET VARIABLE delta_path = './data/generated/tpcds_sf1';
SET VARIABLE delta_path = 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark';

create view call_center as from delta_scan(getvariable('delta_path') || '/call_center/delta_lake');
create view catalog_page as from delta_scan(getvariable('delta_path') || '/catalog_page/delta_lake');
Expand Down
2 changes: 2 additions & 0 deletions benchmark/tpcds/sf1/local/delta/tpcds_sf1.benchmark.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ require delta

require parquet

require httpfs

load benchmark/tpcds/sf1/local/delta/load.sql

run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql
Expand Down
48 changes: 24 additions & 24 deletions benchmark/tpcds/sf1/local/delta_attach/load.sql
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
ATTACH './data/generated/tpcds_sf1/call_center/delta_lake' as call_center (TYPE delta);
ATTACH './data/generated/tpcds_sf1/catalog_page/delta_lake' as catalog_page (TYPE delta);
ATTACH './data/generated/tpcds_sf1/catalog_returns/delta_lake' as catalog_returns (TYPE delta);
ATTACH './data/generated/tpcds_sf1/catalog_sales/delta_lake' as catalog_sales (TYPE delta);
ATTACH './data/generated/tpcds_sf1/customer/delta_lake' as customer (TYPE delta);
ATTACH './data/generated/tpcds_sf1/customer_demographics/delta_lake' as customer_demographics (TYPE delta);
ATTACH './data/generated/tpcds_sf1/customer_address/delta_lake' as customer_address (TYPE delta);
ATTACH './data/generated/tpcds_sf1/date_dim/delta_lake' as date_dim (TYPE delta);
ATTACH './data/generated/tpcds_sf1/household_demographics/delta_lake' as household_demographics (TYPE delta);
ATTACH './data/generated/tpcds_sf1/inventory/delta_lake' as inventory (TYPE delta);
ATTACH './data/generated/tpcds_sf1/income_band/delta_lake' as income_band (TYPE delta);
ATTACH './data/generated/tpcds_sf1/item/delta_lake' as item (TYPE delta);
ATTACH './data/generated/tpcds_sf1/promotion/delta_lake' as promotion (TYPE delta);
ATTACH './data/generated/tpcds_sf1/reason/delta_lake' as reason (TYPE delta);
ATTACH './data/generated/tpcds_sf1/ship_mode/delta_lake' as ship_mode (TYPE delta);
ATTACH './data/generated/tpcds_sf1/store/delta_lake' as store (TYPE delta);
ATTACH './data/generated/tpcds_sf1/store_returns/delta_lake' as store_returns (TYPE delta);
ATTACH './data/generated/tpcds_sf1/store_sales/delta_lake' as store_sales (TYPE delta);
ATTACH './data/generated/tpcds_sf1/time_dim/delta_lake' as time_dim (TYPE delta);
ATTACH './data/generated/tpcds_sf1/warehouse/delta_lake' as warehouse (TYPE delta);
ATTACH './data/generated/tpcds_sf1/web_page/delta_lake' as web_page (TYPE delta);
ATTACH './data/generated/tpcds_sf1/web_returns/delta_lake' as web_returns (TYPE delta);
ATTACH './data/generated/tpcds_sf1/web_sales/delta_lake' as web_sales (TYPE delta);
ATTACH './data/generated/tpcds_sf1/web_site/delta_lake' as web_site (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/call_center/delta_lake' as call_center (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/catalog_page/delta_lake' as catalog_page (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/catalog_returns/delta_lake' as catalog_returns (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/catalog_sales/delta_lake' as catalog_sales (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/customer/delta_lake' as customer (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/customer_demographics/delta_lake' as customer_demographics (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/customer_address/delta_lake' as customer_address (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/date_dim/delta_lake' as date_dim (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/household_demographics/delta_lake' as household_demographics (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/inventory/delta_lake' as inventory (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/income_band/delta_lake' as income_band (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/item/delta_lake' as item (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/promotion/delta_lake' as promotion (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/reason/delta_lake' as reason (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/ship_mode/delta_lake' as ship_mode (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/store/delta_lake' as store (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/store_returns/delta_lake' as store_returns (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/store_sales/delta_lake' as store_sales (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/time_dim/delta_lake' as time_dim (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/warehouse/delta_lake' as warehouse (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_page/delta_lake' as web_page (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_returns/delta_lake' as web_returns (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_sales/delta_lake' as web_sales (TYPE delta);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_site/delta_lake' as web_site (TYPE delta);
2 changes: 2 additions & 0 deletions benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ require delta

require parquet

require httpfs

load benchmark/tpcds/sf1/local/delta_attach/load.sql

run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql
Expand Down
48 changes: 24 additions & 24 deletions benchmark/tpcds/sf1/local/delta_attach_pin/load.sql
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
ATTACH './data/generated/tpcds_sf1/call_center/delta_lake' as call_center (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/catalog_page/delta_lake' as catalog_page (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/catalog_returns/delta_lake' as catalog_returns (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/catalog_sales/delta_lake' as catalog_sales (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/customer/delta_lake' as customer (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/customer_demographics/delta_lake' as customer_demographics (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/customer_address/delta_lake' as customer_address (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/date_dim/delta_lake' as date_dim (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/household_demographics/delta_lake' as household_demographics (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/inventory/delta_lake' as inventory (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/income_band/delta_lake' as income_band (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/item/delta_lake' as item (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/promotion/delta_lake' as promotion (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/reason/delta_lake' as reason (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/ship_mode/delta_lake' as ship_mode (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/store/delta_lake' as store (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/store_returns/delta_lake' as store_returns (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/store_sales/delta_lake' as store_sales (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/time_dim/delta_lake' as time_dim (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/warehouse/delta_lake' as warehouse (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/web_page/delta_lake' as web_page (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/web_returns/delta_lake' as web_returns (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/web_sales/delta_lake' as web_sales (TYPE delta, PIN_SNAPSHOT);
ATTACH './data/generated/tpcds_sf1/web_site/delta_lake' as web_site (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/call_center/delta_lake' as call_center (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/catalog_page/delta_lake' as catalog_page (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/catalog_returns/delta_lake' as catalog_returns (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/catalog_sales/delta_lake' as catalog_sales (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/customer/delta_lake' as customer (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/customer_demographics/delta_lake' as customer_demographics (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/customer_address/delta_lake' as customer_address (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/date_dim/delta_lake' as date_dim (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/household_demographics/delta_lake' as household_demographics (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/inventory/delta_lake' as inventory (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/income_band/delta_lake' as income_band (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/item/delta_lake' as item (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/promotion/delta_lake' as promotion (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/reason/delta_lake' as reason (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/ship_mode/delta_lake' as ship_mode (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/store/delta_lake' as store (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/store_returns/delta_lake' as store_returns (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/store_sales/delta_lake' as store_sales (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/time_dim/delta_lake' as time_dim (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/warehouse/delta_lake' as warehouse (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_page/delta_lake' as web_page (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_returns/delta_lake' as web_returns (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_sales/delta_lake' as web_sales (TYPE delta, PIN_SNAPSHOT);
ATTACH 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/web_site/delta_lake' as web_site (TYPE delta, PIN_SNAPSHOT);
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ require delta

require parquet

require httpfs

load benchmark/tpcds/sf1/local/delta_attach_pin/load.sql

run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql
Expand Down
4 changes: 3 additions & 1 deletion benchmark/tpcds/sf1/local/duckdb/tpcds_sf1.benchmark.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ require delta

require parquet

require httpfs

require tpcds

run
attach './data/generated/tpcds_sf1/duckdb.db' as tpcds;
attach 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark/duckdb.db' as tpcds;
use tpcds;
pragma tpcds(${QUERY_NUMBER})

Expand Down
2 changes: 1 addition & 1 deletion benchmark/tpcds/sf1/local/parquet/load.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SET VARIABLE parquet_path = './data/generated/tpcds_sf1';
SET VARIABLE parquet_path = 's3://test-bucket-ceiveran/delta_benchmarking/tpcds_sf1_pyspark';

create view call_center as from parquet_scan(getvariable('parquet_path') || '/call_center/parquet/**/*.parquet');
create view catalog_page as from parquet_scan(getvariable('parquet_path') || '/catalog_page/parquet/**/*.parquet');
Expand Down
2 changes: 2 additions & 0 deletions benchmark/tpcds/sf1/local/parquet/tpcds_sf1.benchmark.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ require delta

require parquet

require httpfs

load benchmark/tpcds/sf1/local/parquet/load.sql

run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql
Expand Down

0 comments on commit bff9325

Please sign in to comment.