Skip to content

Commit

Permalink
Merge pull request #59 from motherduckdb/pb/generate-small-data
Browse files Browse the repository at this point in the history
add generated_data to repo to facilitate CI
  • Loading branch information
samansmink authored Jul 1, 2024
2 parents be7538e + 31f6b38 commit 5b72b75
Show file tree
Hide file tree
Showing 86 changed files with 5,752 additions and 41 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ include extension-ci-tools/makefiles/duckdb_extension.Makefile

# Custom makefile targets
data: data_clean
python3 scripts/test_data_generator/generate_iceberg.py 0.01 data/iceberg/generated_spec1_0_01 1
python3 scripts/test_data_generator/generate_iceberg.py 0.01 data/iceberg/generated_spec2_0_01 2
python3 scripts/test_data_generator/generate_iceberg.py 0.001 data/iceberg/generated_spec1_0_001 1
python3 scripts/test_data_generator/generate_iceberg.py 0.001 data/iceberg/generated_spec2_0_001 2

data_large: data data_clean
python3 scripts/test_data_generator/generate_iceberg.py 1 data/iceberg/generated_spec2_1 2

data_clean:
rm -rf data/iceberg/generated_*
rm -rf data/iceberg/generated_*
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
count
7690
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- The query executed at this step:
ALTER TABLE iceberg_catalog.pyspark_iceberg_table
ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT;
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
{
"format-version" : 1,
"table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1",
"location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table",
"last-updated-ms" : 1719580919873,
"last-column-id" : 15,
"schema" : {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey_bool",
"required" : false,
"type" : "boolean"
}, {
"id" : 2,
"name" : "l_partkey_int",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey_long",
"required" : false,
"type" : "long"
}, {
"id" : 4,
"name" : "l_extendedprice_float",
"required" : false,
"type" : "float"
}, {
"id" : 5,
"name" : "l_extendedprice_double",
"required" : false,
"type" : "double"
}, {
"id" : 6,
"name" : "l_extendedprice_dec9_2",
"required" : false,
"type" : "decimal(9, 2)"
}, {
"id" : 7,
"name" : "l_extendedprice_dec18_6",
"required" : false,
"type" : "decimal(18, 6)"
}, {
"id" : 8,
"name" : "l_extendedprice_dec38_10",
"required" : false,
"type" : "decimal(38, 10)"
}, {
"id" : 9,
"name" : "l_shipdate_date",
"required" : false,
"type" : "date"
}, {
"id" : 10,
"name" : "l_partkey_time",
"required" : false,
"type" : "int"
}, {
"id" : 11,
"name" : "l_commitdate_timestamp",
"required" : false,
"type" : "timestamp"
}, {
"id" : 12,
"name" : "l_commitdate_timestamp_tz",
"required" : false,
"type" : "timestamptz"
}, {
"id" : 13,
"name" : "l_comment_string",
"required" : false,
"type" : "string"
}, {
"id" : 14,
"name" : "uuid",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_comment_blob",
"required" : false,
"type" : "binary"
} ]
},
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey_bool",
"required" : false,
"type" : "boolean"
}, {
"id" : 2,
"name" : "l_partkey_int",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey_long",
"required" : false,
"type" : "long"
}, {
"id" : 4,
"name" : "l_extendedprice_float",
"required" : false,
"type" : "float"
}, {
"id" : 5,
"name" : "l_extendedprice_double",
"required" : false,
"type" : "double"
}, {
"id" : 6,
"name" : "l_extendedprice_dec9_2",
"required" : false,
"type" : "decimal(9, 2)"
}, {
"id" : 7,
"name" : "l_extendedprice_dec18_6",
"required" : false,
"type" : "decimal(18, 6)"
}, {
"id" : 8,
"name" : "l_extendedprice_dec38_10",
"required" : false,
"type" : "decimal(38, 10)"
}, {
"id" : 9,
"name" : "l_shipdate_date",
"required" : false,
"type" : "date"
}, {
"id" : 10,
"name" : "l_partkey_time",
"required" : false,
"type" : "int"
}, {
"id" : 11,
"name" : "l_commitdate_timestamp",
"required" : false,
"type" : "timestamp"
}, {
"id" : 12,
"name" : "l_commitdate_timestamp_tz",
"required" : false,
"type" : "timestamptz"
}, {
"id" : 13,
"name" : "l_comment_string",
"required" : false,
"type" : "string"
}, {
"id" : 14,
"name" : "uuid",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_comment_blob",
"required" : false,
"type" : "binary"
} ]
} ],
"partition-spec" : [ ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "peter",
"write.parquet.compression-codec" : "zstd"
},
"current-snapshot-id" : 9145725745960929259,
"refs" : {
"main" : {
"snapshot-id" : 9145725745960929259,
"type" : "branch"
}
},
"snapshots" : [ {
"snapshot-id" : 9145725745960929259,
"timestamp-ms" : 1719580919873,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1719580917302",
"added-data-files" : "1",
"added-records" : "6005",
"added-files-size" : "440845",
"changed-partition-count" : "1",
"total-records" : "6005",
"total-files-size" : "440845",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro",
"schema-id" : 0
} ],
"statistics" : [ ],
"snapshot-log" : [ {
"timestamp-ms" : 1719580919873,
"snapshot-id" : 9145725745960929259
} ],
"metadata-log" : [ ]
}
Loading

0 comments on commit 5b72b75

Please sign in to comment.