diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 71a222e14..79a3e6ba6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -116,7 +116,7 @@ cargo 1.69.0 (6e9a83356 2023-04-12) Currently, iceberg-rust uses Docker to set up environment for integration tests. Native Docker has some limitations, please check (https://github.com/apache/iceberg-rust/pull/748). Please use Orbstack or Podman. -For MacOS users, you can install [OrbStack](https://orbstack.dev/) as a docker alternative. +For macOS users, you can install [OrbStack as a Docker alternative](docs/contributing/orbstack.md). For podman, refer to [Using Podman instead of Docker](docs/contributing/podman.md) diff --git a/Cargo.lock b/Cargo.lock index ddb0c77b8..42388f85c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -670,9 +670,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "300a12520b4e6d08b73f77680f12c16e8ae43250d55100e0b2be46d78da16a48" +checksum = "44f6f1124d6e19ab6daf7f2e615644305dc6cb2d706892a8a8c0b98db35de020" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -717,9 +717,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc0cc08edc80d70edb091fad02537a719ed293ef871553ef8df192c92c415e4d" +checksum = "2111e5117b6e6bbe8c89ddca58e5c1339accc74a47757ab1e39db4f26999a426" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1365,7 +1365,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -2135,6 +2135,12 @@ dependencies = [ "syn 2.0.92", ] +[[package]] +name = "dissimilar" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"59f8e79d1fbf76bdfbde321e902714bf6c49df88a7dda6fc682fc2979226962d" + [[package]] name = "dlv-list" version = "0.5.2" @@ -2236,6 +2242,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "expect-test" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63af43ff4431e848fb47472a920f14fa71c24de13255a5692e93d4e90302acb0" +dependencies = [ + "dissimilar", + "once_cell", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2868,6 +2884,7 @@ dependencies = [ "chrono", "ctor", "derive_builder", + "expect-test", "fnv", "futures", "iceberg-catalog-memory", @@ -4658,9 +4675,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.10" +version = "0.12.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3536321cfc54baa8cf3e273d5e1f63f889067829c4b410fcdbac8ca7b80994" +checksum = "7fe060fe50f524be480214aba758c71f99f90ee8c83c5a36b5e9e1d568eb4eb3" dependencies = [ "base64 0.22.1", "bytes", @@ -5060,9 +5077,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] @@ -5078,9 +5095,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", @@ -6472,7 +6489,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - 
"windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml index 64b332611..772b328f3 100644 --- a/crates/catalog/s3tables/Cargo.toml +++ b/crates/catalog/s3tables/Cargo.toml @@ -32,7 +32,7 @@ keywords = ["iceberg", "sql", "catalog"] anyhow = { workspace = true } async-trait = { workspace = true } aws-config = { workspace = true } -aws-sdk-s3tables = "1.0.0" +aws-sdk-s3tables = "1.2.0" iceberg = { workspace = true } serde_json = { workspace = true } uuid = { workspace = true, features = ["v4"] } diff --git a/crates/iceberg/src/metadata_scan.rs b/crates/iceberg/src/metadata_scan.rs index bc6129860..287e82f11 100644 --- a/crates/iceberg/src/metadata_scan.rs +++ b/crates/iceberg/src/metadata_scan.rs @@ -25,67 +25,53 @@ use arrow_array::builder::{ use arrow_array::types::{Int32Type, Int64Type, Int8Type, TimestampMillisecondType}; use arrow_array::RecordBatch; use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; -use async_trait::async_trait; -use crate::io::FileIO; -use crate::spec::TableMetadataRef; +use crate::spec::TableMetadata; use crate::table::Table; use crate::Result; -/// Table metadata scan. +/// Metadata table is used to inspect a table's history, snapshots, and other metadata as a table. /// -/// Used to inspect a table's history, snapshots, and other metadata as a table. -/// -/// See also . +/// References: +/// - +/// - +/// - #[derive(Debug)] -pub struct MetadataScan { - metadata_ref: TableMetadataRef, - io: FileIO, -} +pub struct MetadataTable(Table); -impl MetadataScan { +impl MetadataTable { /// Creates a new metadata scan. - pub fn new(table: &Table) -> Self { - Self { - metadata_ref: table.metadata_ref(), - io: table.file_io().clone(), - } + pub(super) fn new(table: Table) -> Self { + Self(table) } - /// Returns the snapshots of the table. - pub async fn snapshots(&self) -> Result { - SnapshotsTable::scan(self).await + /// Get the snapshots table. 
+ pub fn snapshots(&self) -> SnapshotsTable { + SnapshotsTable { + metadata_table: self, + } } - /// Returns the manifests of the table. - pub async fn manifests(&self) -> Result { - ManifestsTable::scan(self).await + /// Get the manifests table. + pub fn manifests(&self) -> ManifestsTable { + ManifestsTable { + metadata_table: self, + } } -} - -/// Table metadata scan. -/// -/// Use to inspect a table's history, snapshots, and other metadata as a table. -/// -/// References: -/// - -/// - -/// - -#[async_trait] -pub trait MetadataTable { - /// Returns the schema of the metadata table. - fn schema() -> Schema; - /// Scans the metadata table. - async fn scan(scan: &MetadataScan) -> Result; + fn metadata(&self) -> &TableMetadata { + self.0.metadata() + } } /// Snapshots table. -pub struct SnapshotsTable; +pub struct SnapshotsTable<'a> { + metadata_table: &'a MetadataTable, +} -#[async_trait] -impl MetadataTable for SnapshotsTable { - fn schema() -> Schema { +impl<'a> SnapshotsTable<'a> { + /// Returns the schema of the snapshots table. + pub fn schema(&self) -> Schema { Schema::new(vec![ Field::new( "committed_at", @@ -117,7 +103,8 @@ impl MetadataTable for SnapshotsTable { ]) } - async fn scan(scan: &MetadataScan) -> Result { + /// Scans the snapshots table. 
+ pub fn scan(&self) -> Result { let mut committed_at = PrimitiveBuilder::::new().with_timezone("+00:00"); let mut snapshot_id = PrimitiveBuilder::::new(); @@ -126,7 +113,7 @@ impl MetadataTable for SnapshotsTable { let mut manifest_list = StringBuilder::new(); let mut summary = MapBuilder::new(None, StringBuilder::new(), StringBuilder::new()); - for snapshot in scan.metadata_ref.snapshots() { + for snapshot in self.metadata_table.metadata().snapshots() { committed_at.append_value(snapshot.timestamp_ms()); snapshot_id.append_value(snapshot.snapshot_id()); parent_id.append_option(snapshot.parent_snapshot_id()); @@ -139,7 +126,7 @@ impl MetadataTable for SnapshotsTable { summary.append(true)?; } - Ok(RecordBatch::try_new(Arc::new(Self::schema()), vec![ + Ok(RecordBatch::try_new(Arc::new(self.schema()), vec![ Arc::new(committed_at.finish()), Arc::new(snapshot_id.finish()), Arc::new(parent_id.finish()), @@ -151,10 +138,12 @@ impl MetadataTable for SnapshotsTable { } /// Manifests table. -pub struct ManifestsTable; +pub struct ManifestsTable<'a> { + metadata_table: &'a MetadataTable, +} -impl ManifestsTable { - fn partition_summary_fields() -> Vec { +impl<'a> ManifestsTable<'a> { + fn partition_summary_fields(&self) -> Vec { vec![ Field::new("contains_null", DataType::Boolean, false), Field::new("contains_nan", DataType::Boolean, true), @@ -162,11 +151,8 @@ impl ManifestsTable { Field::new("upper_bound", DataType::Utf8, true), ] } -} -#[async_trait] -impl MetadataTable for ManifestsTable { - fn schema() -> Schema { + fn schema(&self) -> Schema { Schema::new(vec![ Field::new("content", DataType::Int8, false), Field::new("path", DataType::Utf8, false), @@ -183,7 +169,7 @@ impl MetadataTable for ManifestsTable { "partition_summaries", DataType::List(Arc::new(Field::new_struct( "item", - ManifestsTable::partition_summary_fields(), + self.partition_summary_fields(), false, ))), false, @@ -191,7 +177,8 @@ impl MetadataTable for ManifestsTable { ]) } - async fn scan(scan: 
&MetadataScan) -> Result { + /// Scans the manifests table. + pub async fn scan(&self) -> Result { let mut content = PrimitiveBuilder::::new(); let mut path = StringBuilder::new(); let mut length = PrimitiveBuilder::::new(); @@ -204,18 +191,21 @@ impl MetadataTable for ManifestsTable { let mut existing_delete_files_count = PrimitiveBuilder::::new(); let mut deleted_delete_files_count = PrimitiveBuilder::::new(); let mut partition_summaries = ListBuilder::new(StructBuilder::from_fields( - Fields::from(ManifestsTable::partition_summary_fields()), + Fields::from(self.partition_summary_fields()), 0, )) .with_field(Arc::new(Field::new_struct( "item", - ManifestsTable::partition_summary_fields(), + self.partition_summary_fields(), false, ))); - if let Some(snapshot) = scan.metadata_ref.current_snapshot() { + if let Some(snapshot) = self.metadata_table.metadata().current_snapshot() { let manifest_list = snapshot - .load_manifest_list(&scan.io, &scan.metadata_ref) + .load_manifest_list( + &self.metadata_table.0.file_io(), + &self.metadata_table.0.metadata_ref(), + ) .await?; for manifest in manifest_list.entries() { content.append_value(manifest.content.clone() as i8); @@ -259,7 +249,7 @@ impl MetadataTable for ManifestsTable { } } - Ok(RecordBatch::try_new(Arc::new(Self::schema()), vec![ + Ok(RecordBatch::try_new(Arc::new(self.schema()), vec![ Arc::new(content.finish()), Arc::new(path.finish()), Arc::new(length.finish()), @@ -331,10 +321,10 @@ mod tests { )); } - #[tokio::test] - async fn test_snapshots_table() { + #[test] + fn test_snapshots_table() { let table = TableTestFixture::new().table; - let record_batch = table.metadata_scan().snapshots().await.unwrap(); + let record_batch = table.metadata_table().snapshots().scan().unwrap(); check_record_batch( record_batch, expect![[r#" @@ -407,7 +397,13 @@ mod tests { let mut fixture = TableTestFixture::new(); fixture.setup_manifest_files().await; - let record_batch = fixture.table.metadata_scan().manifests().await.unwrap(); 
+ let record_batch = fixture + .table + .metadata_table() + .manifests() + .scan() + .await + .unwrap(); check_record_batch( record_batch, diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index f78e04e2f..fa5304855 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use crate::arrow::ArrowReaderBuilder; use crate::io::object_cache::ObjectCache; use crate::io::FileIO; -use crate::metadata_scan::MetadataScan; +use crate::metadata_scan::MetadataTable; use crate::scan::TableScanBuilder; use crate::spec::{TableMetadata, TableMetadataRef}; use crate::{Error, ErrorKind, Result, TableIdent}; @@ -201,9 +201,10 @@ impl Table { TableScanBuilder::new(self) } - /// Creates a metadata scan. See [`MetadataScan`] for more details. - pub fn metadata_scan(&self) -> MetadataScan { - MetadataScan::new(self) + /// Creates a metadata table which provides table-like APIs for inspecting metadata. + /// See [`MetadataTable`] for more details. + pub fn metadata_table(self) -> MetadataTable { + MetadataTable::new(self) } /// Returns the flag indicating whether the `Table` is readonly or not diff --git a/docs/contributing/orbstack.md b/docs/contributing/orbstack.md new file mode 100644 index 000000000..29eb09dc5 --- /dev/null +++ b/docs/contributing/orbstack.md @@ -0,0 +1,39 @@ + + +# OrbStack as a Docker alternative on macOS +1. Install OrbStack by downloading the [installer](https://orbstack.dev/download) or using Homebrew. + ```shell + brew install orbstack + ``` + +2. Migrate Docker data + ```shell + orbstack migrate docker + ``` + +3. (Optional) Add registry mirrors + + You can edit the config directly at `~/.orbstack/config/docker.json` and restart the engine with `orb restart docker`. + + ```json + { + "registry-mirrors": ["https://registry.docker.ir", "https://docker.iranserver.com"] + } + ``` \ No newline at end of file