
Commit

Merge branch 'master' into cosn
liujh committed Jan 7, 2025
2 parents 9b0aeb0 + a19a8d7 commit 6cda28e
Showing 835 changed files with 24,551 additions and 6,533 deletions.
5 changes: 5 additions & 0 deletions LICENSE
@@ -270,6 +270,11 @@ paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
from https://parquet.apache.org/ version 1.14.0

paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariant.java
paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantBuilder.java
paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantUtil.java
from https://spark.apache.org/ version 4.0.0-preview2

MIT License
-----------

2 changes: 1 addition & 1 deletion NOTICE
@@ -1,5 +1,5 @@
Apache Paimon
Copyright 2023-2024 The Apache Software Foundation
Copyright 2023-2025 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
9 changes: 5 additions & 4 deletions docs/config.toml
@@ -34,11 +34,11 @@ pygmentsUseClasses = true
# we change the version for the complete docs when forking of a release branch
# etc.
# The full version string as referenced in Maven (e.g. 1.2.1)
Version = "1.0-SNAPSHOT"
Version = "1.1-SNAPSHOT"

# For stable releases, leave the bugfix version out (e.g. 1.2). For snapshot
# release this should be the same as the regular version
VersionTitle = "1.0-SNAPSHOT"
VersionTitle = "1.1-SNAPSHOT"

# The branch for this version of Apache Paimon
Branch = "master"
@@ -67,11 +67,12 @@ pygmentsUseClasses = true
["JavaDocs", "//paimon.apache.org/docs/master/api/java/"],
]

StableDocs = "https://paimon.apache.org/docs/0.9"
StableDocs = "https://paimon.apache.org/docs/1.0"

PreviousDocs = [
["master", "https://paimon.apache.org/docs/master"],
["stable", "https://paimon.apache.org/docs/0.9"],
["stable", "https://paimon.apache.org/docs/1.0"],
["1.0", "https://paimon.apache.org/docs/1.0"],
["0.9", "https://paimon.apache.org/docs/0.9"],
["0.8", "https://paimon.apache.org/docs/0.8"],
]
2 changes: 1 addition & 1 deletion docs/content/append-table/bucketed.md
@@ -196,4 +196,4 @@ The `spark.sql.sources.v2.bucketing.enabled` config is used to enable bucketing
Spark will recognize the specific distribution reported by a V2 data source through SupportsReportPartitioning, and
will try to avoid shuffle if necessary.

The costly join shuffle will be avoided if two tables have same bucketing strategy and same number of buckets.
The costly join shuffle will be avoided if two tables have the same bucketing strategy and same number of buckets.
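
For illustration, a minimal Spark SQL sketch of this setup; the table layout and column names are hypothetical, and the `'bucket'` / `'bucket-key'` table properties are assumed to be the Paimon options for fixed-bucket append tables:

```sql
-- Enable storage-partitioned join so Spark can use the bucketing reported by Paimon (Spark 3.3+).
SET spark.sql.sources.v2.bucketing.enabled = true;

-- Two append tables bucketed the same way on the join key.
CREATE TABLE orders (
    order_id BIGINT,
    customer_id INT,
    amount DECIMAL(10, 2)
) TBLPROPERTIES ('bucket' = '8', 'bucket-key' = 'customer_id');

CREATE TABLE customers (
    customer_id INT,
    name STRING
) TBLPROPERTIES ('bucket' = '8', 'bucket-key' = 'customer_id');

-- Same bucketing strategy and bucket count on both sides: the join can avoid a shuffle.
SELECT o.order_id, c.name, o.amount
FROM orders o JOIN customers c ON o.customer_id = c.customer_id;
```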
4 changes: 2 additions & 2 deletions docs/content/append-table/query-performance.md
@@ -35,7 +35,7 @@ filtering, if the filtering effect is good, the query would have been minutes of
milliseconds to complete the execution.

Often the data distribution is not effective for filtering, so what if we could sort the data by the field in the `WHERE` condition?
You can take a look to [Flink COMPACT Action]({{< ref "maintenance/dedicated-compaction#sort-compact" >}}) or
You can take a look at [Flink COMPACT Action]({{< ref "maintenance/dedicated-compaction#sort-compact" >}}) or
[Flink COMPACT Procedure]({{< ref "flink/procedures" >}}) or [Spark COMPACT Procedure]({{< ref "spark/procedures" >}}).

## Data Skipping By File Index
@@ -54,7 +54,7 @@ file is too small, it will be stored directly in the manifest, otherwise in the
corresponds to an index file, which has a separate file definition and can contain different types of indexes with
multiple columns.

Different file index may be efficient in different scenario. For example bloom filter may speed up query in point lookup
Different file indexes may be efficient in different scenarios. For example bloom filter may speed up query in point lookup
scenario. Using a bitmap may consume more space but can result in greater accuracy.

`Bloom Filter`:
5 changes: 5 additions & 0 deletions docs/content/cdc-ingestion/kafka-cdc.md
@@ -198,10 +198,15 @@ To use this feature through `flink run`, run the following shell command.
kafka_sync_database
--warehouse <warehouse-path> \
--database <database-name> \
[--table_mapping <table-name>=<paimon-table-name>] \
[--table_prefix <paimon-table-prefix>] \
[--table_suffix <paimon-table-suffix>] \
[--table_prefix_db <paimon-table-prefix-by-db>] \
[--table_suffix_db <paimon-table-suffix-by-db>] \
[--including_tables <table-name|name-regular-expr>] \
[--excluding_tables <table-name|name-regular-expr>] \
[--including_dbs <database-name|name-regular-expr>] \
[--excluding_dbs <database-name|name-regular-expr>] \
[--type_mapping to-string] \
[--partition_keys <partition_keys>] \
[--primary_keys <primary-keys>] \
12 changes: 8 additions & 4 deletions docs/content/cdc-ingestion/mysql-cdc.md
@@ -34,11 +34,15 @@ Download `CDC Bundled Jar` and put them under <FLINK_HOME>/lib/.

| Version | Bundled Jar |
|---------|-------------------------------------------------------------------------------------------------------------------------|
| 2.3.x | <br/>Only supported in versions below 0.8.2<li> flink-sql-connector-mysql-cdc-2.3.x.jar |
| 2.4.x | <br/>Only supported in versions below 0.8.2<li> flink-sql-connector-mysql-cdc-2.4.x.jar |
| 3.0.x | <br/>Only supported in versions below 0.8.2<li> flink-sql-connector-mysql-cdc-3.0.x.jar <li> flink-cdc-common-3.0.x.jar |
| 3.1.x | <li> flink-sql-connector-mysql-cdc-3.1.x.jar <li> mysql-connector-java-8.0.27.jar |

{{< hint danger >}}
Only cdc 3.1+ is supported.

You can download the `flink-connector-mysql-cdc` jar package by clicking [here](https://repo.maven.apache.org/maven2/org/apache/flink/flink-connector-mysql-cdc/).

{{< /hint >}}

## Synchronizing Tables

By using [MySqlSyncTableAction](/docs/{{< param Branch >}}/api/java/org/apache/paimon/flink/action/cdc/mysql/MySqlSyncTableAction) in a Flink DataStream job or directly through `flink run`, users can synchronize one or multiple tables from MySQL into one Paimon table.
@@ -261,5 +265,5 @@ to avoid potential name conflict.
## FAQ
1. Chinese characters in records ingested from MySQL are garbled.
* Try to set `env.java.opts: -Dfile.encoding=UTF-8` in `flink-conf.yaml`
* Try to set `env.java.opts: -Dfile.encoding=UTF-8` in `flink-conf.yaml` (Flink version < 1.19) or `config.yaml` (Flink version >= 1.19)
(the option is changed to `env.java.opts.all` since Flink-1.17).
40 changes: 37 additions & 3 deletions docs/content/concepts/spec/datafile.md
@@ -83,11 +83,45 @@ relationship between various table types and buckets in Paimon:
The name of data file is `data-${uuid}-${id}.${format}`. For the append table, the file stores the data of the table
without adding any new columns. But for the primary key table, each row of data stores additional system columns:

1. `_VALUE_KIND`: row is deleted or added. Similar to RocksDB, each row of data can be deleted or added, which will be
## Table with Primary key Data File

1. Primary key columns, prefixed with `_KEY_` to avoid conflicts with the table's own columns. They are optional:
Paimon version 1.0 and above can retrieve the primary key fields from the value columns.
2. `_VALUE_KIND`: TINYINT, whether the row is deleted or added. Similar to RocksDB, each row of data can be deleted or added, which is
used for updating the primary key table.
2. `_SEQUENCE_NUMBER`: this number is used for comparison during updates, determining which data came first and which
3. `_SEQUENCE_NUMBER`: BIGINT, this number is used for comparison during updates, determining which data came first and which
data came later.
3. `_KEY_` prefix to key columns, this is to avoid conflicts with columns of the table.
4. Value columns. All columns declared in the table.

For example, data file for table:

```sql
CREATE TABLE T (
a INT PRIMARY KEY NOT ENFORCED,
b INT,
c INT
);
```

Its file has 6 columns: `_KEY_a`, `_VALUE_KIND`, `_SEQUENCE_NUMBER`, `a`, `b`, `c`.

When `data-file.thin-mode` is enabled, its file has 5 columns: `_VALUE_KIND`, `_SEQUENCE_NUMBER`, `a`, `b`, `c`.
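
As a sketch only: the thin layout can presumably be requested when creating the table, assuming `data-file.thin-mode` is accepted as a table option (Flink SQL syntax):

```sql
-- Same example table, with the thin data file layout enabled so the
-- redundant _KEY_a column is not written to data files.
CREATE TABLE T (
    a INT PRIMARY KEY NOT ENFORCED,
    b INT,
    c INT
) WITH (
    'data-file.thin-mode' = 'true'
);
```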

## Table w/o Primary key Data File

- Value columns. All columns declared in the table.

For example, data file for table:

```sql
CREATE TABLE T (
a INT,
b INT,
c INT
);
```

Its file has 3 columns: `a`, `b`, `c`.

## Changelog File

6 changes: 3 additions & 3 deletions docs/content/concepts/spec/fileindex.md
@@ -83,7 +83,7 @@ redundant bytes: var bytes (for compatibility with later versio
BODY: column index bytes + column index bytes + column index bytes + .......
</pre>

## Column Index Bytes: BloomFilter
## Index: BloomFilter

Define `'file-index.bloom-filter.columns'`.

@@ -94,7 +94,7 @@ Content of bloom filter index is simple:
This class use (64-bits) long hash. Store the num hash function (one integer) and bit set bytes only. Hash bytes type
(like varchar, binary, etc.) using xx hash, hash numeric type by [specified number hash](http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm).
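
A minimal sketch of defining this option on a table (Flink SQL; the table and column names are illustrative, and the per-column `items` key is an assumption about the option naming):

```sql
-- Build bloom filter file indexes for point-lookup columns.
CREATE TABLE orders (
    order_id BIGINT,
    customer_id INT,
    amount DECIMAL(10, 2)
) WITH (
    'file-index.bloom-filter.columns' = 'order_id,customer_id',
    -- Assumed option key: expected distinct items per file, used to size the filter.
    'file-index.bloom-filter.order_id.items' = '10000'
);
```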

## Column Index Bytes: Bitmap
## Index: Bitmap

Define `'file-index.bitmap.columns'`.

@@ -137,7 +137,7 @@ offset: 4 bytes int (when it is negative, it represents t

Integers are all BIG_ENDIAN.

## Column Index Bytes: Bit-Slice Index Bitmap
## Index: Bit-Slice Index Bitmap

BSI file index is a numeric range index, used to accelerate range queries; it can be used together with the bitmap index.
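
A sketch of combining the two index types on one table (Flink SQL); `file-index.bitmap.columns` is the option named above, while `file-index.bsi.columns` is assumed to follow the same naming pattern:

```sql
-- Bitmap index for low-cardinality equality filters, BSI for numeric range filters.
CREATE TABLE events (
    event_type STRING,
    user_id BIGINT,
    ts TIMESTAMP(3)
) WITH (
    'file-index.bitmap.columns' = 'event_type',
    'file-index.bsi.columns' = 'user_id'
);
```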

98 changes: 59 additions & 39 deletions docs/content/concepts/spec/manifest.md
@@ -35,13 +35,13 @@ under the License.

Manifest List includes meta of several manifest files. Its name contains a UUID; it is an Avro file. The schema is:

1. fileName: manifest file name.
2. fileSize: manifest file size.
3. numAddedFiles: number added files in manifest.
4. numDeletedFiles: number deleted files in manifest.
5. partitionStats: partition stats, the minimum and maximum values of partition fields in this manifest are beneficial
1. _FILE_NAME: STRING, manifest file name.
2. _FILE_SIZE: BIGINT, manifest file size.
3. _NUM_ADDED_FILES: BIGINT, number of added files in the manifest.
4. _NUM_DELETED_FILES: BIGINT, number of deleted files in the manifest.
5. _PARTITION_STATS: SimpleStats, partition stats, the minimum and maximum values of partition fields in this manifest are beneficial
for skipping certain manifest files during queries, it is a SimpleStats.
6. schemaId: schema id when writing this manifest file.
6. _SCHEMA_ID: BIGINT, schema id when writing this manifest file.
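
These fields can be inspected from SQL through the `manifests` system table; a query sketch, assuming the lower-cased column names exposed by recent Paimon releases:

```sql
-- List manifest files of table T with their sizes and added/deleted file counts.
SELECT file_name, file_size, num_added_files, num_deleted_files, schema_id
FROM T$manifests;
```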

## Manifest

@@ -63,31 +63,32 @@ Data Manifest includes meta of several data files or changelog files.

The schema is:

1. kind: ADD or DELETE,
2. partition: partition spec, a BinaryRow.
3. bucket: bucket of this file.
4. totalBuckets: total buckets when write this file, it is used for verification after bucket changes.
5. file: data file meta.
1. _KIND: TINYINT, ADD or DELETE.
2. _PARTITION: BYTES, partition spec, a BinaryRow.
3. _BUCKET: INT, bucket of this file.
4. _TOTAL_BUCKETS: INT, total buckets when writing this file; used for verification after bucket changes.
5. _FILE: data file meta.

The data file meta is:

1. fileName: file name.
2. fileSize: file size.
3. rowCount: total number of rows (including add & delete) in this file.
4. minKey: the minimum key of this file.
5. maxKey: the maximum key of this file.
6. keyStats: the statistics of the key.
7. valueStats: the statistics of the value.
8. minSequenceNumber: the minimum sequence number.
9. maxSequenceNumber: the maximum sequence number.
10. schemaId: schema id when write this file.
11. level: level of this file, in LSM.
12. extraFiles: extra files for this file, for example, data file index file.
13. creationTime: creation time of this file.
14. deleteRowCount: rowCount = addRowCount + deleteRowCount.
15. embeddedIndex: if data file index is too small, store the index in manifest.
16. fileSource: indicate whether this file is generated as an APPEND or COMPACT file
17. valueStatsCols: statistical column in metadata
1. _FILE_NAME: STRING, file name.
2. _FILE_SIZE: BIGINT, file size.
3. _ROW_COUNT: BIGINT, total number of rows (including add & delete) in this file.
4. _MIN_KEY: STRING, the minimum key of this file.
5. _MAX_KEY: STRING, the maximum key of this file.
6. _KEY_STATS: SimpleStats, the statistics of the key.
7. _VALUE_STATS: SimpleStats, the statistics of the value.
8. _MIN_SEQUENCE_NUMBER: BIGINT, the minimum sequence number.
9. _MAX_SEQUENCE_NUMBER: BIGINT, the maximum sequence number.
10. _SCHEMA_ID: BIGINT, schema id when writing this file.
11. _LEVEL: INT, level of this file, in LSM.
12. _EXTRA_FILES: ARRAY<STRING>, extra files for this file, for example, data file index file.
13. _CREATION_TIME: TIMESTAMP_MILLIS, creation time of this file.
14. _DELETE_ROW_COUNT: BIGINT, rowCount = addRowCount + deleteRowCount.
15. _EMBEDDED_FILE_INDEX: BYTES, if the data file index is small enough, it is stored directly in the manifest.
16. _FILE_SOURCE: TINYINT, indicates whether this file is generated as an APPEND or COMPACT file.
17. _VALUE_STATS_COLS: ARRAY<STRING>, the columns for which statistics are recorded in metadata.
18. _EXTERNAL_PATH: external path of this file, null if it is in the warehouse.
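
Much of this per-file metadata can also be inspected through the `files` system table; a query sketch, with column names that are assumptions and may differ between Paimon versions:

```sql
-- Inspect data file metadata of table T (a subset of the fields above).
SELECT file_path, record_count, file_size_in_bytes, level, creation_time
FROM T$files;
```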

### Index Manifest

@@ -100,16 +101,35 @@ Index Manifest includes meta of several [table-index]({{< ref "concepts/spec/tab

The schema is:

1. kind: ADD or DELETE,
2. partition: partition spec, a BinaryRow.
3. bucket: bucket of this file.
4. indexFile: index file meta.
1. _KIND: TINYINT, ADD or DELETE.
2. _PARTITION: BYTES, partition spec, a BinaryRow.
3. _BUCKET: INT, bucket of this file.
4. _INDEX_TYPE: STRING, "HASH" or "DELETION_VECTORS".
5. _FILE_NAME: STRING, file name.
6. _FILE_SIZE: BIGINT, file size.
7. _ROW_COUNT: BIGINT, total number of rows.
8. _DELETIONS_VECTORS_RANGES: metadata used only by "DELETION_VECTORS"; it is an array of deletion vector metadata, where the schema of each deletion vector meta is:
1. f0: the data file name corresponding to this deletion vector.
2. f1: the starting offset of this deletion vector in the index file.
3. f2: the length of this deletion vector in the index file.
4. _CARDINALITY: the number of deleted rows.
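
"DELETION_VECTORS" index files are produced when deletion vectors are enabled on a primary key table; a minimal sketch, assuming the `deletion-vectors.enabled` table option (Flink SQL, illustrative table name):

```sql
-- Deletion vector index files are written and tracked by the index manifest
-- once deletion vectors are enabled.
CREATE TABLE dv_table (
    a INT PRIMARY KEY NOT ENFORCED,
    b INT
) WITH (
    'deletion-vectors.enabled' = 'true'
);
```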

The index file meta is:
## Appendix

1. indexType: string, "HASH" or "DELETION_VECTORS".
2. fileName: file name.
3. fileSize: file size.
4. rowCount: total number of rows.
5. deletionVectorsRanges: Metadata only used by "DELETION_VECTORS", Stores offset and length of each data file,
The schema is `ARRAY<ROW<f0: STRING, f1: INT, f2: INT>>`.
### SimpleStats

SimpleStats is a nested row; the schema is:

1. _MIN_VALUES: BYTES, BinaryRow, the minimum values of the columns.
2. _MAX_VALUES: BYTES, BinaryRow, the maximum values of the columns.
3. _NULL_COUNTS: ARRAY<BIGINT>, the number of nulls of the columns.

### BinaryRow

BinaryRow is backed by bytes instead of Object. It can significantly reduce the serialization/deserialization of Java
objects.

A row has two parts: a fixed-length part and a variable-length part. The fixed-length part contains a 1-byte header, the null bit
set and the field values. The null bit set is used for null tracking and is aligned to 8-byte word boundaries. `Field values`
holds fixed-length primitive types and variable-length values that fit within 8 bytes. If a value does not fit within 8 bytes,
it is written to the variable-length part, and the fixed-length slot stores its length and offset instead.
