From 6a5b8401955dc963cfca85126e6e6b417a4f4510 Mon Sep 17 00:00:00 2001 From: Josip Igrec Date: Thu, 23 Jan 2025 12:37:54 +0100 Subject: [PATCH 1/2] chore: fix minor style/grammar issues in markdown files --- .markdownlint.yaml | 25 +++ CODE_OF_CONDUCT.md | 3 +- CONTRIBUTING.md | 63 +++---- README-libsql.md | 2 +- README.md | 14 +- bindings/wasm/README.md | 2 +- bottomless/README.md | 43 +++-- docs/ADMIN_API.md | 10 +- docs/BUILD-RUN.md | 9 +- docs/CONSISTENCY_MODEL.md | 6 +- docs/DESIGN.md | 6 +- docs/DOCKER.md | 18 +- docs/HRANA_3_SPEC.md | 31 ++-- docs/HTTP_V1_SPEC.md | 6 +- docs/HTTP_V2_SPEC.md | 2 +- docs/USER_GUIDE.md | 58 ++++--- docs/client_version_metrics.md | 3 +- docs/http_api.md | 62 +++---- libsql-server/README.md | 8 +- libsql-server/perf/pgbench/README.md | 2 +- libsql-shell/README.md | 15 +- libsql-sqlite3/doc/compile-for-windows.md | 121 +++++++------- libsql-sqlite3/doc/jsonb.md | 195 +++++++++++----------- libsql-sqlite3/doc/libsql_extensions.md | 74 +++++--- libsql-sqlite3/doc/testrunner.md | 79 +++++---- libsql-sqlite3/doc/trusted-schema.md | 84 +++++----- libsql-sqlite3/doc/vdbesort-memory.md | 6 +- libsql-sqlite3/doc/wal-lock.md | 34 ++-- libsql-sqlite3/ext/jni/README.md | 8 +- libsql-sqlite3/test/json/README.md | 91 +++++----- tools/fuzz/README.md | 5 +- 31 files changed, 570 insertions(+), 515 deletions(-) create mode 100644 .markdownlint.yaml diff --git a/.markdownlint.yaml b/.markdownlint.yaml new file mode 100644 index 0000000000..c685afac26 --- /dev/null +++ b/.markdownlint.yaml @@ -0,0 +1,25 @@ +# Default state for all rules +default: true + +# MD010/no-hard-tabs : Hard tabs : https://github.com/DavidAnson/markdownlint/blob/v0.37.3/doc/md010.md +MD010: + # Include code blocks + code_blocks: false + # Fenced code languages to ignore + ignore_code_languages: [] + # Number of spaces for each hard tab + spaces_per_tab: 4 + +# MD013/line-length : Line length : https://github.com/DavidAnson/markdownlint/blob/v0.37.3/doc/md013.md +MD013: false 
+ +# MD024/no-duplicate-heading : Multiple headings with the same content : https://github.com/DavidAnson/markdownlint/blob/v0.37.3/doc/md024.md +MD024: + # Only check sibling headings + siblings_only: true + +# MD033/no-inline-html : Inline HTML : https://github.com/DavidAnson/markdownlint/blob/v0.37.3/doc/md033.md +MD033: + # Allowed elements + allowed_elements: + - p diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 5f28f5c3bb..ad038bf0b5 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -61,7 +61,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -community@libsql.org. +[community@libsql.org](mailto:community@libsql.org). All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the @@ -131,4 +131,3 @@ For answers to common questions about this code of conduct, see the FAQ at [Mozilla CoC]: https://github.com/mozilla/diversity [FAQ]: https://www.contributor-covenant.org/faq [translations]: https://www.contributor-covenant.org/translations - diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b22a5d91ae..8ed53804b6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,48 +4,51 @@ Thank you for your interest in contributing to libsql! We welcome contributions Before you start, please take a moment to review the guidelines outlined below. -# Code of Conduct +## Code of Conduct Please note that we have a Code of Conduct in place to ensure a positive and inclusive environment for everyone involved. -Getting Started - -Fork the repository. -Clone your forked repository to your local machine. -Create a new branch for your feature or bug fix: git checkout -b feature-name. -Make your changes and test thoroughly. -Commit your changes with descriptive commit messages. 
-Push your changes to your forked repository: git push origin feature-name. -Submit a pull request to the main branch of the original repository. -Contribution Guidelines - -All contributions should be well-documented and follow the existing code style. -Include tests for any new functionality or bug fixes. -Update documentation as necessary, including any relevant README or usage guides. -Maintain backward compatibility whenever possible. -Follow the Git commit message guidelines for consistent commit messages. -Review the existing issues and pull requests to avoid duplicating efforts. -Pull Request Process - -Ensure that your pull request addresses a specific issue. If there isn't an issue, consider creating one first. -Clearly describe the problem or feature in your pull request and reference the relevant issue. -Provide steps to reproduce and test your changes if applicable. -Ensure that your branch is up-to-date with the latest changes from the main branch. -All checks (tests, formatting, etc.) must pass before your pull request can be merged. -After addressing review comments, your pull request will be merged by a project maintainer. -Documentation +### Getting Started + +* Fork the repository. +* Clone your forked repository to your local machine. +* Create a new branch for your feature or bug fix: `git checkout -b feature-name`. +* Make your changes and test thoroughly. +* Commit your changes with descriptive commit messages. +* Push your changes to your forked repository: `git push origin feature-name`. +* Submit a pull request to the main branch of the original repository. + +### Contribution Guidelines + +* All contributions should be well-documented and follow the existing code style. +* Include tests for any new functionality or bug fixes. +* Update documentation as necessary, including any relevant README or usage guides. +* Maintain backward compatibility whenever possible. +* Follow the Git commit message guidelines for consistent commit messages. 
+* Review the existing issues and pull requests to avoid duplicating efforts. + +### Pull Request Process + +* Ensure that your pull request addresses a specific issue. + * If there isn't an issue, consider creating one first. +* Clearly describe the problem or feature in your pull request and reference the relevant issue. +* Provide steps to reproduce and test your changes if applicable. +* Ensure that your branch is up-to-date with the latest changes from the main branch. +* All checks (tests, formatting, etc.) must pass before your pull request can be merged. +* After addressing review comments, your pull request will be merged by a project maintainer. + +### Documentation Help us improve the project's documentation by fixing typos, clarifying language, or adding missing information. Documentation improvements can be submitted as pull requests. -Reporting Issues +### Reporting Issues If you encounter a bug or have a suggestion for the project, please open an issue using the provided issue template. Include as much detail as possible to help us understand and address the problem. -Getting Help +### Getting Help If you need assistance, have questions, or want to discuss ideas, you can: Join our Discord server and chat with the community. Mention @maintainer in your issue or pull request for a faster response from project maintainers. We appreciate your time and effort in contributing to libsql! - diff --git a/README-libsql.md b/README-libsql.md index dc5c5230f5..c96ca6ece7 100644 --- a/README-libsql.md +++ b/README-libsql.md @@ -13,7 +13,7 @@ This libSQL API is an experimental, batteries-included library built on top of S ## Getting Started -* [Rust](core) +* [Rust](core) * [Python](https://github.com/libsql/libsql-experimental-python) * [JavaScript](https://github.com/libsql/libsql-experimental-node) * [Go](https://github.com/libsql/go-libsql) diff --git a/README.md b/README.md index 0b9524b727..cf872e8852 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ + +

libSQL by Turso @@ -33,7 +35,7 @@ ## Documentation -We aim to evolve it to suit many more use cases than SQLite was originally designed for, and plan to use third-party OSS code wherever it makes sense. +We aim to evolve it to suit many more use cases than SQLite was originally designed for, and plan to use third-party OSS code wherever it makes sense. libSQL has many great features, including: @@ -54,19 +56,22 @@ The comprehensive description can be found [here](libsql-sqlite3/doc/libsql_exte ### Official Drivers * [TypeScript / JS](https://github.com/tursodatabase/libsql-client-ts) -* [Rust](libsql) +* [Rust](libsql) * [Go](https://github.com/tursodatabase/go-libsql) * [Go (no CGO)](https://github.com/tursodatabase/libsql-client-go) ### Experimental Drivers + * [Python](https://github.com/tursodatabase/libsql-experimental-python) (experimental) * [C](bindings/c) (experimental) ### Community Drivers + * [PHP](https://github.com/tursodatabase/turso-client-php) * [D](https://github.com/pdenapo/libsql-d) (experimental, based on the C driver) ### GUI Support + * [Beekeeper Studio](https://www.beekeeperstudio.io/db/libsql-client/) — macOS, Windows, and Linux * [Outerbase](https://www.outerbase.com) — Runs in the browser * [TablePlus](https://tableplus.com) — macOS, Windows, and Linux @@ -95,6 +100,7 @@ libsql> ``` ### Docker + To run libSQL using docker, refer to the [Docker Docs](docs/DOCKER.md) ## Why a fork? @@ -105,7 +111,7 @@ But despite having its code available, SQLite famously doesn't accept external c There have been other forks in the past, but they all focus on a specific technical difference. We aim to be a community where people can contribute from many different angles and motivations. -We want to see a world where everyone can benefit from all of the great ideas and hard work that the SQLite community contributes back to the codebase. Community contributions work well, because we’ve done it before. 
If this was possible, what do you think SQLite could become? +We want to see a world where everyone can benefit from all the great ideas and hard work that the SQLite community contributes back to the codebase. Community contributions work well, because we’ve done it before. If this was possible, what do you think SQLite could become? You can read more about our goals and motivation in our [product vision](https://turso.tech/libsql-manifesto). @@ -113,7 +119,7 @@ You can read more about our goals and motivation in our [product vision](https:/ Compatibility with SQLite is of great importance for us. But it can mean many things. So here's our stance: -* **The file format**: libSQL will always be able to ingest and write the SQLite file format. We would love to add extensions like encryption, and CRC that require the file to be changed. But we commit to always doing so in a way that generates standard sqlite files if those features are not used. +* **The file format**: libSQL will always be able to ingest and write the SQLite file format. We would love to add extensions like encryption, and CRC that require the file to be changed. But we commit to always doing so in a way that generates standard SQLite files if those features are not used. * **The API**: libSQL will keep 100% compatibility with the SQLite API, but we may add additional APIs. * **Embedded**: SQLite is an embedded database that can be consumed as a single .c file with its accompanying header. libSQL will always be embeddable, meaning it runs inside your process without needing a network connection. But we may change the distribution, so that object files are generated, instead of a single .c file. 
diff --git a/bindings/wasm/README.md b/bindings/wasm/README.md index c87e8e9be0..5a0895b24c 100644 --- a/bindings/wasm/README.md +++ b/bindings/wasm/README.md @@ -2,7 +2,7 @@ ## Developing -``` +```console wasm-pack build --target nodejs node example.js ``` diff --git a/bottomless/README.md b/bottomless/README.md index 5ac2764b94..a0e6b2920d 100644 --- a/bottomless/README.md +++ b/bottomless/README.md @@ -1,40 +1,51 @@ # Bottomless S3-compatible virtual WAL for libSQL -##### Work in heavy progress! + +## Work in heavy progress This project implements a virtual write-ahead log (WAL) which continuously backs up the data to S3-compatible storage and is able to restore it later. ## How to build -``` + +```shell LIBSQL_DIR=/path/to/your/libsql/directory make ``` + will produce a loadable `.so` libSQL extension with bottomless WAL implementation. -``` + +```shell LIBSQL_DIR=/path/to/your/libsql/directory make release ``` + will do the same, but for release mode. ## Configuration -By default, the S3 storage is expected to be available at `http://localhost:9000` (e.g. a local development [minio](https://min.io) server), and the auth information is extracted via regular S3 SDK mechanisms, i.e. environment variables and `~/.aws/credentials` file, if present. Ref: https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/guide_credentials_environment.html + +By default, the S3 storage is expected to be available at `http://localhost:9000` (e.g. a local development [minio](https://min.io) server), and the auth information is extracted via regular S3 SDK mechanisms, i.e. environment variables and `~/.aws/credentials` file, if present. 
Ref: <https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/guide_credentials_environment.html> Default endpoint can be overridden by an environment variable too, and in the future it will be available directly from libSQL as an URI parameter: -``` + +```shell export LIBSQL_BOTTOMLESS_ENDPOINT='http://localhost:9042' ``` Bucket used for replication can be configured with: -``` + +```shell export LIBSQL_BOTTOMLESS_BUCKET='custom-bucket' ``` On top of that, bottomless is implemented on top of the official [Rust SDK for S3](https://crates.io/crates/aws-sdk-s3), so all AWS-specific environment variables like `AWS_DEFAULT_REGION`, `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` also work, as well as the `~/.aws/credentials` file. ## How to use + From libSQL shell, load the extension and open a database file with `bottomless` WAL, e.g.: + ```sql .load ../target/debug/bottomless .open file:test.db?wal=bottomless PRAGMA journal_mode=wal; ``` + Remember to set the journaling mode to `WAL`, which needs to be done at least once, before writing any content, otherwise the custom WAL implementation will not be used. In order to customize logging, use `RUST_LOG` env variable, e.g. `RUST_LOG=info ./libsql`. @@ -46,14 +57,18 @@ LIBSQL_DIR=/path/to/your/libsql/directory make test ``` ## CLI + The command-line interface supports browsing, restoring and removing snapshot generations. It can be installed as a standalone executable with: + ```sh RUSTFLAGS="--cfg uuid_unstable" cargo install bottomless-cli ``` + Alternatively, bottomless-cli is available from the repository by running `cargo run`. 
Available commands: -``` + +```console $ bottomless-cli --help Bottomless CLI @@ -75,8 +90,9 @@ Options: ### Examples #### Listing generations -``` -[sarna@sarna-pc test]$ bottomless-cli -e http://localhost:9000 ls -v -l3 + +```console +$ bottomless-cli -e http://localhost:9000 ls -v -l3 e4eb3c21-ff53-7b2e-a6ea-ca396f4df9b1 created at (UTC): 2022-12-23 08:24:52.500 change counter: [0, 0, 0, 51] @@ -106,7 +122,8 @@ e4eb3c22-0941-73eb-85df-4e8552a0e88c ``` #### Restoring the database -``` + +```console $ RUST_LOG=info bottomless-cli -e http://localhost:9000 restore 2022-12-23T10:16:10.703557Z INFO bottomless::replicator: Bucket bottomless exists and is accessible 2022-12-23T10:16:10.709526Z INFO bottomless_cli: Database: test.db @@ -115,20 +132,24 @@ $ RUST_LOG=info bottomless-cli -e http://localhost:9000 restore ``` #### Removing old snapshots -``` + +```console $ bottomless-cli -e http://localhost:9000 rm -v --older-than 2022-12-15 Removed 4 generations ``` ## Details + All page writes committed to the database end up being asynchronously replicated to S3-compatible storage. On boot, if the main database file is empty, it will be restored with data coming from the remote storage. If the database file is newer, it will be uploaded to the remote location with a new generation number. If a local WAL file is present and detected to be newer than remote data, it will be uploaded as well. ### Tests + A fully local test can be performed by using a local S3-compatible server, e.g. [Minio](https://min.io/). Assuming the server is available at HTTP port 9000, you can use the following scripts: + ```sh cd test/ export LIBSQL_BOTTOMLESS_ENDPOINT=http://localhost:9000 diff --git a/docs/ADMIN_API.md b/docs/ADMIN_API.md index e49ac6a762..118274a5ec 100644 --- a/docs/ADMIN_API.md +++ b/docs/ADMIN_API.md @@ -5,29 +5,33 @@ This document describes the admin API endpoints. The admin API is used to manage namespaces on a `sqld` instance. 
Namespaces are isolated database within a same sqld instance. To enable the admin API, and manage namespaces, two extra flags need to be passed to `sqld`: + - `--admin-listen-addr :`: the address and port on which the admin API should listen. It must be different from the user API listen address (whi defaults to port 8080). - `--enable-namespaces`: enable namespaces for the instance. By default namespaces are disabled. ## Routes -``` +```HTTP POST /v1/namespaces/:namespace/create ``` + Create a namespace named `:namespace`. body: + ```json { "dump_url"?: string, } ``` -``` +```HTTP DELETE /v1/namespaces/:namespace ``` Delete the namespace named `:namespace`. -``` +```HTTP POST /v1/namespaces/:namespace/fork/:to ``` + Fork `:namespace` into new namespace `:to` diff --git a/docs/BUILD-RUN.md b/docs/BUILD-RUN.md index 25b1a43cdf..947eb0aed9 100644 --- a/docs/BUILD-RUN.md +++ b/docs/BUILD-RUN.md @@ -22,8 +22,9 @@ You can query sqld using one of the provided [client libraries](../libsql-server#client-libraries). You can also use the [turso cli](https://docs.turso.tech/reference/turso-cli) to connect to the sqld instance: -``` -turso db shell http://127.0.0.1:8080 + +```console +turso db shell http://127.0.0.1:8080 ``` ## Download a prebuilt binary @@ -59,7 +60,7 @@ sqld --help ## Using a prebuilt Docker image The sqld release process publishes a Docker image to the GitHub Container -Registry. The URL is https://ghcr.io/tursodatabase/libsql-server. You can run the latest image locally +Registry. The URL is <https://ghcr.io/tursodatabase/libsql-server>. You can run the latest image locally on port 8080 with the following: ```bash @@ -194,7 +195,5 @@ development. 
cargo xtask test ``` - - -[sqld releases page]: https://github.com/libsql/sqld/releases [sqld container release tags]: https://github.com/libsql/sqld/pkgs/container/sqld [sqld release tags]: https://github.com/libsql/sqld/releases diff --git a/docs/CONSISTENCY_MODEL.md b/docs/CONSISTENCY_MODEL.md index 711c731e43..9bedf0804e 100644 --- a/docs/CONSISTENCY_MODEL.md +++ b/docs/CONSISTENCY_MODEL.md @@ -2,16 +2,16 @@ ## Building on top of sqlite -sqlite offers a strictly serializable consistency model. Since sqld is built on top of it, it inherits some of its properties. +SQLite offers a strictly serializable consistency model. Since sqld is built on top of it, it inherits some of its properties. ## Transactional consistency -Any transaction in sqld is equivalent to sqlite transaction. When a transaction is opened, on the primary or replicas alike, the view that the transaction get is "frozen" is time. any write performed by a transaction is at the same time immediately visible to itself, as well as completely isolated from any other ongoing transactions. Therefore, sqld offers serializable transactions +Any transaction in sqld is equivalent to an SQLite transaction. When a transaction is opened, on the primary or replicas alike, the view that the transaction gets is "frozen" in time. Any write performed by a transaction is at the same time immediately visible to itself, as well as completely isolated from any other ongoing transactions. Therefore, sqld offers serializable transactions. ## Real-time guarantees All operations occurring on the primary are linearizable. However, there is no guarantee that changes made to the primary are immediately visible to all replicas. Sqld guarantees that a process (connection) will always see its write. Given that the primary is linearizable, it means that a process is guaranteed to see all writes that happened on the primary up until (at least) the last write performed by the process. 
This is not true for two distinct processes on the same replica, however, that can potentially read two different points in time. For example, a read for process A on the replica might return immediately returning some state, while a read on process B issued at the same time would need to wait to sync with the primary. -Note that reads on a replica are monotonical: once a value has been witnessed, only a value at least as recent can be witnessed on any subsequent read. +Note that reads on a replica are monotonic: once a value has been witnessed, only a value at least as recent can be witnessed on any subsequent read. There are no global ordering guarantees provided by sqld: any two instances needn't be in sync at any time. diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 20be528970..f9e5fa9d1d 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -1,4 +1,4 @@ -## Overview +# Overview `sqld` is a server mode for [libSQL](https://libsql.org), which provides SQLite interface and dialect for use cases such as edge functions where it's impractical to embed a full database engine. @@ -11,7 +11,7 @@ The `sqld` consists of a: * Replica servers (optional) * mvSQLite backend (optional) -The client provides a SQLite ABI compatible inteface as a drop-in replacement for applications using libSQL or SQLite. The client library transforms SQLite C API calls into PostgreSQL wire protocol messages and sends them to the primary server. +The client provides a SQLite ABI compatible interface as a drop-in replacement for applications using libSQL or SQLite. The client library transforms SQLite C API calls into PostgreSQL wire protocol messages and sends them to the primary server. The primary server is a `sqld` process, which servers SQLite dialect over the PostgreSQL wire protocol. The server can either be backed by single-node `libSQL` database or by a mvSQLite backend, which provides improved write concurrency, high availability, and fault tolerance using FoundationDB. 
@@ -39,7 +39,7 @@ sequenceDiagram ### Writes -Clients initialte writes with, for example, the `sqlite3_exec()` API by performing a `INSERT`, `UPDATE`, or `DELETE` SQL statement. +Clients initiate writes with, for example, the `sqlite3_exec()` API by performing a `INSERT`, `UPDATE`, or `DELETE` SQL statement. The primary server is responsible for writes. The client sends writes to the primary server or a replica. If a replica receives a write, it delegates the write to the primary server. The primary server either performs the write against its local `libSQL` database or processes it via `mvSQLite`, which uses FoundationDB. diff --git a/docs/DOCKER.md b/docs/DOCKER.md index 2eb9e0f782..aeb944c686 100644 --- a/docs/DOCKER.md +++ b/docs/DOCKER.md @@ -1,8 +1,10 @@ + + # Docker image quick reference ## Launch a primary instance -``` +```console docker run --name some-sqld -p 8080:8080 -ti \ -e SQLD_NODE=primary \ ghcr.io/tursodatabase/libsql-server:latest @@ -10,7 +12,7 @@ docker run --name some-sqld -p 8080:8080 -ti \ ## Launch a replica instance -``` +```console docker run --name some-sqld-replica -p 8081:8080 -ti \ -e SQLD_NODE=replica \ -e SQLD_PRIMARY_URL=https://: \ @@ -19,7 +21,7 @@ docker run --name some-sqld-replica -p 8081:8080 -ti \ ## Running on Apple Silicon -``` +```console docker run --name some-sqld -p 8080:8080 -ti \ -e SQLD_NODE=primary \ --platform linux/amd64 \ @@ -36,13 +38,13 @@ and stable releases please use the x86_64 versions via Rosetta._ # How to extend this image -## Data Persistance +## Data Persistence -Database files are stored in the `/var/lib/sqld` in the image. To persist the -database across runs, mount this location to either a docker volume or a bind +Database files are stored in the `/var/lib/sqld` in the image. To persist the +database across runs, mount this location to either a docker volume or a bind mount on your local disk. 
-``` +```console docker run --name some-sqld -ti \ -v $(pwd)/sqld-data:/var/lib/sqld \ # you can mount local path -e SQLD_NODE=primary \ @@ -119,7 +121,7 @@ inter-node communication. Recommended to leave this on default. Simple docker compose for local development: -``` +```yaml version: "3" services: db: diff --git a/docs/HRANA_3_SPEC.md b/docs/HRANA_3_SPEC.md index c8bce89150..6887518e2a 100644 --- a/docs/HRANA_3_SPEC.md +++ b/docs/HRANA_3_SPEC.md @@ -48,8 +48,6 @@ Both encodings support forward compatibility: when a peer (client or server) receives a protocol structure that includes an unrecognized field (object property in JSON or a message field in Protobuf), it must ignore this field. - - ## Hrana over WebSocket Hrana over WebSocket runs on top of the [WebSocket protocol][rfc6455]. @@ -68,7 +66,7 @@ the Hrana protocol and forward compatibility with newer versions. The WebSocket subprotocols defined in all Hrana versions are as follows: -| Subprotocol | Version | Encoding | +| Subprotocol | Version | Encoding | |-------------|---------|----------| | `hrana1` | 1 | JSON | | `hrana2` | 2 | JSON | @@ -581,8 +579,6 @@ For example, this means that a client can send an `open_stream` request immediately followed by a batch of `execute` requests on that stream and the server will always process them in correct order. - - ## Hrana over HTTP Hrana over HTTP runs on top of HTTP. Any version of the HTTP protocol can be @@ -624,7 +620,7 @@ specified method and URL. #### Check support for version 3 (JSON) -``` +```HTTP GET v3 ``` @@ -633,7 +629,7 @@ should return a 2xx response to this request. #### Check support for version 3 (Protobuf) -``` +```text GET v3-protobuf ``` @@ -642,7 +638,7 @@ should return a 2xx response to this request. #### Execute a pipeline of requests (JSON) -``` +```HTTP POST v3/pipeline -> JSON: PipelineReqBody <- JSON: PipelineRespBody @@ -703,7 +699,7 @@ executes all requests, even if some of them return errors. 
#### Execute a pipeline of requests (Protobuf) -``` +```text POST v3-protobuf/pipeline -> Protobuf: PipelineReqBody <- Protobuf: PipelineRespBody @@ -714,7 +710,7 @@ the request and response body using Protobuf. #### Execute a batch using a cursor (JSON) -``` +```HTTP POST v3/cursor -> JSON: CursorReqBody <- line of JSON: CursorRespBody @@ -745,7 +741,7 @@ response have the same meaning as in the `v3/pipeline` endpoint. #### Execute a batch using a cursor (Protobuf) -``` +```text POST v3-protobuf/cursor -> Protobuf: CursorReqBody <- length-delimited Protobuf: CursorRespBody @@ -756,7 +752,7 @@ The `v3-protobuf/cursor` endpoint is the same as `v3/cursor` endpoint, but the request and response are encoded using Protobuf. In the response body, the structures are prefixed with a length delimiter: a -Protobuf varint that encodes the length of the structure. The first structure is +Protobuf varint that encodes the length of the structure. The first structure is `CursorRespBody`, followed by an arbitrary number of `CursorEntry` structures. ### Requests @@ -818,7 +814,7 @@ type ExecuteStreamResp = { ``` The `execute` request has the same semantics as the `execute` request in Hrana -over WebSocket. +over WebSocket. > This request was introduced in Hrana 2. @@ -946,8 +942,6 @@ the encoding indicated by the `Content-Type` response header), but the client must be able to handle responses with different bodies, such as plaintext or HTML, which might be returned by various components in the HTTP stack. - - ## Shared structures This section describes protocol structures that are common for both Hrana over @@ -1051,7 +1045,7 @@ table. The rowid value is a 64-bit signed integer encoded as a string in JSON. For other statements, the value is undefined. > This structure was introduced in Hrana 1. The `decltype` field in the `Col` -> strucure was added in Hrana 2. +> structure was added in Hrana 2. 
### Batches @@ -1161,7 +1155,7 @@ At the beginning of every batch step that is executed, the server produces a `step_begin` entry. This entry specifies the index of the step (which refers to the `steps` array in the `Batch` structure). The server sends entries for steps in the order in which they are executed. If a step is skipped (because its -condition evalated to false), the server does not send any entry for it. +condition evaluated to false), the server does not send any entry for it. After a `step_begin` entry, the server sends an arbitrary number of `row` entries that encode the individual rows produced by the statement, terminated by @@ -1284,9 +1278,6 @@ depends on the `type` field: > This structure was introduced in Hrana 1. - - - ## Protobuf schema ### Hrana over WebSocket diff --git a/docs/HTTP_V1_SPEC.md b/docs/HTTP_V1_SPEC.md index 8fc6939935..8c8ab8e373 100644 --- a/docs/HTTP_V1_SPEC.md +++ b/docs/HTTP_V1_SPEC.md @@ -2,7 +2,7 @@ Version 1 of the HTTP API ("Hrana over HTTP") is designed to complement the WebSocket-based Hrana protocol for use cases that don't require stateful -database connections and for which the additional network rountrip required by +database connections and for which the additional network roundtrip required by WebSockets relative to HTTP is not necessary. This API aims to be of production quality and it is primarily intended to be @@ -25,7 +25,7 @@ All request and response bodies are encoded in JSON, with content type ## Execute a statement -``` +```HTTP POST /v1/execute -> { @@ -44,7 +44,7 @@ Hrana. ## Execute a batch -``` +```HTTP POST /v1/batch -> { diff --git a/docs/HTTP_V2_SPEC.md b/docs/HTTP_V2_SPEC.md index a210b93572..4bd030526b 100644 --- a/docs/HTTP_V2_SPEC.md +++ b/docs/HTTP_V2_SPEC.md @@ -148,7 +148,7 @@ type ExecuteStreamResp = { } ``` -The `execute` request has the same semantics as the `execute` request in Hrana. +The `execute` request has the same semantics as the `execute` request in Hrana. 
### Execute a batch diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index d39790a183..615ecab5ca 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -4,15 +4,21 @@ Welcome to the `sqld` user guide! ## Table of Contents -* [Overview](#overview) -* [Replication](#replication) - * [TLS configuration](#tls-configuration) - * [Launching a primary server](#launching-a-primary-server) - * [Launching a replica server](#launching-a-replica-server) -* [Client Authentication](#clientauthentication) -* [Deployment](#deployment) - * [Deploying with Docker](#deploying-with-docker) - * [Deploying on Fly](#deploying-on-fly) +- [`sqld` User Guide](#sqld-user-guide) + - [Table of Contents](#table-of-contents) + - [Overview](#overview) + - [Replication](#replication) + - [TLS configuration](#tls-configuration) + - [Launching a primary server](#launching-a-primary-server) + - [Launching a replica server](#launching-a-replica-server) + - [Client Authentication](#client-authentication) + - [Deployment](#deployment) + - [Deploying with Docker](#deploying-with-docker) + - [Deploying on Fly](#deploying-on-fly) + - [Incremental snapshots](#incremental-snapshots) + - [Multitenancy](#multitenancy) + - [Path based routing](#path-based-routing) + - [Wildcard domain for development](#wildcard-domain-for-development) ## Overview @@ -37,13 +43,13 @@ In this section, we will walk you through how to set up a libsql cluster. The nodes in a `sqld` cluster communicate over gRPC with TLS. To set up a `sqld` cluster, you need the following TLS configuration: -* Certificate authority (CA) certificate and private key -* Primary server certificate and private key -* Replica server certificates and private keys +- Certificate authority (CA) certificate and private key +- Primary server certificate and private key +- Replica server certificates and private keys In TLS speak, the primary server is the server and the replica servers are the clients. 
-For *development and testing* purposes, you can generate TLS keys and certificates with: +For _development and testing_ purposes, you can generate TLS keys and certificates with: ```console python scripts/gen_certs.py @@ -51,12 +57,12 @@ python scripts/gen_certs.py The script generates the following files: -* `ca_cert.pem` -- certificate authority certificate -* `ca_key.pem` -- certificate authority private key -* `server_cert.pem` -- primary server certificate -* `server_key.pem` -- primary server private key -* `client_cert.pem` -- replica server certificate -* `client_key.pem ` -- replica server private key +- `ca_cert.pem` -- certificate authority certificate +- `ca_key.pem` -- certificate authority private key +- `server_cert.pem` -- primary server certificate +- `server_key.pem` -- primary server private key +- `client_cert.pem` -- replica server certificate +- `client_key.pem` -- replica server private key ### Launching a primary server @@ -126,20 +132,24 @@ You can find more information about the Docker image [here](./DOCKER.md). You can use the existing `fly.toml` file from this repository. Just run + ```console flyctl launch ``` + ... then pick a name and respond "Yes" when the prompt asks you to deploy. You now have `sqld` running on Fly listening for HTTP connections. 
Give it a try with this snippet, replacing `$YOUR_APP` with your app name: -``` + +```console curl -X POST -d '{"statements": ["create table testme(a,b,c)"]}' $YOUR_APP.fly.dev curl -X POST -d '{"statements": ["insert into testme values(1,2,3)"]}' $YOUR_APP.fly.dev curl -X POST -d '{"statements": ["select * from testme"]}' $YOUR_APP.fly.dev ``` -``` + +```json [{"b":2,"a":1,"c":3}] ``` @@ -161,7 +171,7 @@ NAMESPACE="$2" echo "Generated incremental snapshot $SNAPSHOT_FILE for namespace $NAMESPACE" -# At this point we can ship the snapshot file to whereever we would like but we +# At this point we can ship the snapshot file to wherever we would like but we # must delete it from its location on disk or else sqld will panic. rm $SNAPSHOT_FILE ``` @@ -221,10 +231,10 @@ async fn main() { ``` When applying snapshots the format of the file name gives certain information. -The format is `{namespace}:{log_id}:{start_frame_no:020x}-{end_frame_no:020x}.snap` where log_id represents the unqiue write ahead log and then +The format is `{namespace}:{log_id}:{start_frame_no:020x}-{end_frame_no:020x}.snap` where log_id represents the unique write ahead log and then for each unique log_id there will be snapshots starting at frame `0` up until the end. Snapshots must be applied sequentially for each log_id starting at -frame 0. +frame 0. ## Multitenancy diff --git a/docs/client_version_metrics.md b/docs/client_version_metrics.md index f22e752830..2cdb13f578 100644 --- a/docs/client_version_metrics.md +++ b/docs/client_version_metrics.md @@ -1,4 +1,4 @@ -# Client verison metrics +# Client version metrics Currently, `sqld` supports clients passing their client version via a `x-libsql-client-version` header. The value of this header should follow this @@ -7,7 +7,6 @@ pattern: - Hrana/Remote clients should be `libsql-remote--` - Embedded replica clients should be `libsql-rpc--` - `` should be a reference to the language, for example, `rust`/`go`/`js`/`python`. 
diff --git a/docs/http_api.md b/docs/http_api.md index 7822c23b2e..f42adc06ae 100644 --- a/docs/http_api.md +++ b/docs/http_api.md @@ -14,13 +14,13 @@ The `Value` type represents an SQLite value. It has 4 variants: - Blob: some binary data, encoded in base64 - Null: the null value. -All these types map to JSON straightforwardly, except for blobs, that are represented as an object with { "base64": /* base64 encoded blob */} +All these types map to JSON straightforwardly, except for blobs, that are represented as an object with `{ "base64": /* base64 encoded blob */ }` ### Response format Responses to queries can either succeed or fail. When they succeed a payload specific to the endpoint being called is returned with a HTTP 200 (OK) status code. -In the case of a failure, a specific `Error` response is returned with the approriate HTTP status code. The `Error` response has the following structure: +In the case of a failure, a specific `Error` response is returned with the appropriate HTTP status code. The `Error` response has the following structure: ```ts type Error = { @@ -40,7 +40,7 @@ Where `T` is the type of the payload in case of success. #### Queries -``` +```HTTP POST / ``` @@ -66,6 +66,7 @@ Queries are either simple strings or `ParamQuery` that accept parameter bindings ##### Response Format On success, a request to `POST /` returns a response with an HTTP 200 code and a JSON body with the following structure: + ```ts type BatchResponse = Array|Error @@ -91,39 +92,38 @@ The `Query` can either be a plain query string, such as `SELECT * FROM users` or Queries with bound parameters come in two types: 1. Named bound parameters, where the parameter is referred to by a name and is prefixed with a `:`, a `@` or a `$`. If the query uses named parameters, then the `params` field of the query should be an object mapping parameters to their value. 
- -- Example: a query with named bound parameters - -```json -{ - "q": "SELECT * FROM users WHERE name = :name AND age = &age AND height > @height AND address = $address", - "params": { - ":name": "adhoc", - "age" : "18", - "@height" : "170", - "$address" : "very nice place", - } -} -``` -The prefix of the parameter does not have to be specified in the `params` field (i.e, `name` instead of `:name`). If a -param `name` is given in `params` it will be binded to `:name`, `$name` and `@name` unless `params` contain a better -match. `:name` is a better match for `:name` than `name`. -One named parameter can occur in a query multiple times but does not have to be repeated in `params`. + - Example: a query with named bound parameters + + ```json + { + "q": "SELECT * FROM users WHERE name = :name AND age = &age AND height > @height AND address = $address", + "params": { + ":name": "adhoc", + "age" : "18", + "@height" : "170", + "$address" : "very nice place" + } + } + ``` + + The prefix of the parameter does not have to be specified in the `params` field (i.e., `name` instead of `:name`). If a + param `name` is given in `params` it will be bound to `:name`, `$name` and `@name` unless `params` contains a better + match. `:name` is a better match for `:name` than `name`. + One named parameter can occur in a query multiple times but does not have to be repeated in `params`. 2. Positional query parameters, bound by their position in the parameter list, and prefixed `?`. If the query uses positional parameters, the values should be provided as an array to the `params` field. 
+ - Example: a query with positional bound parameters -- Example: a query with positional bound parameters - -```json -{ - "q": "SELECT * FROM users WHERE name = ?", - "params": ["adhoc"] -} -``` + ```json + { + "q": "SELECT * FROM users WHERE name = ?", + "params": ["adhoc"] + } + ``` #### Health -``` +```HTTP GET /health ``` @@ -131,7 +131,7 @@ The health route return an `HTTP 200 (OK)` if the server is up and running. #### Version -``` +```HTTP GET /version ``` diff --git a/libsql-server/README.md b/libsql-server/README.md index 9700e23bc1..44069f92b0 100644 --- a/libsql-server/README.md +++ b/libsql-server/README.md @@ -13,11 +13,9 @@ case. ## Features * SQLite dialect layered on top of HTTP. -* SQLite-compatible API that you can drop-in with `LD_PRELOAD` in your - application to switch from local database to a remote database. +* SQLite-compatible API that is a drop-in replacement with `LD_PRELOAD` in your application to switch from local database to a remote database. * Read replica support. -* Integration with [mvSQLite](https://github.com/losfair/mvsqlite) for high - availability and fault tolerance. +* Integration with [mvSQLite](https://github.com/losfair/mvsqlite) for high availability and fault tolerance. ## Build and run @@ -28,7 +26,7 @@ using Homebrew, Docker, or your own Rust toolchain. Run the command below to run all tests for `libsql` and `libsql-server`. 
-``` +```bash cargo xtask test ``` diff --git a/libsql-server/perf/pgbench/README.md b/libsql-server/perf/pgbench/README.md index 647fa0d812..9927f1ca0f 100644 --- a/libsql-server/perf/pgbench/README.md +++ b/libsql-server/perf/pgbench/README.md @@ -2,7 +2,7 @@ Setup database: -``` +```console psql -h 127.0.0.1 -p 5432 < pg_bench_schema.sql ```` diff --git a/libsql-shell/README.md b/libsql-shell/README.md index efb4cc2194..5348689dae 100644 --- a/libsql-shell/README.md +++ b/libsql-shell/README.md @@ -4,17 +4,20 @@ This project contains [libSQL](https://libsql.org)'s new shell, implemented in Rust on top of a few industry standard crates: `rusqlite`, `rustyline`, `clap`, `tracing`, etc. The long-term goal of this project is to: - - Match all features of the original libSQL shell (inherited from SQLite and implemented in C), - - Add new features on top, for instance: - - importing and exporting additional formats (Parquet and friends); - - accessing network resources. - - Make contributions to libSQL as easy as possible. + +- Match all features of the original libSQL shell (inherited from SQLite and implemented in C), +- Add new features on top, for instance: + - importing and exporting additional formats (Parquet and friends); + - accessing network resources. +- Make contributions to libSQL as easy as possible. ## Status + This project is still in early development phase, so expect missing items! ## Example -``` + +```console $ ./libsql libSQL version 0.2.0 Connected to a transient in-memory database. diff --git a/libsql-sqlite3/doc/compile-for-windows.md b/libsql-sqlite3/doc/compile-for-windows.md index b8a50afb32..a0df54710e 100644 --- a/libsql-sqlite3/doc/compile-for-windows.md +++ b/libsql-sqlite3/doc/compile-for-windows.md @@ -3,12 +3,12 @@ Here are step-by-step instructions on how to build SQLite from canonical source on a new Windows 11 PC, as of 2023-11-01: - 1. Install Microsoft Visual Studio. The free "community edition" + 1. 
Install Microsoft Visual Studio. The free "community edition" will work fine. Do a standard install for C++ development. SQLite only needs the "cl" compiler and the "nmake" build tool. - 2. Under the "Start" menu, find "All Apps" then go to "Visual Studio 20XX" + 2. Under the "Start" menu, find "All Apps" then go to "Visual Studio 20XX" and find "x64 Native Tools Command Prompt for VS 20XX". Pin that application to your task bar, as you will use it a lot. Bring up an instance of this command prompt and do all of the subsequent steps @@ -16,80 +16,69 @@ canonical source on a new Windows 11 PC, as of 2023-11-01: a 32-bit build.) The subsequent steps will not work in a vanilla DOS prompt. Nor will they work in PowerShell. - 3. Install TCL development libraries. This note assumes that you will + 3. Install TCL development libraries. This note assumes that you will install the TCL development libraries in the "`c:\Tcl`" directory. Make adjustments if you want TCL installed somewhere else. SQLite needs both the "tclsh.exe" command-line tool as part of the build process, and the "tcl86.lib" library in order to run tests. You will need TCL version 8.6 or later. -

    -
  1. Get the TCL source archive, perhaps from + + 1. Get the TCL source archive, perhaps from [https://www.tcl.tk/software/tcltk/download.html](https://www.tcl.tk/software/tcltk/download.html). -
  2. Untar or unzip the source archive. CD into the "win/" subfolder - of the source tree. -
  3. Run: `nmake /f makefile.vc release` -
  4. Run: `nmake /f makefile.vc INSTALLDIR=c:\Tcl install` -
  5. CD to `c:\Tcl\lib`. In that subfolder make a copy of the - "`tcl86t.lib`" file to the alternative name "`tcl86.lib`" - (omitting the second 't'). Leave the copy in the same directory - as the original. -
  6. CD to `c:\Tcl\bin`. Make a copy of the "`tclsh86t.exe`" - file into "`tclsh.exe`" (without the "86t") in the same directory. -
  7. Add `c:\Tcl\bin` to your %PATH%. To do this, go to Settings - and search for "path". Select "edit environment variables for - your account" and modify your default PATH accordingly. - You will need to close and reopen your command prompts after - making this change. -
- - 4. Download the SQLite source tree and unpack it. CD into the + 2. Untar or unzip the source archive. CD into the "win/" subfolder + of the source tree. + 3. Run: `nmake /f makefile.vc release` + 4. Run: `nmake /f makefile.vc INSTALLDIR=c:\Tcl install` + 5. CD to `c:\Tcl\lib`. In that subfolder make a copy of the + "`tcl86t.lib`" file to the alternative name "`tcl86.lib`" + (omitting the second 't'). Leave the copy in the same directory as the original. + 6. CD to `c:\Tcl\bin`. Make a copy of the "`tclsh86t.exe`" + file into "`tclsh.exe`" (without the "86t") in the same directory. + 7. Add `c:\Tcl\bin` to your %PATH%. To do this, go to Settings + and search for "path". Select "edit environment variables for + your account" and modify your default PATH accordingly. + You will need to close and reopen your command prompts after + making this change. + + 4. Download the SQLite source tree and unpack it. CD into the toplevel directory of the source tree. - 5. Set the TCLDIR environment variable to point to your TCL installation. + 5. Set the TCLDIR environment variable to point to your TCL installation. Like this: - + - `set TCLDIR=c:\Tcl` - 6. Run the "`Makefile.msc`" makefile with an appropriate target. + 6. Run the "`Makefile.msc`" makefile with an appropriate target. Examples: - + - `nmake /f makefile.msc` + - `nmake /f makefile.msc sqlite3.c` + - `nmake /f makefile.msc devtest` + - `nmake /f makefile.msc releasetest` ## 32-bit Builds Doing a 32-bit build is just like doing a 64-bit build with the following minor changes: - 1. Use the "x86 Native Tools Command Prompt" instead of - "x64 Native Tools Command Prompt". "**x86**" instead of "**x64**". +1. Use the "x86 Native Tools Command Prompt" instead of "x64 Native Tools Command Prompt". "**x86**" instead of "**x64**". - 2. Use a different installation directory for TCL. - The recommended directory is `c:\tcl32`. Thus you end up - with two TCL builds: - +2. Use a different installation directory for TCL. 
+ The recommended directory is `c:\tcl32`. 
+ Thus you end up with two TCL builds: + - `c:\tcl` ← 64-bit (the default) + - `c:\tcl32` ← 32-bit - 3. Ensure that `c:\tcl32\bin` comes before `c:\tcl\bin` on - your PATH environment variable. You can achieve this using - a command like: - +3. Ensure that `c:\tcl32\bin` comes before `c:\tcl\bin` on + your PATH environment variable. You can achieve this using + a command like: + - `set PATH=c:\tcl32\bin;%PATH%` ## Building a DLL -The command the developers use for building the deliverable DLL on the +The command the developers use for building the deliverable DLL on the [download page](https://sqlite.org/download.html) is as follows: -> ~~~~ +~~~~cmd nmake /f Makefile.msc sqlite3.dll USE_NATIVE_LIBPATHS=1 "OPTS=-DSQLITE_ENABLE_FTS3=1 -DSQLITE_ENABLE_FTS4=1 -DSQLITE_ENABLE_FTS5=1 -DSQLITE_ENABLE_RTREE=1 -DSQLITE_ENABLE_JSON1=1 -DSQLITE_ENABLE_GEOPOLY=1 -DSQLITE_ENABLE_SESSION=1 -DSQLITE_ENABLE_PREUPDATE_HOOK=1 -DSQLITE_ENABLE_SERIALIZE=1 -DSQLITE_ENABLE_MATH_FUNCTIONS=1" ~~~~ @@ -103,7 +92,7 @@ with TCL in order to function. The [sqlite3_analyzer.exe program](https://sqlit is an example. You can build as described above, and then enter: -> ~~~~ +~~~~cmd nmake /f Makefile.msc sqlite3_analyzer.exe ~~~~ @@ -112,16 +101,15 @@ will depend on having the "tcl86.dll" library somewhere on your %PATH%. Use the following steps to build an executable that has the TCL library statically linked so that it does not depend on separate DLL: - 1. Use the appropriate "Command Prompt" window - either x86 or + 1. Use the appropriate "Command Prompt" window - either x86 or x64, depending on whether you want a 32-bit or 64-bit executable. - 2. Untar the TCL source tarball into a fresh directory. CD into + 2. Untar the TCL source tarball into a fresh directory. CD into the "win/" subfolder. - 3. Run: `nmake /f makefile.vc OPTS=nothreads,static shell` - + 3. Run: `nmake /f makefile.vc OPTS=nothreads,static shell` - 4. CD into the "Release*" subfolder that is created (note the + 4. 
CD into the "Release*" subfolder that is created (note the wildcard - the full name of the directory might vary). There you will find the "tcl86s.lib" file. Copy this file into the same directory that you put the "tcl86.lib" on your initial @@ -129,20 +117,23 @@ statically linked so that it does not depend on separate DLL: "`C:\Tcl32\lib`" for 32-bit builds and "`C:\Tcl\lib`" for 64-bit builds.) - 5. CD into your SQLite source code directory and build the desired + 5. CD into your SQLite source code directory and build the desired utility program, but add the following extra arguments to the nmake command line: -
+
+      ~~~cmd
       CCOPTS="-DSTATIC_BUILD" LIBTCL="tcl86s.lib netapi32.lib user32.lib"
-      
+ ~~~ +

So, for example, to build a statically linked version of sqlite3_analyzer.exe, you might type: -

+      ~~~cmd
       nmake /f Makefile.msc CCOPTS="-DSTATIC_BUILD" LIBTCL="tcl86s.lib netapi32.lib user32.lib" sqlite3_analyzer.exe
-      
+ ~~~ - 6. After your executable is built, you can verify that it does not + 6. After your executable is built, you can verify that it does not depend on the TCL DLL by running: -
+
+      ~~~cmd
       dumpbin /dependents sqlite3_analyzer.exe
-      
+ ~~~ diff --git a/libsql-sqlite3/doc/jsonb.md b/libsql-sqlite3/doc/jsonb.md index 5beed1631d..3ff3d44c0a 100644 --- a/libsql-sqlite3/doc/jsonb.md +++ b/libsql-sqlite3/doc/jsonb.md @@ -6,12 +6,12 @@ JSON. ## 1.0 What Is JSONB? Beginning with version 3.45.0 (circa 2024-01-01), SQLite supports an -alternative binary encoding of JSON which we call "JSONB". JSONB is +alternative binary encoding of JSON which we call "JSONB". JSONB is a binary format that stored as a BLOB. The advantage of JSONB over ordinary text RFC 8259 JSON is that JSONB is both slightly smaller (by between 5% and 10% in most cases) and -can be processed in less than half the number of CPU cycles. The built-in +can be processed in less than half the number of CPU cycles. The built-in [JSON SQL functions] of SQLite can accept either ordinary text JSON or the binary JSONB encoding for any of their JSON inputs. @@ -23,46 +23,46 @@ representations and are not in any way binary compatible. The central idea behind this JSONB specification is that each element begins with a header that includes the size and type of that element. The header takes the place of punctuation such as double-quotes, -curly-brackes, square-brackets, commas, and colons. Since the size +curly-brackets, square-brackets, commas, and colons. Since the size and type of each element is contained in its header, the element can be read faster since it is no longer necessary to carefully scan forward -looking for the closing delimiter. The payload of JSONB is the same -as for corresponding text JSON. The same payload bytes occur in the -same order. The only real difference between JSONB and ordinary text +looking for the closing delimiter. The payload of JSONB is the same +as for corresponding text JSON. The same payload bytes occur in the +same order. The only real difference between JSONB and ordinary text JSON is that JSONB includes a binary header on each element and omits delimiter and separator punctuation. 
### 1.1 Internal Use Only The details of the JSONB are not intended to be visible to application -developers. Application developers should look at JSONB as an opaque BLOB -used internally by SQLite. Nevertheless, we want the format to be backwards -compatible across all future versions of SQLite. To that end, the format -is documented by this file in the source tree. But this file should be +developers. Application developers should look at JSONB as an opaque BLOB +used internally by SQLite. Nevertheless, we want the format to be backwards +compatible across all future versions of SQLite. To that end, the format +is documented by this file in the source tree. But this file should be used only by SQLite core developers, not by developers of applications that only use SQLite. ## 2.0 The Purpose Of This Document JSONB is not intended as an external format to be used by -applications. JSONB is designed for internal use by SQLite only. +applications. JSONB is designed for internal use by SQLite only. Programmers do not need to understand the JSONB format in order to use it effectively. Applications should access JSONB only through the [JSON SQL functions], not by looking at individual bytes of the BLOB. However, JSONB is intended to be portable and backwards compatible -for all future versions of SQLite. In other words, you should not have +for all future versions of SQLite. In other words, you should not have to export and reimport your SQLite database files when you upgrade to -a newer SQLite version. For that reason, the JSONB format needs to +a newer SQLite version. For that reason, the JSONB format needs to be well-defined. This document is therefore similar in purpose to the [SQLite database file format] document that describes the on-disk -format of an SQLite database file. Applications are not expected +format of an SQLite database file. Applications are not expected to directly read and write the bits and bytes of SQLite database files. 
The SQLite database file format is carefully documented so that it -can be stable and enduring. In the same way, the JSONB representation +can be stable and enduring. In the same way, the JSONB representation of JSON is documented here so that it too can be stable and enduring, not so that applications can read or writes individual bytes. @@ -72,10 +72,10 @@ JSONB is a direct translation of the underlying text JSON. The difference is that JSONB uses a binary encoding that is faster to parse compared to the detailed syntax of text JSON. -Each JSON element is encoded as a header and a payload. The header +Each JSON element is encoded as a header and a payload. The header determines type of element (string, numeric, boolean, null, object, or -array) and the size of the payload. The header can be between 1 and -9 bytes in size. The payload can be any size from zero bytes up to the +array) and the size of the payload. The header can be between 1 and +9 bytes in size. The payload can be any size from zero bytes up to the maximum allowed BLOB size. ### 3.1 Payload Size @@ -84,104 +84,103 @@ The upper four bits of the first byte of the header determine size of the header and possibly also the size of the payload. If the upper four bits have a value between 0 and 11, then the header is exactly one byte in size and the payload size is determined by those -upper four bits. If the upper four bits have a value between 12 and 15, +upper four bits. If the upper four bits have a value between 12 and 15, that means that the total header size is 2, 3, 5, or 9 bytes and the payload size is unsigned big-endian integer that is contained in the -subsequent bytes. The size integer is the one byte that following the +subsequent bytes. The size integer is the one byte that follows the initial header byte if the upper four bits are 12, two bytes if the upper bits are 13, four bytes if the upper bits -are 14, and eight bytes if the upper bits are 15. 
The current design +are 14, and eight bytes if the upper bits are 15. The current design of SQLite does not support BLOB values larger than 2GiB, so the eight-byte variant of the payload size integer will never be used by the current code. The eight-byte payload size integer is included in the specification to allow for future expansion. The header for an element does *not* need to be in its simplest -form. For example, consider the JSON numeric value "`1`". +form. For example, consider the JSON numeric value "`1`". That element can be encode in five different ways: - * `0x13 0x31` - * `0xc3 0x01 0x31` - * `0xd3 0x00 0x01 0x31` - * `0xe3 0x00 0x00 0x00 0x01 0x31` - * `0xf3 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x01 0x31` +* `0x13 0x31` +* `0xc3 0x01 0x31` +* `0xd3 0x00 0x01 0x31` +* `0xe3 0x00 0x00 0x00 0x01 0x31` +* `0xf3 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x01 0x31` The shortest encoding is preferred, of course, and usually happens with -primitive elements such as numbers. However the total size of an array +primitive elements such as numbers. However the total size of an array or object might not be known exactly when the header of the element is -first generated. It is convenient to reserve space for the largest +first generated. It is convenient to reserve space for the largest possible header and then go back and fill in the correct payload size -at the end. This technique can result in array or object headers that +at the end. This technique can result in array or object headers that are larger than absolutely necessary. ### 3.2 Element Type The least-significant four bits of the first byte of the header (the first -byte masked against 0x0f) determine element type. The following codes are +byte masked against 0x0f) determine element type. The following codes are used: -
    -
  1. NULL → -The element is a JSON "null". The payload size for a true JSON NULL must -must be zero. Future versions of SQLite might extend the JSONB format -with elements that have a zero element type but a non-zero size. In that -way, legacy versions of SQLite will interpret the element as a NULL +0. **NULL** → +The element is a JSON "null". The payload size for a true JSON NULL must +be zero. Future versions of SQLite might extend the JSONB format +with elements that have a zero element type but a non-zero size. In that +way, legacy versions of SQLite will interpret the element as a NULL for backwards compatibility while newer versions will interpret the element in some other way. -

  2. TRUE → -The element is a JSON "true". The payload size must be zero for a actual -"true" value. Elements with type 1 and a non-zero payload size are -reserved for future expansion. Legacy implementations that see an element +1. **TRUE** → +The element is a JSON "true". The payload size must be zero for an actual +"true" value. Elements with type 1 and a non-zero payload size are +reserved for future expansion. Legacy implementations that see an element type of 1 with a non-zero payload size should continue to interpret that element as "true" for compatibility. -

  3. FALSE → -The element is a JSON "false". The payload size must be zero for a actual -"false" value. Elements with type 2 and a non-zero payload size are -reserved for future expansion. Legacy implementations that see an element +2. **FALSE** → +The element is a JSON "false". The payload size must be zero for an actual +"false" value. Elements with type 2 and a non-zero payload size are +reserved for future expansion. Legacy implementations that see an element type of 2 with a non-zero payload size should continue to interpret that element as "false" for compatibility. -

  4. INT → +3. **INT** → The element is a JSON integer value in the canonical -RFC 8259 format, without extensions. The payload is the ASCII +RFC 8259 format, without extensions. The payload is the ASCII text representation of that numeric value. -

  5. INT5 → +4. **INT5** → The element is a JSON integer value that is not in the -canonical format. The payload is the ASCII -text representation of that numeric value. Because the payload is in a +canonical format. The payload is the ASCII +text representation of that numeric value. Because the payload is in a non-standard format, it will need to be translated when the JSONB is converted into RFC 8259 text JSON. -

  6. FLOAT → +5. **FLOAT** → The element is a JSON floating-point value in the canonical -RFC 8259 format, without extensions. The payload is the ASCII +RFC 8259 format, without extensions. The payload is the ASCII text representation of that numeric value. -

  7. FLOAT5 → +6. **FLOAT5** → The element is a JSON floating-point value that is not in the -canonical format. The payload is the ASCII -text representation of that numeric value. Because the payload is in a +canonical format. The payload is the ASCII +text representation of that numeric value. Because the payload is in a non-standard format, it will need to be translated when the JSONB is converted into RFC 8259 text JSON. -

  8. TEXT → +7. **TEXT** → The element is a JSON string value that does not contain any escapes nor any characters that need to be escaped for either SQL or -JSON. The payload is the UTF8 text representation of the string value. -The payload does not include string delimiters. +JSON. The payload is the UTF8 text representation of the string value. +The payload does *not* include string delimiters. -

  9. TEXTJ → +8. **TEXTJ** → The element is a JSON string value that contains -RFC 8259 character escapes (such as "\n" or "\u0020"). +RFC 8259 character escapes (such as "`\n`" or "`\u0020`"). Those escapes will need to be translated into actual UTF8 if this element is [json_extract|extracted] into SQL. The payload is the UTF8 text representation of the escaped string value. -The payload does not include string delimiters. +The payload does *not* include string delimiters. -

  10. TEXT5 → +9. **TEXT5** → The element is a JSON string value that contains character escapes, including some character escapes that part of JSON5 and which are not found in the canonical RFC 8259 spec. @@ -189,41 +188,39 @@ Those escapes will need to be translated into standard JSON prior to rendering the JSON as text, or into their actual UTF8 characters if this element is [json_extract|extracted] into SQL. The payload is the UTF8 text representation of the escaped string value. -The payload does not include string delimiters. +The payload does *not* include string delimiters. -

  11. TEXTRAW → +10. **TEXTRAW** → The element is a JSON string value that contains UTF8 characters that need to be escaped if this string is rendered into standard JSON text. -The payload does not include string delimiters. +The payload does *not* include string delimiters. -

  12. ARRAY → -The element is a JSON array. The payload contains +11. **ARRAY** → +The element is a JSON array. The payload contains JSONB elements that comprise values contained within the array. -

  13. OBJECT → -The element is a JSON object. The payload contains +12. **OBJECT** → +The element is a JSON object. The payload contains pairs of JSONB elements that comprise entries for the JSON object. The first element in each pair must be a string (types 7 through 10). The second element of each pair may be any types, including nested arrays or objects. -

  14. RESERVED-13 → -Reserved for future expansion. Legacy implements that encounter this +13. **RESERVED-13** → +Reserved for future expansion. Legacy implementations that encounter this element type should raise an error. -

  15. RESERVED-14 → -Reserved for future expansion. Legacy implements that encounter this +14. **RESERVED-14** → +Reserved for future expansion. Legacy implementations that encounter this element type should raise an error. -

  16. RESERVED-15 → -Reserved for future expansion. Legacy implements that encounter this -element type should raise an error. +15. **RESERVED-15** → +Reserved for future expansion. Legacy implementations that encounter this +element type should raise an error. -

- Element types outside the range of 0 to 12 are reserved for future -expansion. The current implement raises an error if see an element type -other than those listed above. However, future versions of SQLite might +expansion. The current implementation raises an error if it sees an element type +other than those listed above. However, future versions of SQLite might use of the three remaining element types to implement indexing or similar optimizations, to speed up lookup against large JSON arrays and/or objects. @@ -235,56 +229,55 @@ When converting from text into JSONB, we do not want the converter subroutine to burn CPU cycles converting elements values into some standard format which might never be used. Format conversion is "lazy" - it is deferred until actually -needed. This has implications for the JSONB format design: +needed. This has implications for the JSONB format design: - 1. Numeric values are stored as text, not a numbers. The values are + 1. Numeric values are stored as text, not as numbers. The values are a direct copy of the text JSON values from which they are derived. - 2. There are multiple element types depending on the details of value - formats. For example, INT is used for pure RFC-8259 integer + 2. There are multiple element types depending on the details of value + formats. For example, INT is used for pure RFC-8259 integer literals and INT5 exists for JSON5 extensions such as hexadecimal - notation. FLOAT is used for pure RFC-8259 floating point literals - and FLOAT5 is used for JSON5 extensions. There are four different + notation. FLOAT is used for pure RFC-8259 floating point literals + and FLOAT5 is used for JSON5 extensions. There are four different representations of strings, depending on where the string came from and how special characters within the string are escaped. 
A second goal of JSONB is that it should be capable of serving as the "parse tree" for JSON when a JSON value is being processed by the -various [JSON SQL functions] built into SQLite. Before JSONB was +various [JSON SQL functions] built into SQLite. Before JSONB was developed, operations such [json_replace()] and [json_patch()] and similar worked in three stages: - - 1. Translate the text JSON into a internal format that is + 1. Translate the text JSON into an internal format that is easier to scan and edit. - 2. Perform the requested operation on the JSON. - 3. Translate the internal format back into text. + 2. Perform the requested operation on the JSON. + 3. Translate the internal format back into text. JSONB seeks to serve as the internal format directly - bypassing -the first and third stages of that process. Since most of the CPU +the first and third stages of that process. Since most of the CPU cycles are spent on the first and third stages, that suggests that JSONB processing will be much faster than text JSON processing. So when processing JSONB, only the second stage of the three-stage -process is required. But when processing text JSON, it is still necessary -to do stages one and three. If JSONB is to be used as the internal +process is required. But when processing text JSON, it is still necessary +to do stages one and three. If JSONB is to be used as the internal binary representation, this is yet another reason to store numeric -values as text. Storing numbers as text minimizes the amount of -conversion work needed for stages one and three. This is also why -there are four different representations of text in JSONB. Different +values as text. Storing numbers as text minimizes the amount of +conversion work needed for stages one and three. This is also why +there are four different representations of text in JSONB. 
Different text representations are used for text coming from different sources (RFC-8259 JSON, JSON5, or SQL string values) and conversions only happen if and when they are actually needed. ### 3.4 Valid JSONB BLOBs -A valid JSONB BLOB consists of a single JSON element. The element must -exactly fill the BLOB. This one element is often a JSON object or array +A valid JSONB BLOB consists of a single JSON element. The element must +exactly fill the BLOB. This one element is often a JSON object or array and those usually contain additional elements as its payload, but the -element can be a primite value such a string, number, boolean, or null. +element can be a primitive value such a string, number, boolean, or null. When the built-in JSON functions are attempting to determine if a BLOB argument is a JSONB or just a random BLOB, they look at the header of the outer element to see that it is well-formed and that the element -completely fills the BLOB. If these conditions are met, then the BLOB +completely fills the BLOB. If these conditions are met, then the BLOB is accepted as a JSONB value. diff --git a/libsql-sqlite3/doc/libsql_extensions.md b/libsql-sqlite3/doc/libsql_extensions.md index d4602ac542..ba02f15999 100644 --- a/libsql-sqlite3/doc/libsql_extensions.md +++ b/libsql-sqlite3/doc/libsql_extensions.md @@ -68,16 +68,19 @@ can be amended with `ALTER TABLE ALTER COLUMN` as well: ```sql libsql> CREATE TABLE t(id, v); ``` + ```sql libsql> ALTER TABLE t ALTER COLUMN v TO v NOT NULL CHECK(v < 42); libsql> .schema t CREATE TABLE t(id, v NOT NULL CHECK(v < 42)); ``` + ```sql libsql> ALTER TABLE t ALTER COLUMN v TO v TEXT DEFAULT 'hai'; libsql> .schema t CREATE TABLE t(id, v TEXT DEFAULT 'hai'); ``` + ```sql libsql> ALTER TABLE t ALTER COLUMN v TO v; libsql> .schema t @@ -96,6 +99,7 @@ and can be enabled with a `PRAGMA foreign_keys=ON` statement at runtime. Regular tables use an implicitly defined, unique, 64-bit rowid column as its primary key. 
If rowid value is not specified during insertion, it's auto-generated with the following heuristics: + 1. Find the current max rowid value. 2. If max value is less than i64::max, use the next available value 3. If max value is i64::max: @@ -110,11 +114,13 @@ The newly introduced `RANDOM ROWID` option can be used to explicitly state that ### Usage `RANDOM ROWID` keywords can be used during table creation, in a manner similar to its syntactic cousin, `WITHOUT ROWID`: + ```sql CREATE TABLE shopping_list(item text, quantity int) RANDOM ROWID; ``` On insertion, pseudorandom rowid values will be generated: + ```sql CREATE TABLE shopping_list(item text, quantity int) RANDOM ROWID; INSERT INTO shopping_list(item, quantity) VALUES ('bread', 2); @@ -131,48 +137,57 @@ rowid item quantity `RANDOM ROWID` is mutually exclusive with `WITHOUT ROWID` option, and cannot be used with tables having an `AUTOINCREMENT` primary key. - ## WebAssembly-based user-defined functions (experimental) -In addition to being able to define functions via the C API (http://www.sqlite.org/c3ref/create_function.html), it's possible to enable experimental support for `CREATE FUNCTION` syntax allowing users to dynamically register functions coded in WebAssembly. +In addition to being able to define functions via the C API (<http://www.sqlite.org/c3ref/create_function.html>), it's possible to enable experimental support for `CREATE FUNCTION` syntax allowing users to dynamically register functions coded in WebAssembly. Once enabled, `CREATE FUNCTION` and `DROP FUNCTION` are available in SQL. They act as syntactic sugar for managing data stored in a special internal table: `libsql_wasm_func_table(name TEXT, body TEXT)`. This table can also be inspected with regular tools - e.g. to see which functions are registered and what's their source code. 
### How to enable This feature is experimental and opt-in, and can be enabled by the following configure: + ```sh ./configure --enable-wasm-runtime ``` Then, in your source code, the internal table for storing WebAssembly source code can be created via `libsql_try_initialize_wasm_func_table(sqlite3 *db)` function. -You can also download a pre-compiled binary from https://github.com/libsql/libsql/releases/tag/libsql-0.1.0, or use a docker image for experiments: -``` +You can also download a pre-compiled binary from <https://github.com/libsql/libsql/releases/tag/libsql-0.1.0>, or use a docker image for experiments: + +```bash docker run -it piotrsarna/libsql:libsql-0.1.0-wasm-udf ./libsql ``` #### Configurations WebAssembly runtime can be enabled in multiple configurations: + 1. Based on [Wasmtime](https://wasmtime.dev/), linked statically (default) -```sh -./configure --enable-wasm-runtime -``` + + ```sh + ./configure --enable-wasm-runtime + ``` + 2. Based on [Wasmtime](https://wasmtime.dev/), linked dynamically -```sh -./configure --enable-wasm-runtime-dynamic -``` + + ```sh + ./configure --enable-wasm-runtime-dynamic + ``` + 3. Based on [WasmEdge](https://wasmedge.org/), linked dynamically with `libwasmedge` -```sh -./configure --enable-wasm-runtime-wasmedge -``` -> **NOTE:** WasmEdge backend comes without the ability to translate WebAssembly text format (WAT) to Wasm binary format. In this configuration, user-defined functions can only be defined with their source code passed as a compiled binary blob. In [libSQL bindgen](https://bindgen.libsql.org) you can produce it by checking the "as a binary blob" checkbox. -> **NOTE2:** WasmEdge backend depends on `libwasmedge` compatible with their 0.11.2 release. If your package manager does not have it available, download it from the official [release page](https://github.com/WasmEdge/WasmEdge/releases). 
-If you're interested in a setup that links `libwasmedge.a` statically, let us know, or, better yet, send a patch! 
+ ```sh + ./configure --enable-wasm-runtime-wasmedge + ``` + + > **NOTE:** WasmEdge backend comes without the ability to translate WebAssembly text format (WAT) to Wasm binary format. In this configuration, user-defined functions can only be defined with their source code passed as a compiled binary blob. In [libSQL bindgen](https://bindgen.libsql.org) you can produce it by checking the "as a binary blob" checkbox. + > **NOTE2:** WasmEdge backend depends on `libwasmedge` compatible with their 0.11.2 release. If your package manager does not have it available, download it from the official [release page](https://github.com/WasmEdge/WasmEdge/releases). + + If you're interested in a setup that links `libwasmedge.a` statically, let us know, or, better yet, send a patch! #### shell support + In order to initialize the internal WebAssembly function lookup table in libsql shell (sqlite3 binary), one can use the `.init_wasm_func_table` command. This command is safe to be called multiple times, even if the internal table already exists. ### CREATE FUNCTION @@ -180,6 +195,7 @@ In order to initialize the internal WebAssembly function lookup table in libsql Creating a function requires providing its name and WebAssembly source code (in WebAssembly text format). The ABI for translating between WebAssembly types and libSQL types is to be standardized soon. Example SQL: + ```sql CREATE FUNCTION IF NOT EXISTS fib LANGUAGE wasm AS ' (module @@ -223,13 +239,15 @@ CREATE FUNCTION IF NOT EXISTS fib LANGUAGE wasm AS ' (export "fib" (func $fib))) '; ``` -[1] WebAssembly source: https://github.com/psarna/libsql_bindgen/blob/55b69d8d08fc0e6e096b37467c05c5dd10398eb7/src/lib.rs#L68-L75 . + +[1] WebAssembly source: <https://github.com/psarna/libsql_bindgen/blob/55b69d8d08fc0e6e096b37467c05c5dd10398eb7/src/lib.rs#L68-L75> . ### Drop function Dropping a dynamically created function can be done via a `DROP FUNCTION` statement. 
Example: + ```sql DROP FUNCTION IF EXISTS fib; ``` @@ -239,6 +257,7 @@ DROP FUNCTION IF EXISTS fib; This paragraph is based on our [blog post](https://blog.chiselstrike.com/webassembly-functions-for-your-sqlite-compatible-database-7e1ad95a2aa7) which describes the process in more detail. In order for a WebAssembly function to be runnable from libSQL, it must follow its ABI - which in this case can be reduced to "how to translate libSQL types to WebAssembly and back". Fortunately, both projects have a very small set of supported types, so the whole mapping fits in a short table: + | libSQL type | Wasm type | |---|---| | INTEGER | i64 | @@ -248,6 +267,7 @@ In order for a WebAssembly function to be runnable from libSQL, it must follow i | NULL | i32* | where `i32` represents a pointer to WebAssembly memory. Underneath, indirectly represented types are encoded as follows: + | libSQL type | representation | |---|---| | TEXT | [1 byte with value `3` (`SQLITE_TEXT`)][null-terminated string] | @@ -256,9 +276,10 @@ where `i32` represents a pointer to WebAssembly memory. Underneath, indirectly r The compiled module should export at least the function that is supposed to be later used as a user-defined function, and its `memory` instance. -Encoding type translation manually for each function can be cumbersome, so we provide helper libraries for languages compilable to WebAssembly. Right now the only implementation is for Rust: https://crates.io/crates/libsql_bindgen +Encoding type translation manually for each function can be cumbersome, so we provide helper libraries for languages compilable to WebAssembly. 
Right now the only implementation is for Rust: <https://crates.io/crates/libsql_bindgen> With `libsql_bindgen`, a native Rust function can be annotated with a macro: + ```rust #[libsql_bindgen::libsql_bindgen] pub fn decrypt(data: String, key: String) -> String { @@ -270,16 +291,19 @@ pub fn decrypt(data: String, key: String) -> String { ``` Compiling the function to WebAssembly will produce code that can be registered as a user-defined function in libSQL. -``` + +```bash cargo build --release --target wasm32-unknown-unknown ``` -For quick experiments, our playground application can be used: https://bindgen.libsql.org +For quick experiments, our playground application can be used: <https://bindgen.libsql.org> After the function is compiled, it can be registered via SQL by: + ```sql CREATE FUNCTION your_function LANGUAGE wasm AS ``` + , where `` is either a binary .wasm blob or text presented in WebAssembly Text format. See an example in `CREATE FUNCTION` paragraph above. @@ -291,17 +315,19 @@ Write-ahead log is a journaling mode which enables nice write concurrency charac ### API In order to register a new set of virtual WAL methods, these methods need to be implemented. This is the current API: -https://github.com/tursodatabase/libsql/blob/main/libsql-sqlite3/src/wal.h +<https://github.com/tursodatabase/libsql/blob/main/libsql-sqlite3/src/wal.h> ### Registering WAL methods After the implementation is ready, the following public functions can be used to manage it: + ```c libsql_wal_methods_find libsql_wal_methods_register libsql_wal_methods_unregister ``` + , and they are quite self-descriptive. They also work similarly to their `sqlite3_vfs*` counterparts, which they were modeled after. It is important to note that wal_methods in themselves should be stateless. There are registered globally, and accessible from every connection. When state needs to be accessed from the WAL methods, state can be passed as the 7th argument to `libsql_open_v2`. 
This state will then become accessible in the `pMethodData` field of the `libsql_wal` struct passed to the WAL methods. 
@@ -309,7 +335,8 @@ It is important to note that wal_methods in themselves should be stateless. Ther ### Using WAL methods Custom WAL methods need to be declared when opening a new database connection. -That can be achieved either programatically by using a new flavor of the `sqlite3_open*` function: +That can be achieved either programmatically by using a new flavor of the `sqlite3_open*` function: + ```c int libsql_open( const char *filename, /* Database filename (UTF-8) */ @@ -321,7 +348,8 @@ int libsql_open( ``` ... or via URI, by using a new `wal` parameter: -``` + +```text .open file:test.db?wal=my_impl_of_wal_methods ``` diff --git a/libsql-sqlite3/doc/testrunner.md b/libsql-sqlite3/doc/testrunner.md index d420076c4f..f0c7da64cf 100644 --- a/libsql-sqlite3/doc/testrunner.md +++ b/libsql-sqlite3/doc/testrunner.md @@ -1,35 +1,28 @@ - # The testrunner.tcl Script -
+1. [Overview](#overview) +2. [Binary Tests](#binary_tests) + 1. [Organization of Tcl Tests](#organization_tests) + 2. [Commands to Run Tests](#run_tests) + 3. [Investigating Binary Test Failures](#binary_test_failures) +3. [Source Code Tests](#source_code_tests) + 1. [Commands to Run SQLite Tests](#commands_to_run_tests) + 2. [ZipVFS Tests](#zipvfs_tests) + 3. [Source Code Test Failures](#source_code_test_failures) + 4. [Investigating Source Code Test Failures](#source_code_test_failures) +4. [Extra testrunner.tcl Options](#testrunner_options) +5. [Controlling CPU Core Utilization](#cpu_cores) + # 1. Overview -testrunner.tcl is a Tcl script used to run multiple SQLite tests using +testrunner.tcl is a Tcl script used to run multiple SQLite tests using multiple jobs. It supports the following types of tests: - * Tcl test scripts. - - * Tests run with [make] commands. Specifically, at time of writing, +* Tcl test scripts. +* Tests run with [make] commands. Specifically, at time of writing, [make fuzztest], [make mptest], [make sourcetest] and [make threadtest]. testrunner.tcl pipes the output of all tests and builds run into log file @@ -65,6 +58,7 @@ to run tests (see "Binary Tests" below). Sometimes it builds testfixture and other binaries in specific configurations to test (see "Source Tests"). + # 2. Binary Tests The commands described in this section all run various combinations of the Tcl @@ -76,13 +70,14 @@ these tests is therefore: 1. Build the "testfixture" (or "testfixture.exe" for windows) binary using whatever method seems convenient. - 2. Test the binary built in step 1 by running testrunner.tcl with it, + 2. Test the binary built in step 1 by running testrunner.tcl with it, perhaps with various options. The following sub-sections describe the various options that can be passed to testrunner.tcl to test binary testfixture builds. + ## 2.1. Organization of Tcl Tests Tcl tests are stored in files that match the pattern *\*.test*.
They are @@ -94,7 +89,7 @@ contain Tcl tests - a handful are Tcl scripts designed to invoke other The **veryquick** set of tests is a subset of all Tcl test scripts in the source tree. In includes most tests, but excludes some that are very slow. Almost all fault-injection tests (those that test the response of the library -to OOM or IO errors) are excluded. It is defined in source file +to OOM or IO errors) are excluded. It is defined in source file *test/permutations.test*. The **full** set of tests includes all Tcl test scripts in the source tree. @@ -104,9 +99,9 @@ source tree. File *permutations.test* defines various test "permutations". A permutation consists of: - * A subset of Tcl test scripts, and +* A subset of Tcl test scripts, and - * Runtime configuration to apply before running each test script +* Runtime configuration to apply before running each test script (e.g. enabling auto-vacuum, or disable lookaside). Running **all** tests is to run all tests in the full test set, plus a dozen @@ -114,6 +109,7 @@ or so permutations. The specific permutations that are run as part of "all" are defined in file *testrunner_data.tcl*. + ## 2.2. Commands to Run Tests To run the "veryquick" test set, use either of the following: @@ -138,11 +134,10 @@ a specified pattern (e.g. all tests that start with "fts5"), either of: ``` Strictly speaking, for a test to be run the pattern must match the script -filename, not including the directory, using the rules of Tcl's +filename, not including the directory, using the rules of Tcl's \[string match\] command. Except that before the matching is done, any "%" characters specified as part of the pattern are transformed to "\*". - To run "all" tests (full + permutations): ``` @@ -150,6 +145,7 @@ To run "all" tests (full + permutations): ``` + ## 2.3. 
Investigating Binary Test Failures If a test fails, testrunner.tcl reports name of the Tcl test script and, if @@ -171,16 +167,17 @@ Or, if the failure occured as part of a permutation: TODO: An example instead of "$PERMUTATION" and $PATH\_TO\_SCRIPT? + # 3. Source Code Tests -The commands described in this section invoke the C compiler to build +The commands described in this section invoke the C compiler to build binaries from the source tree, then use those binaries to run Tcl and other tests. The advantages of this are that: - * it is possible to test multiple build configurations with a single - command, and +* it is possible to test multiple build configurations with a single + command, and - * it ensures that tests are always run using binaries created with the +* it ensures that tests are always run using binaries created with the same set of compiler options. The testrunner.tcl commands described in this section may be run using @@ -190,10 +187,11 @@ shell that supports SQLite 3.31.1 or newer via "package require sqlite3". TODO: ./configure + Makefile.msc build systems. + ## 3.1. Commands to Run SQLite Tests The **mdevtest** command is equivalent to running the veryquick tests and -the [make fuzztest] target once for each of two --enable-all builds - one +the [make fuzztest] target once for each of two --enable-all builds - one with debugging enabled and one without: ``` @@ -235,7 +233,7 @@ of the specific tests run. As with source code tests, one or more patterns may be appended to any of the above commands (mdevtest, sdevtest or release). In that case only Tcl tests (no fuzz or other tests) that match the specified -pattern are run. For example, to run the just the Tcl rtree tests in all +pattern are run. For example, to run just the Tcl rtree tests in all builds and configurations supported by "release": ``` @@ -243,6 +241,7 @@ builds and configurations supported by "release": ``` + ## 3.2.
Running ZipVFS Tests testrunner.tcl can build a zipvfs-enabled testfixture and use it to run @@ -260,6 +259,7 @@ test both SQLite and Zipvfs with a single command: ``` + ## 3.3. Investigating Source Code Test Failures Investigating a test failure that occurs during source code testing is a @@ -288,6 +288,7 @@ or else to build a testfixture (or testfixture.exe) binary with which to run a Tcl test script, as described above. + # 4. Extra testrunner.tcl Options The testrunner.tcl script options in this section may be used with both source @@ -311,6 +312,7 @@ would normally execute into the testrunner.log file. Example: ``` + # 5. Controlling CPU Core Utilization When running either binary or source code tests, testrunner.tcl reports the @@ -322,7 +324,7 @@ number of jobs it intends to use to stdout. e.g. ... more output ... ``` -By default, testfixture.tcl attempts to set the number of jobs to the number +By default, testfixture.tcl attempts to set the number of jobs to the number of real cores on the machine. This can be overridden using the "--jobs" (or -j) switch: @@ -337,8 +339,5 @@ running by exucuting the following command from the directory containing the testrunner.log and testrunner.db files: ``` - $ ./testfixture $TESTDIR/testrunner.tcl njob $NEW_NUMBER_OF_JOBS + ./testfixture $TESTDIR/testrunner.tcl njob $NEW_NUMBER_OF_JOBS ``` - - - diff --git a/libsql-sqlite3/doc/trusted-schema.md b/libsql-sqlite3/doc/trusted-schema.md index d431fd49a3..9f290ee86c 100644 --- a/libsql-sqlite3/doc/trusted-schema.md +++ b/libsql-sqlite3/doc/trusted-schema.md @@ -22,15 +22,15 @@ maliciously corrupted by an attacker. The basic idea is to tag every SQL function and virtual table with one of three risk levels: - 1. Innocuous - 2. Normal - 3. Direct-Only + 1. Innocuous + 2. Normal + 3. Direct-Only Innocuous functions/vtabs are safe and can be used at any time. 
Direct-only elements, in contrast, might have cause side-effects and should only be used from top-level SQL, not from within triggers or views nor -in elements of the schema such as CHECK constraint, DEFAULT values, -generated columns, index expressions, or in the WHERE clause of a +in elements of the schema such as CHECK constraint, DEFAULT values, +generated columns, index expressions, or in the WHERE clause of a partial index that are potentially under the control of an attacker. Normal elements behave like Innocuous if TRUSTED\_SCHEMA=on and behave like direct-only if TRUSTED\_SCHEMA=off. @@ -50,14 +50,14 @@ Direct-only elements that have side-effects that go outside the database file in which it lives, or return information from outside of the database file. Examples of direct-only elements include: - 1. The fts3\_tokenizer() function - 2. The writefile() function - 3. The readfile() function - 4. The zipvfs virtual table - 5. The csv virtual table + 1. The fts3\_tokenizer() function + 2. The writefile() function + 3. The readfile() function + 4. The zipvfs virtual table + 5. The csv virtual table We do not want an attacker to be able to add these kinds of things to -the database schema and possibly trick a high-privilege application +the database schema and possibly trick a high-privilege application from performing any of these actions. Therefore, functions and vtabs with side-effects are marked as Direct-Only. @@ -78,48 +78,42 @@ triggers are safe. ## Specific changes - 1. New sqlite3\_db\_config() option SQLITE\_DBCONFIG\_TRUSTED\_SCHEMA for + 1. New sqlite3\_db\_config() option SQLITE\_DBCONFIG\_TRUSTED\_SCHEMA for turning TRUSTED\_SCHEMA on and off. It defaults to ON. - 2. Compile-time option -DSQLITE\_TRUSTED\_SCHEMA=0 causes the default + 2. Compile-time option -DSQLITE\_TRUSTED\_SCHEMA=0 causes the default TRUSTED\_SCHEMA setting to be off. - 3. New pragma "PRAGMA trusted\_schema=(ON\|OFF);". This provides access + 3. 
New pragma "PRAGMA trusted\_schema=(ON\|OFF);". This provides access to the TRUSTED_SCHEMA setting for application coded using scripting languages or other secondary languages where they are unable to make calls to sqlite3\_db\_config(). - 4. New options for the "enc" parameter to sqlite3\_create\_function() and + 4. New options for the "enc" parameter to sqlite3\_create\_function() and its kin: -
    -
  1. _SQLITE\_INNOCUOUS_ → tags the new functions as Innocuous -
  2. _SQLITE\_DIRECTONLY_ → tags the new functions as Direct-Only -
- - 5. New options to sqlite3\_vtab\_config(): -
    -
  1. _SQLITE\_VTAB\_INNOCUOUS_ → tags the vtab as Innocuous -
  2. _SQLITE\_VTAB\_DIRECTONLY_ → tags the vtab as Direct-Only -
- - 6. Change many of the functions and virtual tables in the SQLite source + - _SQLITE\_INNOCUOUS_ → tags the new functions as Innocuous + - _SQLITE\_DIRECTONLY_ → tags the new functions as Direct-Only + + 5. New options to sqlite3\_vtab\_config(): + - _SQLITE\_VTAB\_INNOCUOUS_ → tags the vtab as Innocuous + - _SQLITE\_VTAB\_DIRECTONLY_ → tags the vtab as Direct-Only + + 6. Change many of the functions and virtual tables in the SQLite source tree to use one of the tags above. - 7. Enhanced PRAGMA function\_list and virtual-table "pragma\_function\_list" + 7. Enhanced PRAGMA function\_list and virtual-table "pragma\_function\_list" with additional columns. The columns now are: - -

The last four columns are new. - - 8. The function\_list PRAGMA now also shows all entries for each function. + - \_name\_ → Name of the function + - \_builtin\_ → 1 for built-in functions. + - \_type\_ → 's'=Scalar, 'a'=Aggregate, 'w'=Window + - \_enc\_ → 'utf8', 'utf16le', or 'utf16be' + - \_narg\_ → number of argument + - \_flags\_ → Bitmask of SQLITE\_INNOCUOUS, SQLITE\_DIRECTONLY, + SQLITE\_DETERMINISTIC, SQLITE\_SUBTYPE, and + SQLITE\_FUNC\_INTERNAL flags. + The last four columns are new. + + 8. The function\_list PRAGMA now also shows all entries for each function. So, for example, if a function can take either 2 or 3 arguments, there are separate rows for the 2-argument and 3-argument versions of the function. @@ -131,10 +125,10 @@ of SQL functions that meet various criteria. For example, to see all SQL functions that are never allowed to be used in the schema or in trigger or views: -~~~ - SELECT DISTINCT name FROM pragma_function_list - WHERE (flags & 0x80000)!=0 - ORDER BY name; +~~~sql +SELECT DISTINCT name FROM pragma_function_list + WHERE (flags & 0x80000)!=0 + ORDER BY name; ~~~ Doing the same is not possible for virtual tables, as a virtual table diff --git a/libsql-sqlite3/doc/vdbesort-memory.md b/libsql-sqlite3/doc/vdbesort-memory.md index 5c3dd62d2f..1ac80efd13 100644 --- a/libsql-sqlite3/doc/vdbesort-memory.md +++ b/libsql-sqlite3/doc/vdbesort-memory.md @@ -5,8 +5,8 @@ Memory allocation is slightly different depending on: - * whether or not SQLITE_CONFIG_SMALL_MALLOC is set, and - * whether or not worker threads are enabled. +* whether or not SQLITE_CONFIG_SMALL_MALLOC is set, and +* whether or not worker threads are enabled. ## SQLITE_CONFIG_SMALL_MALLOC=0 @@ -27,7 +27,7 @@ main thread to continue to accumulate keys. Buffers are reused once they have been flushed, so in this case at most (nWorker+1) buffers are allocated and used, where nWorker is the number of configured worker threads. 
-There are no other significant users of heap memory in the sorter module. +There are no other significant users of heap memory in the sorter module. Once sorted buffers of keys have been flushed to disk, they are read back either by mapping the file (via sqlite3_file.xFetch()) or else read back in one page at a time. diff --git a/libsql-sqlite3/doc/wal-lock.md b/libsql-sqlite3/doc/wal-lock.md index d74bb88b63..a2ea996cd6 100644 --- a/libsql-sqlite3/doc/wal-lock.md +++ b/libsql-sqlite3/doc/wal-lock.md @@ -3,8 +3,8 @@ On some Unix-like systems, SQLite may be configured to use POSIX blocking locks by: - * building the library with SQLITE\_ENABLE\_SETLK\_TIMEOUT defined, and - * configuring a timeout in ms using the sqlite3\_busy\_timeout() API. +* building the library with SQLITE\_ENABLE\_SETLK\_TIMEOUT defined, and +* configuring a timeout in ms using the sqlite3\_busy\_timeout() API. Blocking locks may be advantageous as (a) waiting database clients do not need to continuously poll the database lock, and (b) using blocking locks @@ -12,7 +12,7 @@ facilitates transfer of OS priority between processes when a high priority process is blocked by a lower priority one. Only read/write clients use blocking locks. Clients that have read-only access -to the \*-shm file nevery use blocking locks. +to the \*-shm file never use blocking locks. Threads or processes that access a single database at a time never deadlock as a result of blocking database locks. But it is of course possible for threads @@ -22,9 +22,9 @@ detect the deadlock and return an error. ## Wal Recovery Wal database "recovery" is a process required when the number of connected -database clients changes from zero to one. In this case, a client is +database clients changes from zero to one. In this case, a client is considered to connect to the database when it first reads data from it. -Before recovery commences, an exclusive WRITER lock is taken. +Before recovery commences, an exclusive WRITER lock is taken. 
Without blocking locks, if two clients attempt recovery simultaneously, one fails to obtain the WRITER lock and either invokes the busy-handler callback or @@ -33,7 +33,7 @@ client blocks on the WRITER lock. ## Database Readers -Usually, read-only are not blocked by any other database clients, so they +Usually, read-only transactions are not blocked by any other database clients, so they have no need of blocking locks. If a read-only transaction is being opened on a snapshot, the CHECKPOINTER @@ -48,10 +48,10 @@ cases. A database writer must obtain the exclusive WRITER lock. It uses a blocking lock to do so if any of the following are true: - * the transaction is an implicit one consisting of a single DML or DDL +* the transaction is an implicit one consisting of a single DML or DDL statement, or - * the transaction is opened using BEGIN IMMEDIATE or BEGIN EXCLUSIVE, or - * the first SQL statement executed following the BEGIN command is a DML or +* the transaction is opened using BEGIN IMMEDIATE or BEGIN EXCLUSIVE, or +* the first SQL statement executed following the BEGIN command is a DML or DDL statement (not a read-only statement like a SELECT). In other words, in all cases except when an open read-transaction is upgraded @@ -61,11 +61,11 @@ to a write-transaction. In that case a non-blocking lock is used. Database checkpointers takes the following locks, in order: - * The exclusive CHECKPOINTER lock. - * The exclusive WRITER lock (FULL, RESTART and TRUNCATE only). - * Exclusive lock on read-mark slots 1-N. These are immediately released after being taken. - * Exclusive lock on read-mark 0. - * Exclusive lock on read-mark slots 1-N again. These are immediately released +* The exclusive CHECKPOINTER lock. +* The exclusive WRITER lock (FULL, RESTART and TRUNCATE only). +* Exclusive lock on read-mark slots 1-N. These are immediately released after being taken. +* Exclusive lock on read-mark 0. +* Exclusive lock on read-mark slots 1-N again.
These are immediately released after being taken (RESTART and TRUNCATE only). All of the above use blocking locks. @@ -75,14 +75,12 @@ All of the above use blocking locks. With blocking locks configured, the only cases in which clients should see an SQLITE\_BUSY error are: - * if the OS does not grant a blocking lock before the configured timeout +* if the OS does not grant a blocking lock before the configured timeout expires, and - * when an open read-transaction is upgraded to a write-transaction. +* when an open read-transaction is upgraded to a write-transaction. In all other cases the blocking locks implementation should prevent clients from having to handle SQLITE\_BUSY errors and facilitate appropriate transfer of priorities between competing clients. Clients that lock multiple databases simultaneously must be wary of deadlock. - - diff --git a/libsql-sqlite3/ext/jni/README.md b/libsql-sqlite3/ext/jni/README.md index fc7b5f7611..634277cf49 100644 --- a/libsql-sqlite3/ext/jni/README.md +++ b/libsql-sqlite3/ext/jni/README.md @@ -3,7 +3,7 @@ SQLite3 via JNI This directory houses a Java Native Interface (JNI) binding for the sqlite3 API. If you are reading this from the distribution ZIP file, -links to resources in the canonical source tree will note work. The +links to resources in the canonical source tree will not work. The canonical copy of this file can be browsed at: @@ -35,7 +35,7 @@ Project goals/requirements: build-level dependencies for specific IDEs and toolchains. We welcome the addition of build files for arbitrary environments insofar as they neither interfere with each other nor become - a maintenance burden for the sqlite developers. + a maintenance burden for the SQLite developers. Non-goals: @@ -47,7 +47,7 @@ Non-goals: - Support for mixed-mode operation, where client code accesses SQLite both via the Java-side API and the C API via their own native - code. Such cases would be a minefield of potential mis-interactions + code. 
Such cases would be a minefield of potential misinteractions between this project's JNI bindings and mixed-mode client code. @@ -169,7 +169,7 @@ deliberately return an error code, instead of segfaulting, when passed a `null`. Client-defined callbacks _must never throw exceptions_ unless _very -explitly documented_ as being throw-safe. Exceptions are generally +explicitly documented_ as being throw-safe. Exceptions are generally reserved for higher-level bindings which are constructed to specifically deal with them and ensure that they do not leak C-level resources. In some cases, callback handlers are permitted to throw, in diff --git a/libsql-sqlite3/test/json/README.md b/libsql-sqlite3/test/json/README.md index 4ebbda6d3f..fd01d0a8fe 100644 --- a/libsql-sqlite3/test/json/README.md +++ b/libsql-sqlite3/test/json/README.md @@ -2,65 +2,58 @@ The files in this subdirectory are used to help measure the performance of the SQLite JSON functions, especially in relation to handling large JSON inputs. -# 1.0 Prerequisites +# Prerequisites - * Standard SQLite build environment (SQLite source tree, compiler, make, etc.) +* Standard SQLite build environment (SQLite source tree, compiler, make, etc.) +* Valgrind +* Fossil (only the "fossil xdiff" command is used by this procedure) +* tclsh - * Valgrind +# Setup - * Fossil (only the "fossil xdiff" command is used by this procedure) - - * tclsh - -# 2.0 Setup - - * Run: "`tclsh json-generator.tcl | sqlite3 json100mb.db`" to create +* Run: `tclsh json-generator.tcl | sqlite3 json100mb.db` to create the 100 megabyte test database. Do this so that the "json100mb.db" file lands in the directory from which you will run tests, not in the test/json subdirectory of the source tree. - - * Make a copy of "json100mb.db" into "jsonb100mb.db" - change the prefix +* Make a copy of "json100mb.db" into "jsonb100mb.db" - change the prefix from "json" to "jsonb". +* Bring up jsonb100mb.db in the sqlite3 command-line shell. 
+ Convert all of the content into JSONB using commands like this: - * Bring up jsonb100mb.db in the sqlite3 command-line shell. - Convert all of the content into JSONB using a commands like this: + ```sql + UPDATE data1 SET x=jsonb(x); + VACUUM; + ``` -> UPDATE data1 SET x=jsonb(x); -> VACUUM; +* Build the baseline sqlite3.c file with sqlite3.h and shell.c. - * Build the baseline sqlite3.c file with sqlite3.h and shell.c. + ```sh + make clean sqlite3.c + ``` -> make clean sqlite3.c - - * Run "`sh json-speed-check.sh trunk`". This creates the baseline - profile in "jout-trunk.txt" for the preformance test using text JSON. - - * Run "`sh json-speed-check.sh trunk --jsonb`". This creates the +* Run "`sh json-speed-check.sh trunk`". This creates the baseline + profile in "jout-trunk.txt" for the performance test using text JSON. +* Run "`sh json-speed-check.sh trunk --jsonb`". This creates the baseline profile in "joutb-trunk.txt" for the performance test for processing JSONB - - * (Optional) Verify that the json100mb.db database really does contain - approximately 100MB of JSON content by running: - -> SELECT sum(length(x)) FROM data1; -> SELECT * FROM data1 WHERE NOT json_valid(x); - -# 3.0 Testing - - * Build the sqlite3.c (with sqlite3.h and shell.c) to be tested. - - * Run "`sh json-speed-check.sh x1`". The profile output will appear - in jout-x1.txt. Substitute any label you want in place of "x1". - - * Run "`sh json-speed-check.sh x1 --jsonb`". The profile output will appear - in joutb-x1.txt. Substitute any label you want in place of "x1". - - * Run the script shown below in the CLI. - Divide 2500 by the real elapse time from this test - to get an estimate for number of MB/s that the JSON parser is - able to process.
- -> .open json100mb.db -> .timer on -> WITH RECURSIVE c(n) AS (VALUES(1) UNION ALL SELECT n+1 FROM c WHERE n<25) -> SELECT sum(json_valid(x)) FROM c, data1; +* (Optional) Verify that the json100mb.db database really does contain + approximately 100MB of JSON content by running: + + ```sql + SELECT sum(length(x)) FROM data1; + SELECT * FROM data1 WHERE NOT json_valid(x); + ``` + +# Testing + +* Build the sqlite3.c (with sqlite3.h and shell.c) to be tested. +* Run "`sh json-speed-check.sh x1`". The profile output will appear in jout-x1.txt. Substitute any label you want in place of "x1". +* Run "`sh json-speed-check.sh x1 --jsonb`". The profile output will appear in joutb-x1.txt. Substitute any label you want in place of "x1". +* Run the script shown below in the CLI. Divide 2500 by the real elapsed time from this test to get an estimate for the number of MB/s that the JSON parser is able to process. + + ```sql + .open json100mb.db + .timer on + WITH RECURSIVE c(n) AS (VALUES(1) UNION ALL SELECT n+1 FROM c WHERE n<25) + SELECT sum(json_valid(x)) FROM c, data1; + ``` diff --git a/tools/fuzz/README.md b/tools/fuzz/README.md index 563875a7f2..b226a673cf 100644 --- a/tools/fuzz/README.md +++ b/tools/fuzz/README.md @@ -1,6 +1,6 @@ # Fuzz -install carfo afl: +install cargo afl: `cargo install cargo-afl` build the fuzz crate: @@ -11,5 +11,4 @@ run the tests: cargo afl fuzz -i dicts -x dicts/sql.dict -o out target/debug/fuzz parser ''' - -for more infos: https://rust-fuzz.github.io/book/afl.html +for more info: <https://rust-fuzz.github.io/book/afl.html> From 70efc568b933ca9e623ef43537dddfdda9f0372d Mon Sep 17 00:00:00 2001 From: Josip Igrec Date: Thu, 23 Jan 2025 17:44:40 +0100 Subject: [PATCH 2/2] Add output from `cargo xtask build-bundled` --- libsql-ffi/bundled/src/sqlite3.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index a12f706ef7..0dbfeb269d 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@
-28,7 +28,14 @@ ** README.md ** configure ** configure.ac +** doc/compile-for-windows.md +** doc/jsonb.md +** doc/testrunner.md +** doc/trusted-schema.md +** doc/vdbesort-memory.md +** doc/wal-lock.md ** ext/fts5/fts5_tokenize.c +** ext/jni/README.md ** ext/jni/src/org/sqlite/jni/capi/CollationNeededCallback.java ** ext/jni/src/org/sqlite/jni/capi/CommitHookCallback.java ** ext/jni/src/org/sqlite/jni/capi/PreupdateHookCallback.java @@ -81,6 +88,7 @@ ** src/where.c ** src/wherecode.c ** test/all.test +** test/json/README.md ** test/permutations.test ** test/rowvaluevtab.test ** tool/mkkeywordhash.c