From c412b24789a8c8b9a7a4cb1222f80015f147e32b Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 20 Dec 2024 09:50:01 +0100 Subject: [PATCH] Add `filter_map`, `dedup` and some variants to the stdlib (#2120) * Stdlib: add `filter_map`, `dedup` and its variants * Fix NLS stack overflow on Windows * Get rid of hash_dedup for now, split dedup_sorted out of sort_dedup * Update snapshot tests --- ...ontract_nested_custom_diagnostics.ncl.snap | 4 +- core/stdlib/std.ncl | 157 +++++++++++++++++- core/tests/integration/main.rs | 4 +- lsp/nls/src/main.rs | 15 +- lsp/nls/src/server.rs | 1 + 5 files changed, 175 insertions(+), 6 deletions(-) diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap index 7b26d4abc6..d6bc75a1c8 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap @@ -21,9 +21,9 @@ warning: plain functions as contracts are deprecated = wrap this function using one of the constructors in `std.contract` instead, like `std.contract.from_validator` or `std.contract.custom` warning: plain functions as contracts are deprecated - ┌─ :1549:9 + ┌─ :1704:9 │ -1549 │ %contract/apply% contract (%label/push_diag% label) value, +1704 │ %contract/apply% contract (%label/push_diag% label) value, │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ applied to this term │ ┌─ [INPUTS_PATH]/errors/subcontract_nested_custom_diagnostics.ncl:3:21 diff --git a/core/stdlib/std.ncl b/core/stdlib/std.ncl index df35ae1571..91086278d7 100644 --- a/core/stdlib/std.ncl +++ b/core/stdlib/std.ncl @@ -917,6 +917,161 @@ xs |> std.array.length |> std.array.generate (fun i => f i (std.array.at i xs)), + + filter_map + : forall a b. (a -> [| 'Some b, 'None |]) -> Array a -> Array b + | doc m%" + Applies a function to every element in the given array, filtering out + `'None` results. `filter_map` combines `std.array.map` and + `std.array.filter` in a single pass. + + # Examples + + ```nickel + ["1", "hello", "2", "world"] + |> std.array.filter_map (fun x => + if std.string.is_match "^[+-]?\\d+$" x then + 'Some (std.string.to_number x) + else + 'None + ) + # => [ 1, 2 ] + ``` + "% + = fun f array => + fold_left + (fun acc x => + f x + |> match { + 'Some y => acc @ [y], + 'None => acc, + } + ) + [] + array, + + dedup + : Array Dyn -> Array Dyn + | doc m%" + Removes duplicates from an array. + + # Performance + + This function relies on equality and has a quadratic complexity in the + length of the array. It might thus be slow for large arrays or if + called repeatedly. Prefer `std.array.sort_dedup` or + `std.array.dedup_sorted` if you have efficiency concerns. + + # Examples + + ```nickel multiline + std.array.dedup [ 4, 2, 1, 3, 5, 2, 1, 4 ] + # => [ 4, 2, 1, 3, 5 ] + + std.array.dedup [ "hello", "world", "hello" ] + # => [ "hello", "world" ] + ``` + "% + = fun array => + let length = %array/length% array in + + fold_left + (fun acc x => + if elem x acc then + acc + else + acc @ [x] + ) + [] + array, + + dedup_sorted + : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a + | doc m%" + Given an array already sorted by the given ordering function, remove duplicates. + + # Preconditions + + The input array must be sorted using the same order as provided to this + function. Otherwise, some duplicates might not be removed. + + # Performance + + As opposed to `std.array.dedup`, this function has a linear time + complexity, which should improve performance especially on large arrays. + + If you sort the array only to remove duplicates, you can use + `std.array.sort_dedup` directly (which handles both sorting and + deduplication). + + # Examples + + ```nickel multiline + std.array.dedup_sorted std.number.compare [ 1, 2, 2, 3, 4, 4, 4, 5 ] + # => [ 1, 2, 3, 4, 5 ] + + std.array.dedup_sorted std.string.compare [ "hello", "world", "world" ] + # => [ "hello", "world" ] + ``` + "% + = fun cmp array => + let length = %array/length% array in + + let rec go = fun acc n => + if n == length then + acc + else + let x = %array/at% array n in + + # we would prefer to use the primitive equality here instead of + # relying on `cmp` returning `'Equal`. The latter 1. requires that + # the user-provided function is correct and 2. is more costly. + # + # Unfortunately, that would break parametricity and require to + # substitute `a` for `Dyn` in the type of `sort_dedup`, which makes + # it more annoying to use with otherwise statically typed comparison + # function. So for now, we rely on the comparison function. + let acc = + if n == 0 + || cmp (%array/at% array (n - 1)) x != 'Equal then + acc @ [x] + else + acc + in + + go acc (n + 1) + in + + go [] 0, + + sort_dedup + : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a + | doc m%" + Sorts an array based on the provided comparison operator and removes + duplicates. + + This is a combination of `std.array.sort` and `std.array.dedup_sorted`. + + # Performance + + As opposed to `std.array.dedup`, this function has a better time + complexity (`O(n*log(n))` where `n` is the size of the array), which + should improve performance especially on large arrays. + + # Examples + + ```nickel multiline + std.array.sort_dedup std.number.compare [ 4, 2, 1, 3, 5, 2, 1, 4 ] + # => [ 1, 2, 3, 4, 5 ] + + std.array.sort_dedup std.string.compare [ "world", "hello", "world" ] + # => [ "hello", "world" ] + ``` + "% + = fun cmp array => + array + |> sort cmp + |> dedup_sorted cmp, }, contract = { @@ -4386,7 +4541,7 @@ %contract/custom% (fun _label _value => 'Error { message = msg }), fail_with - | String -> Dyn + | String -> (forall a. a) | doc m%" Abort the evaluation with the given message. The error will be reported as a contract violation, as `fail_with` uses `std.FailWith` under the hood. diff --git a/core/tests/integration/main.rs b/core/tests/integration/main.rs index 420afb81c0..fdc1b498a4 100644 --- a/core/tests/integration/main.rs +++ b/core/tests/integration/main.rs @@ -27,8 +27,8 @@ fn check_annotated_nickel_file(path: &str) { let test: TestCase = read_annotated_test_case(path).expect("Failed to parse annotated program"); - // By default, cargo runs tests with a 2MB stack, which we can overflow in - // debug mode. To avoid this we run the tests with an increased stack size. + // By default, cargo runs tests with a 2MB stack, which is easy to overflow. To avoid this we + // run the tests with an increased stack size. const STACK_SIZE: usize = 4 * 1024 * 1024; let path = String::from(project_root().join(path).to_string_lossy()); diff --git a/lsp/nls/src/main.rs b/lsp/nls/src/main.rs index 57f162daef..092203234a 100644 --- a/lsp/nls/src/main.rs +++ b/lsp/nls/src/main.rs @@ -1,4 +1,4 @@ -use std::{fs, io, path::PathBuf}; +use std::{fs, io, path::PathBuf, thread}; use anyhow::Result; @@ -29,6 +29,10 @@ mod usage; mod utils; mod world; +// Default stack size is 1MB on Windows, which is too small. We make it 8MB, which is the default +// size on Linux. +const STACK_SIZE: usize = 8 * 1024 * 1024; + use crate::{config::LspConfig, trace::Trace}; #[derive(clap::Parser, Debug)] @@ -59,6 +63,15 @@ struct Options { } fn main() -> Result<()> { + let handle = thread::Builder::new() + .stack_size(STACK_SIZE) + .spawn(run) + .unwrap(); + + handle.join().unwrap() +} + +fn run() -> Result<()> { use clap::Parser; env_logger::init(); diff --git a/lsp/nls/src/server.rs b/lsp/nls/src/server.rs index 04bd43c0fe..e6371b3c6d 100644 --- a/lsp/nls/src/server.rs +++ b/lsp/nls/src/server.rs @@ -96,6 +96,7 @@ impl Server { .send(Message::Response(response)) .unwrap(); } + pub(crate) fn notify(&mut self, notification: Notification) { trace!("Sending notification: {:#?}", notification); self.connection