diff --git a/CHANGELOG.md b/CHANGELOG.md index 99b8c83d..14ea9c68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ `trim_right` functions, which have been deprecated. - The `result.nil_error` function has been deprecated in favour of `result.replace_error`. +- The performance of `string.trim`, `string.trim_start`, and `string.trim_end` + has been improved on JavaScript. ## v0.41.0 - 2024-10-31 diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index a70309e5..7ad5ce97 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -283,31 +283,60 @@ export function split_once(haystack, needle) { } } -const unicode_whitespaces = [ - "\u0020", // Space - "\u0009", // Horizontal tab - "\u000A", // Line feed - "\u000B", // Vertical tab - "\u000C", // Form feed - "\u000D", // Carriage return - "\u0085", // Next line - "\u2028", // Line separator - "\u2029", // Paragraph separator -].join(""); - -const left_trim_regex = new RegExp(`^([${unicode_whitespaces}]*)`, "g"); -const right_trim_regex = new RegExp(`([${unicode_whitespaces}]*)$`, "g"); +function isUnicodeWhitespace(c) { + return ( + c === "\u0020" || // Space + c === "\u0009" || // Horizontal tab + c === "\u000A" || // Line feed + c === "\u000B" || // Vertical tab + c === "\u000C" || // Form feed + c === "\u000D" || // Carriage return + c === "\u0085" || // Next line + c === "\u2028" || // Line separator + c === "\u2029" // Paragraph separator + ); +} export function trim(string) { - return trim_start(trim_end(string)); + const start_index = find_non_whitespace_char(string); + const end_index = Math.max( + rfind_non_whitespace_char(string) + 1, + start_index + ); + + return string.substring(start_index, end_index); } export function trim_start(string) { - return string.replace(left_trim_regex, ""); + return string.substring(find_non_whitespace_char(string)); } export function trim_end(string) { - return string.replace(right_trim_regex, ""); + return string.substring(0, rfind_non_whitespace_char(string) + 1); +} + +function find_non_whitespace_char(string) { + let i = 0; + + for (; i < string.length; i++) { + if (!isUnicodeWhitespace(string[i])) { + break; + } + } + + return i; +} + +function rfind_non_whitespace_char(string) { + let i = string.length - 1; + + for (; i >= 0; i--) { + if (!isUnicodeWhitespace(string[i])) { + break; + } + } + + return i; } export function bit_array_from_string(string) { diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam index c49f543c..bc7bfb37 100644 --- a/test/gleam/string_test.gleam +++ b/test/gleam/string_test.gleam @@ -176,6 +176,23 @@ pub fn trim_end_test() { |> should.equal(" hats") } +pub fn trim_whole_string_test() { + let s = + "\u{0020}\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0085}\u{2028}\u{2029}" + + s + |> string.trim_start + |> should.equal("") + + s + |> string.trim_end + |> should.equal("") + + s + |> string.trim + |> should.equal("") +} + // unicode whitespaces pub fn trim_horizontal_tab_test() { "hats\u{0009}"