From 2a880da2aac534c3d44c88d0df1a6cf9f69eb448 Mon Sep 17 00:00:00 2001 From: Andrew Branch Date: Wed, 18 Dec 2024 14:05:39 -0800 Subject: [PATCH 1/5] Write path normalization without array allocations or regexes --- src/compiler/path.ts | 110 +++++++++++++++++++++++++----- src/testRunner/unittests/paths.ts | 15 ++++ 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/src/compiler/path.ts b/src/compiler/path.ts index b05216adc47b5..3f86b8260bf53 100644 --- a/src/compiler/path.ts +++ b/src/compiler/path.ts @@ -624,27 +624,103 @@ export function getNormalizedPathComponents(path: string, currentDirectory: stri } /** @internal */ -export function getNormalizedAbsolutePath(fileName: string, currentDirectory: string | undefined): string { - return getPathFromPathComponents(getNormalizedPathComponents(fileName, currentDirectory)); +export function getNormalizedAbsolutePath(path: string, currentDirectory: string | undefined): string { + let rootLength = getRootLength(path); + if (rootLength === 0 && currentDirectory) { + path = combinePaths(currentDirectory, path); + rootLength = getRootLength(path); + } + const root = path.substring(0, rootLength); + const normalizedRoot = root && normalizeSlashes(root); + // `normalized` is only initialized once `path` is determined to be non-normalized + let normalized = normalizedRoot === root ? undefined : normalizedRoot; + let index = rootLength; + let segmentStart = index; + let normalizedUpTo = index; + let seenNonDotDotSegment = rootLength !== 0; + while (index < path.length) { + // At beginning of segment + segmentStart = index; + let ch = path.charCodeAt(index); + while (isAnyDirectorySeparator(ch) && index + 1 < path.length) { + index++; + ch = path.charCodeAt(index); + } + if (index > segmentStart) { + if (normalized === undefined) { + // Seen superfluous separator + normalized = path.substring(0, segmentStart - 1); + } + segmentStart = index; + } + // Past any superfluous separators + const sepIndex = path.indexOf(directorySeparator, index + 1); + const altSepIndex = path.indexOf(altDirectorySeparator, index + 1); + let segmentEnd = sepIndex === -1 ? altSepIndex : altSepIndex === -1 ? sepIndex : Math.min(sepIndex, altSepIndex); + if (segmentEnd === -1) { + segmentEnd = path.length; + } + if (segmentEnd === altSepIndex && normalized === undefined) { + // Seen backslash + normalized = path.substring(0, segmentStart); + } + const segmentLength = segmentEnd - segmentStart; + if (segmentLength === 1 && path.charCodeAt(index) === CharacterCodes.dot) { + // "." segment (skip) + if (normalized === undefined) { + normalized = path.substring(0, normalizedUpTo); + } + } + else if (segmentLength === 2 && path.charCodeAt(index) === CharacterCodes.dot && path.charCodeAt(index + 1) === CharacterCodes.dot) { + // ".." segment + if (!seenNonDotDotSegment) { + if (normalized !== undefined) { + normalized += normalized.length === rootLength ? ".." : "/.."; + } + else { + normalizedUpTo = index + 2; + } + } + else if (normalized === undefined) { + if (normalizedUpTo - 2 >= 0) { + normalized = path.substring(0, Math.max(rootLength, path.lastIndexOf(directorySeparator, normalizedUpTo - 2))); + } + else { + normalized = path.substring(0, normalizedUpTo); + } + } + else { + const lastSlash = normalized.lastIndexOf(directorySeparator); + if (lastSlash !== -1) { + normalized = normalized.substring(0, Math.max(rootLength, lastSlash)); + } + else { + normalized = normalizedRoot; + } + if (normalized.length === rootLength) { + seenNonDotDotSegment = rootLength !== 0; + } + } + } + else if (normalized !== undefined) { + if (normalized.length !== rootLength) { + normalized += directorySeparator; + } + seenNonDotDotSegment = true; + normalized += path.substring(segmentStart, segmentEnd); + } + else { + seenNonDotDotSegment = true; + normalizedUpTo = segmentEnd; + } + index = segmentEnd + 1; + } + return normalized ?? (path.length > rootLength ? removeTrailingDirectorySeparator(path) : path); } /** @internal */ export function normalizePath(path: string): string { - path = normalizeSlashes(path); - // Most paths don't require normalization - if (!relativePathSegmentRegExp.test(path)) { - return path; - } - // Some paths only require cleanup of `/./` or leading `./` - const simplified = path.replace(/\/\.\//g, "/").replace(/^\.\//, ""); - if (simplified !== path) { - path = simplified; - if (!relativePathSegmentRegExp.test(path)) { - return path; - } - } - // Other paths require full normalization - const normalized = getPathFromPathComponents(reducePathComponents(getPathComponents(path))); + const normalized = getNormalizedAbsolutePath(path, ""); return normalized && hasTrailingDirectorySeparator(path) ? ensureTrailingDirectorySeparator(normalized) : normalized; } diff --git a/src/testRunner/unittests/paths.ts b/src/testRunner/unittests/paths.ts index e76bdc7cd26de..743e791baa181 100644 --- a/src/testRunner/unittests/paths.ts +++ b/src/testRunner/unittests/paths.ts @@ -317,9 +317,24 @@ describe("unittests:: core paths", () => { assert.strictEqual(ts.getNormalizedAbsolutePath("", ""), ""); assert.strictEqual(ts.getNormalizedAbsolutePath(".", ""), ""); assert.strictEqual(ts.getNormalizedAbsolutePath("./", ""), ""); + assert.strictEqual(ts.getNormalizedAbsolutePath("./a", ""), "a"); // Strangely, these do not normalize to the empty string. assert.strictEqual(ts.getNormalizedAbsolutePath("..", ""), ".."); assert.strictEqual(ts.getNormalizedAbsolutePath("../", ""), ".."); + assert.strictEqual(ts.getNormalizedAbsolutePath("../..", ""), "../.."); + assert.strictEqual(ts.getNormalizedAbsolutePath("../../", ""), "../.."); + assert.strictEqual(ts.getNormalizedAbsolutePath("./..", ""), ".."); + assert.strictEqual(ts.getNormalizedAbsolutePath("../../a/..", ""), "../.."); + + // More .. segments + assert.strictEqual(ts.getNormalizedAbsolutePath("src/ts/foo/../../../bar/bar.ts", ""), "bar/bar.ts"); + assert.strictEqual(ts.getNormalizedAbsolutePath("src/ts/foo/../../..", ""), ""); + // not a real URL root! + assert.strictEqual(ts.getNormalizedAbsolutePath("file:/Users/matb/projects/san/../../../../../../typings/@epic/Core.d.ts", ""), "../typings/@epic/Core.d.ts"); + // the root is `file://Users/` + assert.strictEqual(ts.getNormalizedAbsolutePath("file://Users/matb/projects/san/../../../../../../typings/@epic/Core.d.ts", ""), "file://Users/typings/@epic/Core.d.ts"); + // this is real + assert.strictEqual(ts.getNormalizedAbsolutePath("file:///Users/matb/projects/san/../../../../../../typings/@epic/Core.d.ts", ""), "file:///typings/@epic/Core.d.ts"); // Interaction between relative paths and currentDirectory. assert.strictEqual(ts.getNormalizedAbsolutePath("", "/home"), "/home"); From 9ab5fbddb4ca14ce71812adc770dc69c3c7563a3 Mon Sep 17 00:00:00 2001 From: Andrew Branch Date: Wed, 18 Dec 2024 16:05:47 -0800 Subject: [PATCH 2/5] Restore regex fast path --- src/compiler/path.ts | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/compiler/path.ts b/src/compiler/path.ts index 3f86b8260bf53..f46b70e9283af 100644 --- a/src/compiler/path.ts +++ b/src/compiler/path.ts @@ -630,6 +630,10 @@ export function getNormalizedAbsolutePath(path: string, currentDirectory: string path = combinePaths(currentDirectory, path); rootLength = getRootLength(path); } + const simple = simpleNormalizePath(path); + if (simple !== undefined) { + return simple; + } const root = path.substring(0, rootLength); const normalizedRoot = root && normalizeSlashes(root); // `normalized` is only initialized once `path` is determined to be non-normalized @@ -720,10 +724,31 @@ export function getNormalizedAbsolutePath(path: string, currentDirectory: string /** @internal */ export function normalizePath(path: string): string { - const normalized = getNormalizedAbsolutePath(path, ""); + let normalized = simpleNormalizePath(path); + if (normalized !== undefined) { + return normalized; + } + normalized = getNormalizedAbsolutePath(path, ""); return normalized && hasTrailingDirectorySeparator(path) ? ensureTrailingDirectorySeparator(normalized) : normalized; } +function simpleNormalizePath(path: string): string | undefined { + path = normalizeSlashes(path); + // Most paths don't require normalization + if (!relativePathSegmentRegExp.test(path)) { + return path; + } + // Some paths only require cleanup of `/./` or leading `./` + const simplified = path.replace(/\/\.\//g, "/").replace(/^\.\//, ""); + if (simplified !== path) { + path = simplified; + if (!relativePathSegmentRegExp.test(path)) { + return path; + } + } + return undefined; +} + function getPathWithoutRoot(pathComponents: readonly string[]) { if (pathComponents.length === 0) return ""; return pathComponents.slice(1).join(directorySeparator); From 44f665bdbf10f4d6ae39e30097550a0c738f2db0 Mon Sep 17 00:00:00 2001 From: Andrew Branch Date: Wed, 8 Jan 2025 16:58:10 -0800 Subject: [PATCH 3/5] Fix tests and improve based on benchmarks --- src/compiler/path.ts | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/src/compiler/path.ts b/src/compiler/path.ts index f46b70e9283af..d8679b98744a9 100644 --- a/src/compiler/path.ts +++ b/src/compiler/path.ts @@ -630,50 +630,46 @@ export function getNormalizedAbsolutePath(path: string, currentDirectory: string path = combinePaths(currentDirectory, path); rootLength = getRootLength(path); } - const simple = simpleNormalizePath(path); - if (simple !== undefined) { - return simple; + else { + // combinePaths normalizes slashes, so not necessary in the other branch + path = normalizeSlashes(path); } + + const simpleNormalized = simpleNormalizePath(path); + if (simpleNormalized !== undefined) { + return simpleNormalized.length > rootLength ? removeTrailingDirectorySeparator(simpleNormalized) : simpleNormalized; + } + + const length = path.length; const root = path.substring(0, rootLength); - const normalizedRoot = root && normalizeSlashes(root); // `normalized` is only initialized once `path` is determined to be non-normalized - let normalized = normalizedRoot === root ? undefined : normalizedRoot; + let normalized; let index = rootLength; let segmentStart = index; let normalizedUpTo = index; let seenNonDotDotSegment = rootLength !== 0; - while (index < path.length) { + while (index < length) { // At beginning of segment segmentStart = index; let ch = path.charCodeAt(index); - while (isAnyDirectorySeparator(ch) && index + 1 < path.length) { + while (ch === CharacterCodes.slash && index + 1 < length) { index++; ch = path.charCodeAt(index); } if (index > segmentStart) { - if (normalized === undefined) { - // Seen superfluous separator - normalized = path.substring(0, segmentStart - 1); - } + // Seen superfluous separator + normalized ??= path.substring(0, segmentStart - 1); segmentStart = index; } // Past any superfluous separators - const sepIndex = path.indexOf(directorySeparator, index + 1); - const altSepIndex = path.indexOf(altDirectorySeparator, index + 1); - let segmentEnd = sepIndex === -1 ? altSepIndex : altSepIndex === -1 ? sepIndex : Math.min(sepIndex, altSepIndex); + let segmentEnd = path.indexOf(directorySeparator, index + 1); if (segmentEnd === -1) { - segmentEnd = path.length; - } - if (segmentEnd === altSepIndex && normalized === undefined) { - // Seen backslash - normalized = path.substring(0, segmentStart); + segmentEnd = length; } const segmentLength = segmentEnd - segmentStart; if (segmentLength === 1 && path.charCodeAt(index) === CharacterCodes.dot) { // "." segment (skip) - if (normalized === undefined) { - normalized = path.substring(0, normalizedUpTo); - } + normalized ??= path.substring(0, normalizedUpTo); } else if (segmentLength === 2 && path.charCodeAt(index) === CharacterCodes.dot && path.charCodeAt(index + 1) === CharacterCodes.dot) { // ".." segment @@ -699,7 +695,7 @@ export function getNormalizedAbsolutePath(path: string, currentDirectory: string normalized = normalized.substring(0, Math.max(rootLength, lastSlash)); } else { - normalized = normalizedRoot; + normalized = root; } if (normalized.length === rootLength) { seenNonDotDotSegment = rootLength !== 0; @@ -719,7 +715,7 @@ export function getNormalizedAbsolutePath(path: string, currentDirectory: string } index = segmentEnd + 1; } - return normalized ?? (path.length > rootLength ? removeTrailingDirectorySeparator(path) : path); + return normalized ?? (length > rootLength ? removeTrailingDirectorySeparator(path) : path); } /** @internal */ From 4add8b876c6db3d31d060b3ca6be4fc3a5252932 Mon Sep 17 00:00:00 2001 From: Andrew Branch Date: Wed, 8 Jan 2025 17:13:37 -0800 Subject: [PATCH 4/5] =?UTF-8?q?Daniel=E2=80=99s=20optimization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/compiler/path.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/compiler/path.ts b/src/compiler/path.ts index d8679b98744a9..29bc103606309 100644 --- a/src/compiler/path.ts +++ b/src/compiler/path.ts @@ -735,7 +735,10 @@ function simpleNormalizePath(path: string): string | undefined { return path; } // Some paths only require cleanup of `/./` or leading `./` - const simplified = path.replace(/\/\.\//g, "/").replace(/^\.\//, ""); + let simplified = path.replace(/\/\.\//g, "/"); + if (simplified.startsWith("./")) { + simplified = simplified.slice(2); + } if (simplified !== path) { path = simplified; if (!relativePathSegmentRegExp.test(path)) { From e20e5c774fb833e5e463f7c2850ef656388be859 Mon Sep 17 00:00:00 2001 From: Andrew Branch Date: Thu, 9 Jan 2025 10:45:42 -0800 Subject: [PATCH 5/5] Move normalizeSlashes --- src/compiler/path.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/path.ts b/src/compiler/path.ts index 29bc103606309..a06359d51e549 100644 --- a/src/compiler/path.ts +++ b/src/compiler/path.ts @@ -720,6 +720,7 @@ export function getNormalizedAbsolutePath(path: string, currentDirectory: string /** @internal */ export function normalizePath(path: string): string { + path = normalizeSlashes(path); let normalized = simpleNormalizePath(path); if (normalized !== undefined) { return normalized; @@ -729,7 +730,6 @@ export function normalizePath(path: string): string { } function simpleNormalizePath(path: string): string | undefined { - path = normalizeSlashes(path); // Most paths don't require normalization if (!relativePathSegmentRegExp.test(path)) { return path;