From 87cb12d9258f7228a003c9e44d38b735ab608d97 Mon Sep 17 00:00:00 2001 From: Luke Arms Date: Tue, 21 Jan 2025 01:39:07 +1100 Subject: [PATCH] Remove binary prefixes from all strings, normalise heredoc labels - Fix issue where nowdocs with whitespace before labels are not detected (reported in #195) - Fix issue where newlines before heredocs at the start of statements are suppressed when strict PSR-12 compliance is enabled --- docs/Rules.md | 8 +- src/Filter/EvaluateStrings.php | 20 +-- src/Rule/NormaliseStrings.php | 40 +++-- src/Rule/StandardSpacing.php | 6 +- .../in/issues/0195-nowdoc-with-space.php | 7 + .../php-doc/language/types/string/011.php | 2 +- .../parser/scalar/docString/000.php | 2 +- .../parser/scalar/encapsedString/000.php | 4 +- .../issues/0195-nowdoc-with-space.php | 7 + .../php-doc/language/types/string/011.php | 2 +- .../parser/scalar/docString/000.php | 2 +- .../parser/scalar/encapsedString/000.php | 4 +- .../issues/0195-nowdoc-with-space.php | 7 + .../php-doc/language/types/string/011.php | 2 +- .../parser/scalar/docString/000.php | 2 +- .../parser/scalar/encapsedString/000.php | 4 +- .../03-tab/issues/0195-nowdoc-with-space.php | 7 + .../php-doc/language/types/string/011.php | 2 +- .../parser/formattingAttributes/000.php | 22 ++- .../parser/scalar/docString/000.php | 17 ++- .../parser/scalar/docStringNewlines/000.php | 26 +++- .../parser/scalar/encapsedString/000.php | 4 +- .../parser/scalar/flexibleDocString/000.php | 72 ++++++--- .../prettyPrinter/expr/docStrings/000.php | 72 +++++---- .../prettyPrinter/expr/docStrings/001.php | 13 +- .../prettyPrinter/expr/stringEscaping/000.php | 4 +- .../phpfmt/original/100-array-heredoc | 6 +- .../issues/0195-nowdoc-with-space.php | 7 + tests/unit/Rule/NormaliseStringsTest.php | 138 +++++++++++++++++- 29 files changed, 397 insertions(+), 112 deletions(-) create mode 100644 tests/fixtures/Formatter/in/issues/0195-nowdoc-with-space.php create mode 100644 tests/fixtures/Formatter/out/01-default/issues/0195-nowdoc-with-space.php create mode 100644 tests/fixtures/Formatter/out/02-aligned/issues/0195-nowdoc-with-space.php create mode 100644 tests/fixtures/Formatter/out/03-tab/issues/0195-nowdoc-with-space.php create mode 100644 tests/fixtures/Formatter/out/04-psr12/issues/0195-nowdoc-with-space.php diff --git a/docs/Rules.md b/docs/Rules.md index 1360a490..4419ba27 100644 --- a/docs/Rules.md +++ b/docs/Rules.md @@ -96,7 +96,9 @@ Multi-line C-style comments where every line starts with an asterisk, or at leas ### `NormaliseStrings` -(default, `processTokens()`, priority 42, tokens: `T_ENCAPSED_AND_WHITESPACE` | `T_CONSTANT_ENCAPSED_STRING`) +(default, `processTokens()`, priority 42, tokens: `"` | `T_ENCAPSED_AND_WHITESPACE` | `T_CONSTANT_ENCAPSED_STRING` | `T_START_HEREDOC`) + +Double quotes and leading whitespace are removed from heredoc and nowdoc labels. Binary prefixes are removed from all strings. Strings other than nowdocs are normalised as follows: @@ -618,7 +620,7 @@ Newlines and spaces are added after tokens that would otherwise fail to parse. T | `T_DO` | `ControlStructureSpacing` | | `T_DOC_COMMENT` | `Drupal`, `NormaliseComments`, `PlaceComments`, `WordPress` | | `T_DOLLAR_OPEN_CURLY_BRACES` | `PlaceBrackets` | -| `T_DOUBLE_QUOTE` | `ProtectStrings` | +| `T_DOUBLE_QUOTE` | `NormaliseStrings`, `ProtectStrings` | | `T_ELSE` | `ControlStructureSpacing`, `Drupal` | | `T_ELSEIF` | `ControlStructureSpacing`, `Drupal`, `SemiStrictExpressions`, `StrictExpressions` | | `T_ENCAPSED_AND_WHITESPACE` | `NormaliseStrings` | @@ -644,7 +646,7 @@ Newlines and spaces are added after tokens that would otherwise fail to parse. T | `T_QUESTION` | `AlignTernaryOperators`, `VerticalSpacing` | | `T_RETURN` | `BlankBeforeReturn` | | `T_SEMICOLON` | `StatementSpacing` | -| `T_START_HEREDOC` | `FormatHeredocs`, `ProtectStrings`, `StandardSpacing` | +| `T_START_HEREDOC` | `FormatHeredocs`, `NormaliseStrings`, `ProtectStrings`, `StandardSpacing` | | `T_SWITCH` | `SemiStrictExpressions`, `StrictExpressions`, `SwitchIndentation` | | `T_USE` | `VerticalSpacing` | | `T_WHILE` | `ControlStructureSpacing`, `SemiStrictExpressions`, `StrictExpressions` | diff --git a/src/Filter/EvaluateStrings.php b/src/Filter/EvaluateStrings.php index 199a7502..5ccecd9f 100644 --- a/src/Filter/EvaluateStrings.php +++ b/src/Filter/EvaluateStrings.php @@ -6,7 +6,6 @@ use Lkrms\PrettyPHP\Contract\Filter; use Lkrms\PrettyPHP\TokenUtil; use Salient\Utility\Exception\ShouldNotHappenException; -use Salient\Utility\Regex; /** * Evaluate strings for comparison @@ -31,8 +30,16 @@ public function filterTokens(array $tokens): array if (!$lastString) { $stack[] = $token; $lastString = $token; + // `b"` -> `"` + if ($token->id === \T_DOUBLE_QUOTE) { + $token->text = '"'; + } continue; } + // `b<<< "EOF"` -> `<<id === \T_START_HEREDOC) { + $lastString->text = '<<<' . trim(ltrim($lastString->text, 'bB'), "< \t\"\n\r") . "\n"; + } array_pop($stack); $lastString = null; continue; @@ -67,17 +74,12 @@ public function filterTokens(array $tokens): array $text = TokenUtil::unescapeBackticks($token->text); eval("\$string = \"{$text}\";"); } elseif ($lastString->id === \T_START_HEREDOC) { - $start = trim($lastString->text); - // Remove prefix if present, e.g. `b<<text); // Ignore nowdocs - if (substr($start, 0, 4) === "<<<'") { + if ($start[-1] === "'") { continue; } - $end = Regex::replace('/[^a-zA-Z0-9_]+/', '', $start); + $end = trim(ltrim($start, 'bB'), "< \t\"'"); eval("\$string = {$start}\n{$token->text}\n{$end};"); } else { // @codeCoverageIgnoreStart diff --git a/src/Rule/NormaliseStrings.php b/src/Rule/NormaliseStrings.php index 7cf0017a..b8ae8ced 100644 --- a/src/Rule/NormaliseStrings.php +++ b/src/Rule/NormaliseStrings.php @@ -10,7 +10,6 @@ use Lkrms\PrettyPHP\TokenUtil; use Salient\Utility\Exception\ShouldNotHappenException; use Salient\Utility\Regex; -use Salient\Utility\Str; /** * Normalise strings @@ -39,6 +38,8 @@ public static function getPriority(string $method): ?int public static function getTokens(AbstractTokenIndex $idx): array { return [ + \T_DOUBLE_QUOTE => true, + \T_START_HEREDOC => true, \T_CONSTANT_ENCAPSED_STRING => true, \T_ENCAPSED_AND_WHITESPACE => true, ]; @@ -49,12 +50,15 @@ public static function getTokens(AbstractTokenIndex $idx): array */ public static function needsSortedTokens(): bool { - return false; + return true; } /** * Apply the rule to the given tokens * + * Double quotes and leading whitespace are removed from heredoc and nowdoc + * labels. Binary prefixes are removed from all strings. + * * Strings other than nowdocs are normalised as follows: * * - Single- and double-quoted strings are replaced with the most readable @@ -72,15 +76,30 @@ public function processTokens(array $tokens): void { $string = ''; foreach ($tokens as $token) { + // `b"` -> `"` + if ($token->id === \T_DOUBLE_QUOTE) { + if ($token->text !== '"') { + $token->setText('"'); + } + continue; + } + + // `b<<< "EOF"` -> `<<id === \T_START_HEREDOC) { + $text = '<<<' . trim(ltrim($token->text, 'bB'), "< \t\"\n\r") . "\n"; + if ($token->text !== $text) { + $token->setText($text); + } + continue; + } + if ($token->id === \T_ENCAPSED_AND_WHITESPACE) { /** @var Token */ $openedBy = $token->String; - if ($openedBy->id === \T_START_HEREDOC && ( - Str::startsWith($openedBy->text, "<<<'") || ( - $openedBy->text[0] !== '<' - && Str::startsWith(substr($openedBy->text, 1), "<<<'") - ) - )) { + if ( + $openedBy->id === \T_START_HEREDOC + && rtrim($openedBy->text)[-1] === "'" + ) { continue; } } else { @@ -132,10 +151,9 @@ public function processTokens(array $tokens): void break; case \T_START_HEREDOC: - $closedBy = $openedBy->Data[Data::END_STRING]; - $start = trim($openedBy->text); + $start = rtrim($openedBy->text); + $end = trim($start, "<'"); $text = $token->text; - $end = trim($closedBy->text); if ($next->id === \T_END_HEREDOC) { $text = substr($text, 0, -1); $suffix = "\n"; diff --git a/src/Rule/StandardSpacing.php b/src/Rule/StandardSpacing.php index 952aeb69..7f42d25c 100644 --- a/src/Rule/StandardSpacing.php +++ b/src/Rule/StandardSpacing.php @@ -354,7 +354,11 @@ static function () use ($idx, $innerIndent, $next, $last) { continue; } - if ($token->id === \T_START_HEREDOC && $this->Formatter->Psr12) { + if ( + $token->id === \T_START_HEREDOC + && $this->Formatter->Psr12 + && $token !== $token->Statement + ) { $token->Whitespace |= Space::NO_BLANK_BEFORE | Space::NO_LINE_BEFORE | Space::SPACE_BEFORE; } } diff --git a/tests/fixtures/Formatter/in/issues/0195-nowdoc-with-space.php b/tests/fixtures/Formatter/in/issues/0195-nowdoc-with-space.php new file mode 100644 index 00000000..9d3372bc --- /dev/null +++ b/tests/fixtures/Formatter/in/issues/0195-nowdoc-with-space.php @@ -0,0 +1,7 @@ + \ No newline at end of file diff --git a/tests/fixtures/Formatter/out/01-default/3rdparty/php-parser/parser/scalar/docString/000.php b/tests/fixtures/Formatter/out/01-default/3rdparty/php-parser/parser/scalar/docString/000.php index 4744ae5c..bb0ac389 100644 --- a/tests/fixtures/Formatter/out/01-default/3rdparty/php-parser/parser/scalar/docString/000.php +++ b/tests/fixtures/Formatter/out/01-default/3rdparty/php-parser/parser/scalar/docString/000.php @@ -22,7 +22,7 @@ Test $a and $b->c test EOS; -b<< \ No newline at end of file diff --git a/tests/fixtures/Formatter/out/02-aligned/3rdparty/php-parser/parser/scalar/docString/000.php b/tests/fixtures/Formatter/out/02-aligned/3rdparty/php-parser/parser/scalar/docString/000.php index 4744ae5c..bb0ac389 100644 --- a/tests/fixtures/Formatter/out/02-aligned/3rdparty/php-parser/parser/scalar/docString/000.php +++ b/tests/fixtures/Formatter/out/02-aligned/3rdparty/php-parser/parser/scalar/docString/000.php @@ -22,7 +22,7 @@ Test $a and $b->c test EOS; -b<< \ No newline at end of file diff --git a/tests/fixtures/Formatter/out/03-tab/3rdparty/php-parser/parser/scalar/docString/000.php b/tests/fixtures/Formatter/out/03-tab/3rdparty/php-parser/parser/scalar/docString/000.php index 4744ae5c..bb0ac389 100644 --- a/tests/fixtures/Formatter/out/03-tab/3rdparty/php-parser/parser/scalar/docString/000.php +++ b/tests/fixtures/Formatter/out/03-tab/3rdparty/php-parser/parser/scalar/docString/000.php @@ -22,7 +22,7 @@ Test $a and $b->c test EOS; -b<< \ No newline at end of file diff --git a/tests/fixtures/Formatter/out/04-psr12/3rdparty/php-parser/parser/formattingAttributes/000.php b/tests/fixtures/Formatter/out/04-psr12/3rdparty/php-parser/parser/formattingAttributes/000.php index fc59505a..7beee068 100644 --- a/tests/fixtures/Formatter/out/04-psr12/3rdparty/php-parser/parser/formattingAttributes/000.php +++ b/tests/fixtures/Formatter/out/04-psr12/3rdparty/php-parser/parser/formattingAttributes/000.php @@ -14,18 +14,26 @@ "foo\nbar"; "foo\nbar{$x}"; `foo\nbar`; -`foo\nbar{$x}`; <<<'ABC' - ABC; <<<'ABC' +`foo\nbar{$x}`; + +<<<'ABC' + ABC; +<<<'ABC' foo bar - ABC; <<<'ABC' + ABC; +<<<'ABC' foo bar - ABC; <<c test - EOS; b<<d} STR; -call(<< [ + 'maybeEscapeEscapes escapes' => [ <<<'PHP' [ + <<<'PHP' + [ + <<<'PHP' +