Implementing tree-sitter based indentation logic

- Previously developed and tested in the sane-indentation package (> 0.9). Refer to atom/language-javascript#594 (comment). By itself this does nothing: the new logic is only used if the language package for the current language contains the necessary configuration (e.g., which scopes to indent on). So this PR goes together with, e.g., FILL-ME-IN in language-javascript. Updated: now without the need for 'precedingRowCondition' callbacks in the language-specific configuration.
chfritz · May 20, 2021 · a8f7fdb · a8f7fdb
1 parent 79d0a6d
commit a8f7fdb
Show file tree

Hide file tree

Showing 3 changed files with 157 additions and 17 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/src/tree-indenter.js b/src/tree-indenter.js
@@ -0,0 +1,120 @@
+
+// const log = console.debug // in dev
+const log = () => {} // in production
+
+module.exports = class TreeIndenter {
+  constructor (languageMode) {
+    this.languageMode = languageMode
+    this.scopes = languageMode.config.get('editor.scopes',
+      {scope: this.languageMode.rootScopeDescriptor})
+    log('[TreeIndenter] constructor', this.scopes)
+  }
+
+  /** tree indenter is configured for this language */
+  get isConfigured () {
+    return (!!this.scopes)
+  }
+
+  // Given a position, walk up the syntax tree, to find the highest level
+  // node that still starts here. This is to identify the column where this
+  // node (e.g., an HTML closing tag) ends.
+  _getHighestSyntaxNodeAtPosition (row, column = null) {
+    if (column == null) {
+      // Find the first character on the row that is not whitespace + 1
+      column = this.languageMode.buffer.lineForRow(row).search(/\S/) + 1
+    }
+
+    let syntaxNode
+    if (column >= 0) {
+      syntaxNode = this.languageMode.getSyntaxNodeAtPosition({row, column})
+      while (syntaxNode && syntaxNode.parent &&
+          syntaxNode.parent.startPosition.row === syntaxNode.startPosition.row &&
+          syntaxNode.parent.endPosition.row === syntaxNode.startPosition.row &&
+          syntaxNode.parent.startPosition.column === syntaxNode.startPosition.column
+      ) {
+        syntaxNode = syntaxNode.parent
+      }
+      return syntaxNode
+    }
+  }
+
+  /** Walk up the tree. Everytime we meet a scope type, check whether we
+    are coming from the first (resp. last) child. If so, we are opening
+    (resp. closing) that scope, i.e., do not count it. Otherwise, add 1.
+
+    This is the core function.
+
+    It might make more sense to reverse the direction of this walk, i.e.,
+    go from root to leaf instead.
+  */
+  _treeWalk (node, lastScope = null) {
+    if (node == null || node.parent == null) {
+      return 0
+    } else {
+      let increment = 0
+
+      const notFirstOrLastSibling =
+        (node.previousSibling != null && node.nextSibling != null)
+
+      const isScope = this.scopes.indent[node.parent.type]
+      notFirstOrLastSibling && isScope && increment++
+
+      const isScope2 = this.scopes.indentExceptFirst[node.parent.type]
+      !increment && isScope2 && node.previousSibling != null && increment++
+
+      const isScope3 = this.scopes.indentExceptFirstOrBlock[node.parent.type]
+      !increment && isScope3 && node.previousSibling != null && increment++
+
+      // apply current row, single line, type-based rules, e.g., 'else' or 'private:'
+      let typeDent = 0
+      this.scopes.types.indent[node.type] && typeDent++
+      this.scopes.types.outdent[node.type] && increment && typeDent--
+      increment += typeDent
+
+      // check whether the last (lower) indentation happend due to a scope that
+      // started on the same row and ends directly before this.
+      if (lastScope && increment > 0 &&
+        // previous (lower) scope was a two-sided scope, reduce if starts on
+        // same row and ends right before
+        // TODO: this currently only works for scopes that have a single-character
+        // closing delimiter (like statement_blocks, but not HTML, for instance).
+        ((node.parent.startPosition.row === lastScope.node.startPosition.row &&
+          (node.parent.endIndex <= lastScope.node.endIndex + 1)) ||
+          // or this is a special scope (like if, while) and it's ends coincide
+          (isScope3 && lastScope.node.endIndex === node.endIndex))) {
+        log('ignoring repeat', node.parent.type, lastScope)
+        increment = 0
+      }
+
+      log('treewalk', {node, notFirstOrLastSibling, type: node.parent.type, increment})
+      const newLastScope = (isScope || isScope2 ? {node: node.parent} : lastScope)
+      return this._treeWalk(node.parent, newLastScope) + increment
+    }
+  }
+
+  suggestedIndentForBufferRow (row, tabLength, options) {
+    // get current indentation for row
+    const line = this.languageMode.buffer.lineForRow(row)
+    const currentIndentation = this.languageMode.indentLevelForLine(line, tabLength)
+
+    const syntaxNode = this._getHighestSyntaxNodeAtPosition(row)
+    if (!syntaxNode) {
+      return 0
+    }
+    let indentation = this._treeWalk(syntaxNode)
+
+    // Special case for comments
+    if (syntaxNode.type === 'comment' &&
+      syntaxNode.startPosition.row < row &&
+      syntaxNode.endPosition.row > row) {
+      indentation += 1
+    }
+
+    if (options && options.preserveLeadingWhitespace) {
+      indentation -= currentIndentation
+    }
+
+    return indentation
+  }
+
+}
diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js
@@ -1,17 +1,18 @@
-const Parser = require('tree-sitter');
-const { Point, Range, spliceArray } = require('text-buffer');
-const { Patch } = require('superstring');
-const { Emitter } = require('event-kit');
-const ScopeDescriptor = require('./scope-descriptor');
-const Token = require('./token');
-const TokenizedLine = require('./tokenized-line');
-const TextMateLanguageMode = require('./text-mate-language-mode');
-const { matcherForSelector } = require('./selectors');
-
-let nextId = 0;
-const MAX_RANGE = new Range(Point.ZERO, Point.INFINITY).freeze();
-const PARSER_POOL = [];
-const WORD_REGEX = /\w/;
+const Parser = require('tree-sitter')
+const {Point, Range, spliceArray} = require('text-buffer')
+const {Patch} = require('superstring')
+const {Emitter} = require('event-kit')
+const ScopeDescriptor = require('./scope-descriptor')
+const Token = require('./token')
+const TokenizedLine = require('./tokenized-line')
+const TextMateLanguageMode = require('./text-mate-language-mode')
+const {matcherForSelector} = require('./selectors')
+const TreeIndenter = require('./tree-indenter')
+
+let nextId = 0
+const MAX_RANGE = new Range(Point.ZERO, Point.INFINITY).freeze()
+const PARSER_POOL = []
+const WORD_REGEX = /\w/
 
 class TreeSitterLanguageMode {
   static _patchSyntaxNode() {
@@ -193,6 +194,7 @@ class TreeSitterLanguageMode {
     );
   }
 
+<<<<<<< HEAD
   suggestedIndentForBufferRow(row, tabLength, options) {
     return this._suggestedIndentForLineWithScopeAtBufferRow(
       row,
@@ -201,6 +203,25 @@ class TreeSitterLanguageMode {
       tabLength,
       options
     );
+=======
+  suggestedIndentForBufferRow (row, tabLength, options) {
+    if (!this.treeIndenter) {
+      this.treeIndenter = new TreeIndenter(this)
+    }
+
+    if (this.treeIndenter.isConfigured) {
+      const indent = this.treeIndenter.suggestedIndentForBufferRow(row, tabLength, options)
+      return indent
+    } else {
+      return this._suggestedIndentForLineWithScopeAtBufferRow(
+        row,
+        this.buffer.lineForRow(row),
+        this.rootScopeDescriptor,
+        tabLength,
+        options
+      )
+    }
+>>>>>>> c8d52da3e (Implementing tree-sitter based indentation logic)
   }
 
   indentLevelForLine(line, tabLength) {