zulip · PIG208 · Jan 15, 2025 · Jan 10, 2025 · Jan 10, 2025 · Jan 10, 2025
diff --git a/lib/model/content.dart b/lib/model/content.dart
@@ -786,7 +786,7 @@ class MathInlineNode extends InlineContentNode {
 class GlobalTimeNode extends InlineContentNode {
   const GlobalTimeNode({super.debugHtmlNode, required this.datetime});
 
-  /// Always in UTC, enforced in [_ZulipContentParser.parseInlineContent].
+  /// Always in UTC, enforced in [_ZulipInlineContentParser.parseInlineContent].
   final DateTime datetime;
 
   @override
@@ -806,72 +806,68 @@ class GlobalTimeNode extends InlineContentNode {
 
 ////////////////////////////////////////////////////////////////
 
-/// What sort of nodes a [_ZulipContentParser] is currently expecting to find.
-enum _ParserContext {
-  /// The parser is currently looking for block nodes.
-  block,
+String? _parseMath(dom.Element element, {required bool block}) {
+  final dom.Element katexElement;
+  if (!block) {
+    assert(element.localName == 'span' && element.className == 'katex');
 
-  /// The parser is currently looking for inline nodes.
-  inline,
-}
-
-class _ZulipContentParser {
-  /// The current state of what sort of nodes the parser is looking for.
-  ///
-  /// This exists for the sake of debug-mode checks,
-  /// and should be read or updated only inside an assertion.
-  _ParserContext _debugParserContext = _ParserContext.block;
-
-  String? parseMath(dom.Element element, {required bool block}) {
-    assert(block == (_debugParserContext == _ParserContext.block));
-
-    final dom.Element katexElement;
-    if (!block) {
-      assert(element.localName == 'span' && element.className == 'katex');
+    katexElement = element;
+  } else {
+    assert(element.localName == 'span' && element.className == 'katex-display');
 
-      katexElement = element;
-    } else {
-      assert(element.localName == 'span' && element.className == 'katex-display');
-
-      if (element.nodes.length != 1) return null;
-      final child = element.nodes.single;
-      if (child is! dom.Element) return null;
-      if (child.localName != 'span') return null;
-      if (child.className != 'katex') return null;
-      katexElement = child;
-    }
-
-    // Expect two children span.katex-mathml, span.katex-html .
-    // For now we only care about the .katex-mathml .
-    if (katexElement.nodes.isEmpty) return null;
-    final child = katexElement.nodes.first;
+    if (element.nodes.length != 1) return null;
+    final child = element.nodes.single;
     if (child is! dom.Element) return null;
     if (child.localName != 'span') return null;
-    if (child.className != 'katex-mathml') return null;
-
-    if (child.nodes.length != 1) return null;
-    final grandchild = child.nodes.single;
-    if (grandchild is! dom.Element) return null;
-    if (grandchild.localName != 'math') return null;
-    if (grandchild.attributes['display'] != (block ? 'block' : null)) return null;
-    if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML') return null;
-
-    if (grandchild.nodes.length != 1) return null;
-    final greatgrand = grandchild.nodes.single;
-    if (greatgrand is! dom.Element) return null;
-    if (greatgrand.localName != 'semantics') return null;
-
-    if (greatgrand.nodes.isEmpty) return null;
-    final descendant4 = greatgrand.nodes.last;
-    if (descendant4 is! dom.Element) return null;
-    if (descendant4.localName != 'annotation') return null;
-    if (descendant4.attributes['encoding'] != 'application/x-tex') return null;
-
-    return descendant4.text.trim();
+    if (child.className != 'katex') return null;
+    katexElement = child;
+  }
+
+  // Expect two children span.katex-mathml, span.katex-html .
+  // For now we only care about the .katex-mathml .
+  if (katexElement.nodes.isEmpty) return null;
+  final child = katexElement.nodes.first;
+  if (child is! dom.Element) return null;
+  if (child.localName != 'span') return null;
+  if (child.className != 'katex-mathml') return null;
+
+  if (child.nodes.length != 1) return null;
+  final grandchild = child.nodes.single;
+  if (grandchild is! dom.Element) return null;
+  if (grandchild.localName != 'math') return null;
+  if (grandchild.attributes['display'] != (block ? 'block' : null)) return null;
+  if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML') return null;
+
+  if (grandchild.nodes.length != 1) return null;
+  final greatgrand = grandchild.nodes.single;
+  if (greatgrand is! dom.Element) return null;
+  if (greatgrand.localName != 'semantics') return null;
+
+  if (greatgrand.nodes.isEmpty) return null;
+  final descendant4 = greatgrand.nodes.last;
+  if (descendant4 is! dom.Element) return null;
+  if (descendant4.localName != 'annotation') return null;
+  if (descendant4.attributes['encoding'] != 'application/x-tex') return null;
+
+  return descendant4.text.trim();
+}
+
+/// Parser for the inline-content subtrees within Zulip content HTML.
+///
+/// The only entry point to this class is [parseBlockInline].
+///
+/// After a call to [parseBlockInline] returns, the [_ZulipInlineContentParser]
+/// instance has been reset to its starting state, and can be re-used for
+/// parsing other subtrees.
+class _ZulipInlineContentParser {
+  InlineContentNode? parseInlineMath(dom.Element element) {
+    final debugHtmlNode = kDebugMode ? element : null;
+    final texSource = _parseMath(element, block: false);
+    if (texSource == null) return null;
+    return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
   }
 
   UserMentionNode? parseUserMention(dom.Element element) {
-    assert(_debugParserContext == _ParserContext.inline);
     assert(element.localName == 'span');
     final debugHtmlNode = kDebugMode ? element : null;
 
@@ -945,7 +941,6 @@ class _ZulipContentParser {
   static final _emojiCodeFromClassNameRegexp = RegExp(r"emoji-([^ ]+)");
 
   InlineContentNode parseInlineContent(dom.Node node) {
-    assert(_debugParserContext == _ParserContext.inline);
     final debugHtmlNode = kDebugMode ? node : null;
     InlineContentNode unimplemented() => UnimplementedInlineContentNode(htmlNode: node);
 
@@ -1025,36 +1020,49 @@ class _ZulipContentParser {
     }
 
     if (localName == 'span' && className == 'katex') {
-      final texSource = parseMath(element, block: false);
-      if (texSource == null) return unimplemented();
-      return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
+      return parseInlineMath(element) ?? unimplemented();
     }
 
     // TODO more types of node
     return unimplemented();
   }
 
   List<InlineContentNode> parseInlineContentList(List<dom.Node> nodes) {
-    assert(_debugParserContext == _ParserContext.inline);
     return nodes.map(parseInlineContent).toList(growable: false);
   }
 
+  /// Parse the children of a [BlockInlineContainerNode], making up a
+  /// complete subtree of inline content with no further inline ancestors.
   ({List<InlineContentNode> nodes, List<LinkNode>? links}) parseBlockInline(List<dom.Node> nodes) {
-    assert(_debugParserContext == _ParserContext.block);
-    assert(() {
-      _debugParserContext = _ParserContext.inline;
-      return true;
-    }());
     final resultNodes = parseInlineContentList(nodes);
-    assert(() {
-      _debugParserContext = _ParserContext.block;
-      return true;
-    }());
     return (nodes: resultNodes, links: _takeLinkNodes());
   }
+}
+
+/// Parser for a complete piece of Zulip HTML content, a [ZulipContent].
+///
+/// The only entry point to this class is [parse].
+class _ZulipContentParser {
+  /// The single inline-content parser used and re-used throughout parsing of
+  /// a complete piece of Zulip HTML content.
+  ///
+  /// Because block content can never appear nested inside inline content,
+  /// there's never a need for more than one of these at a time,
+  /// so we can allocate just one up front.
+  final inlineParser = _ZulipInlineContentParser();
+
+  ({List<InlineContentNode> nodes, List<LinkNode>? links}) parseBlockInline(List<dom.Node> nodes) {
+    return inlineParser.parseBlockInline(nodes);
+  }
+
+  BlockContentNode parseMathBlock(dom.Element element) {
+    final debugHtmlNode = kDebugMode ? element : null;
+    final texSource = _parseMath(element, block: true);
+    if (texSource == null) return UnimplementedBlockContentNode(htmlNode: element);
+    return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
+  }
 
   BlockContentNode parseListNode(dom.Element element) {
-    assert(_debugParserContext == _ParserContext.block);
     ListStyle? listStyle;
     switch (element.localName) {
       case 'ol': listStyle = ListStyle.ordered; break;
@@ -1077,7 +1085,6 @@ class _ZulipContentParser {
   }
 
   BlockContentNode parseSpoilerNode(dom.Element divElement) {
-    assert(_debugParserContext == _ParserContext.block);
     assert(divElement.localName == 'div'
         && divElement.className == 'spoiler-block');
 
@@ -1097,7 +1104,6 @@ class _ZulipContentParser {
   }
 
   BlockContentNode parseCodeBlock(dom.Element divElement) {
-    assert(_debugParserContext == _ParserContext.block);
     final mainElement = () {
       assert(divElement.localName == 'div'
           && divElement.className == "codehilite");
@@ -1180,7 +1186,6 @@ class _ZulipContentParser {
   static final _imageDimensionsRegExp = RegExp(r'^(\d+)x(\d+)$');
 
   BlockContentNode parseImageNode(dom.Element divElement) {
-    assert(_debugParserContext == _ParserContext.block);
     final elements = () {
       assert(divElement.localName == 'div'
           && divElement.className == 'message_inline_image');
@@ -1272,7 +1277,6 @@ class _ZulipContentParser {
   }();
 
   BlockContentNode parseInlineVideoNode(dom.Element divElement) {
-    assert(_debugParserContext == _ParserContext.block);
     assert(divElement.localName == 'div'
       && _videoClassNameRegexp.hasMatch(divElement.className));
 
@@ -1305,7 +1309,6 @@ class _ZulipContentParser {
   }
 
   BlockContentNode parseEmbedVideoNode(dom.Element divElement) {
-    assert(_debugParserContext == _ParserContext.block);
     assert(divElement.localName == 'div'
       && _videoClassNameRegexp.hasMatch(divElement.className));
 
@@ -1344,7 +1347,6 @@ class _ZulipContentParser {
   }
 
   BlockContentNode parseTableContent(dom.Element tableElement) {
-    assert(_debugParserContext == _ParserContext.block);
     assert(tableElement.localName == 'table'
         && tableElement.className.isEmpty);
 
@@ -1452,7 +1454,6 @@ class _ZulipContentParser {
   }
 
   BlockContentNode parseBlockContent(dom.Node node) {
-    assert(_debugParserContext == _ParserContext.block);
     final debugHtmlNode = kDebugMode ? node : null;
     if (node is! dom.Element) {
       return UnimplementedBlockContentNode(htmlNode: node);
@@ -1480,9 +1481,7 @@ class _ZulipContentParser {
             // The case with the `<br>\n` can happen when at the end of a quote;
             // it seems like a glitch in the server's Markdown processing,
             // so hopefully there just aren't any further such glitches.
-            final texSource = parseMath(child, block: true);
-            if (texSource == null) return UnimplementedBlockContentNode(htmlNode: node);
-            return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
+            return parseMathBlock(child);
           }
         }
       }
@@ -1579,10 +1578,15 @@ class _ZulipContentParser {
   ///
   /// See [ParagraphNode].
   List<BlockContentNode> parseImplicitParagraphBlockContentList(dom.NodeList nodes) {
-    assert(_debugParserContext == _ParserContext.block);
     final List<BlockContentNode> result = [];
-    final List<dom.Node> currentParagraph = [];
+
     List<ImageNode> imageNodes = [];
+    void consumeImageNodes() {
+      result.add(ImageNodeList(imageNodes));
+      imageNodes = [];
+    }
+
+    final List<dom.Node> currentParagraph = [];
     void consumeParagraph() {
       final parsed = parseBlockInline(currentParagraph);
       result.add(ParagraphNode(
@@ -1597,8 +1601,7 @@ class _ZulipContentParser {
 
       if (_isPossibleInlineNode(node)) {
         if (imageNodes.isNotEmpty) {
-          result.add(ImageNodeList(imageNodes));
-          imageNodes = [];
+          consumeImageNodes();
           // In a context where paragraphs are implicit it should be impossible
           // to have more paragraph content after image previews.
           result.add(UnimplementedBlockContentNode(htmlNode: node));
@@ -1613,24 +1616,25 @@ class _ZulipContentParser {
         imageNodes.add(block);
         continue;
       }
-      if (imageNodes.isNotEmpty) {
-        result.add(ImageNodeList(imageNodes));
-        imageNodes = [];
-      }
+      if (imageNodes.isNotEmpty) consumeImageNodes();
       result.add(block);
     }
     if (currentParagraph.isNotEmpty) consumeParagraph();
-    if (imageNodes.isNotEmpty) result.add(ImageNodeList(imageNodes));
-
+    if (imageNodes.isNotEmpty) consumeImageNodes();
     return result;
   }
 
   static final _redundantLineBreaksRegexp = RegExp(r'^\n+$');
 
   List<BlockContentNode> parseBlockContentList(dom.NodeList nodes) {
-    assert(_debugParserContext == _ParserContext.block);
     final List<BlockContentNode> result = [];
+
     List<ImageNode> imageNodes = [];
+    void consumeImageNodes() {
+      result.add(ImageNodeList(imageNodes));
+      imageNodes = [];
+    }
+
     for (final node in nodes) {
       // We get a bunch of newline Text nodes between paragraphs.
       // A browser seems to ignore these; let's do the same.
@@ -1643,13 +1647,10 @@ class _ZulipContentParser {
         imageNodes.add(block);
         continue;
       }
-      if (imageNodes.isNotEmpty) {
-        result.add(ImageNodeList(imageNodes));
-        imageNodes = [];
-      }
+      if (imageNodes.isNotEmpty) consumeImageNodes();
       result.add(block);
     }
-    if (imageNodes.isNotEmpty) result.add(ImageNodeList(imageNodes));
+    if (imageNodes.isNotEmpty) consumeImageNodes();
     return result;
   }
 
@@ -1660,6 +1661,8 @@ class _ZulipContentParser {
   }
 }
 
+/// Parse a complete piece of Zulip HTML content,
+/// such as an entire value of [Message.content].
 ZulipContent parseContent(String html) {
   return _ZulipContentParser().parse(html);
 }