Skip to content

Commit

Permalink
implement RST & Markdown quote blocks (nim-lang#19147)
Browse files Browse the repository at this point in the history
* implement RST & Markdown quote blocks

* compile with nim 1.0

* Fix indentation
  • Loading branch information
a-mr authored and PMunch committed Mar 28, 2022
1 parent 66283e1 commit 2a621d4
Show file tree
Hide file tree
Showing 7 changed files with 617 additions and 12 deletions.
10 changes: 9 additions & 1 deletion config/nimdoc.tex.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,17 @@ doc.file = """
\usepackage[most]{tcolorbox} % boxes around admonitions, code blocks, doc.item

\newtcolorbox{rstadmonition}[1][]{blanker, breakable,
left=3mm, right=3mm, top=1mm, bottom=1mm,
left=3mm, right=0mm, top=1mm, bottom=1mm,
before upper=\indent, parbox=false, #1}

\newtcolorbox{rstquote}[1][]{blanker, breakable,
left=3mm, right=3mm, top=1mm, bottom=1mm,
parbox=false,
borderline west={0.3em}{0pt}{lightgray},
borderline north={0.05em}{0pt}{lightgray},
borderline east={0.05em}{0pt}{lightgray},
borderline south={0.05em}{0pt}{lightgray}}

\definecolor{rstframecolor}{rgb}{0.85, 0.8, 0.6}

\newtcolorbox{rstprebox}[1][]{blanker, breakable,
Expand Down
5 changes: 5 additions & 0 deletions doc/nimdoc.css
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,11 @@ blockquote {
border-left: 5px solid #bbc;
}

/* Block quotes carrying the `markdown-quote` class: slightly smaller, upright text. */
blockquote.markdown-quote {
font-size: 0.9rem; /* use rem (relative to the root font size) to avoid recursion: nested quotes do not keep shrinking */
font-style: normal;
}

.pre, span.tok {
font-family: "Source Code Pro", Monaco, Menlo, Consolas, "Courier New", monospace;
font-weight: 500;
Expand Down
210 changes: 202 additions & 8 deletions lib/packages/docutils/rst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
## + field lists
## + option lists
## + indented literal blocks
## + quoted literal blocks
## + line blocks
## + simple tables
## + directives (see official documentation in `RST directives list`_):
## - ``image``, ``figure`` for including images and videos
Expand Down Expand Up @@ -121,6 +123,7 @@
## * Markdown code blocks
## * Markdown links
## * Markdown headlines
## * Markdown block quotes
## * using ``1`` as auto-enumerator in enumerated lists like RST ``#``
## (auto-enumerator ``1`` can not be used with ``#`` in the same list)
##
Expand All @@ -145,7 +148,7 @@
## 2) Compatibility mode which is RST rules.
##
## .. Note:: in both modes the parser interprets text between single
## backticks (code) identically:
## backticks (code) identically:
## backslash does not escape; the only exception: ``\`` followed by `
## does escape so that we can always input a single backtick ` in
## inline code. However that makes impossible to input code with
Expand All @@ -156,13 +159,35 @@
## ``\`` -- GOOD
## So single backticks can always be input: `\`` will turn to ` code
##
## .. Attention::
## We don't support some obviously poor design choices of Markdown (or RST).
##
## - no support for the rule of 2 spaces causing a line break in Markdown
## (use RST "line blocks" syntax for making line breaks)
##
## - interpretation of Markdown block quotes is also slightly different,
## e.g. case
##
## ::
##
## >>> foo
## > bar
## >>baz
##
## is a single 3rd-level quote `foo bar baz` in original Markdown, while
## in Nim we naturally see it as 3rd-level quote `foo` + 1st level `bar` +
## 2nd level `baz`:
##
## >>> foo
## > bar
## >>baz
##
## Limitations
## -----------
##
## * no Unicode support in character width calculations
## * body elements
## - no roman numerals in enumerated lists
## - no quoted literal blocks
## - no doctest blocks
## - no grid tables
## - some directives are missing (check official `RST directives list`_):
Expand Down Expand Up @@ -472,6 +497,10 @@ type
line: int # the last line of this style occurrence
# (for error message)
hasPeers: bool # has headings on the same level of hierarchy?
LiteralBlockKind = enum # RST-style literal blocks after `::`
lbNone, # `::` is not followed by a literal block
lbIndentedLiteralBlock, # block is indented deeper than the line with `::`
lbQuotedLiteralBlock # block at the same indentation whose first line
# starts with a quoting punctuation character
LevelMap = seq[LevelInfo] # Saves for each possible title adornment
# style its level in the current document.
SubstitutionKind = enum
Expand Down Expand Up @@ -1953,6 +1982,44 @@ proc parseLiteralBlock(p: var RstParser): PRstNode =
inc p.idx
result.add(n)

proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
  ## Parses an RST quoted literal block into an `rnLiteralBlock` node that
  ## holds a single text leaf.
  ##
  ## The parser is expected to stand on the `tkIndent` that follows `::`;
  ## if it does not, the leaf is left empty. The first character of the
  ## first non-blank line becomes the quoting character, and every further
  ## line at the same indentation must start with it.
  ##
  ## Parsing stops at end of input, at a less indented line or at a blank
  ## line. It also stops, after emitting a `mwRstStyle` warning, at a more
  ## indented line or at a line that does not start with the quoting
  ## character.
  result = newRstNodeA(p, rnLiteralBlock)
  var leaf = newLeaf("")
  if currentTok(p).kind == tkIndent:
    let baseIndent = currInd(p)
    # skip blank lines
    while currentTok(p).kind == tkIndent:
      inc p.idx
    let quoteChar = currentTok(p).symbol[0]
    while currentTok(p).kind != tkEof:
      if currentTok(p).kind != tkIndent:
        # ordinary token: copy its text verbatim
        leaf.text.add(currentTok(p).symbol)
        inc p.idx
        continue
      let lineIndent = currentTok(p).ival
      if lineIndent < baseIndent:
        break
      if lineIndent > baseIndent:
        rstMessage(p, mwRstStyle,
                   "unexpected indentation in quoted literal block")
        break
      # a new line at the block's own indentation
      if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteChar:
        leaf.text.add("\n")
        inc p.idx
      elif nextTok(p).kind == tkIndent:
        break  # blank line terminates the block
      else:
        rstMessage(p, mwRstStyle, "no newline after quoted literal block")
        break
  result.add(leaf)

proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
  ## Dispatches to the literal block parser matching `kind`:
  ## `parseLiteralBlock` for `lbIndentedLiteralBlock`, and
  ## `parseQuotedLiteralBlock` for any other value.
  case kind
  of lbIndentedLiteralBlock:
    result = parseLiteralBlock(p)
  of lbNone, lbQuotedLiteralBlock:
    result = parseQuotedLiteralBlock(p)

proc getLevel(p: var RstParser, c: char, hasOverline: bool): int =
## Returns (preliminary) heading level corresponding to `c` and
## `hasOverline`. If level does not exist, add it first.
Expand Down Expand Up @@ -2023,6 +2090,33 @@ proc isLineBlock(p: RstParser): bool =
p.tok[j].col > currentTok(p).col or
p.tok[j].symbol == "\n"

proc isMarkdownBlockQuote(p: RstParser): bool =
  ## Tells whether the symbol of the current token begins with `>`.
  currentTok(p).symbol[0] == '>'

proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
  ## Checks that the following tokens are either Indented Literal Block or
  ## Quoted Literal Block (which is not quite the same as Markdown quote block).
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
  ##
  ## Returns `lbNone` unless the current token is `::` followed by a newline.
  ## A following line indented deeper than the current one gives
  ## `lbIndentedLiteralBlock`. A following line at the same indentation gives
  ## `lbQuotedLiteralBlock` when its first non-blank token is punctuation
  ## starting with a valid quoting character.
  const validQuotingCharacters = {
    '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
    '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^',
    '_', '`', '{', '|', '}', '~'}
  result = lbNone
  if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent:
    let
      ownIndent = currInd(p)
      blockIndent = nextTok(p).ival
    if ownIndent < blockIndent:
      result = lbIndentedLiteralBlock
    elif ownIndent == blockIndent:
      # look past blank lines at the first token of the candidate block
      var first = p.idx + 1
      while p.tok[first].kind == tkIndent:
        inc first
      if p.tok[first].kind in {tkPunct, tkAdornment} and
          p.tok[first].symbol[0] in validQuotingCharacters:
        result = lbQuotedLiteralBlock

proc predNL(p: RstParser): bool =
result = true
if p.idx > 0:
Expand Down Expand Up @@ -2078,6 +2172,8 @@ proc whichSection(p: RstParser): RstNodeKind =
elif match(p, p.idx + 1, " a"): result = rnTable
elif currentTok(p).symbol == "|" and isLineBlock(p):
result = rnLineBlock
elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
result = rnMarkdownBlockQuote
elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
result = rnOverline
else:
Expand All @@ -2090,6 +2186,8 @@ proc whichSection(p: RstParser): RstNodeKind =
result = rnMarkdownTable
elif currentTok(p).symbol == "|" and isLineBlock(p):
result = rnLineBlock
elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
result = rnMarkdownBlockQuote
elif match(p, tokenAfterNewline(p), "aI") and
isAdornmentHeadline(p, tokenAfterNewline(p)):
result = rnHeadline
Expand Down Expand Up @@ -2143,6 +2241,102 @@ proc parseLineBlock(p: var RstParser): PRstNode =
else:
break

proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}

proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] =
  ## Collects the quote prefix that starts at token number `idx`.
  ##
  ## The token at `idx` is always taken. After it, every pair of a
  ## whitespace token followed by a punctuation token starting with `>` is
  ## taken too (a prefix like `> > >`).
  ##
  ## Returns:
  ## - `sym`: concatenated text of all taken tokens, whitespace included
  ## - `depth`: summed length of the taken non-whitespace tokens
  ## - `tokens`: number of tokens taken
  let head = p.tok[idx].symbol
  result = (head, head.len, 1)
  var pos = idx + 1
  while p.tok[pos].kind == tkWhite and pos + 1 < p.tok.len and
      p.tok[pos+1].kind == tkPunct and p.tok[pos+1].symbol[0] == '>':
    result.sym.add p.tok[pos].symbol
    result.sym.add p.tok[pos+1].symbol
    result.depth += p.tok[pos+1].symbol.len
    result.tokens += 2
    pos += 2

proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
                              PRstNode =
  ## Parses one *segment* of a Markdown block quote and returns the result
  ## of running `parseDoc` on its content.
  ##
  ## We define *segment* as a group of lines that start with exactly the
  ## same quote symbol `curSym`. If the following lines don't contain any `>`
  ## (*lazy* continuation) they are considered a continuation of the
  ## current segment, unless they are indented less than `col`.
  ##
  ## The tokens of the segment are copied from `p` into a temporary parser
  ## with the quote symbols left out, so that the content can be parsed
  ## normally. The segment ends at end of input, at a blank line, at a line
  ## with a different quote symbol or at a lazy line indented less than
  ## `col`; `p` is left standing on the token that ended it.
  var q: RstParser # to delete `>` at a start of line and then parse normally
  initParser(q, p.s)
  q.col = p.col
  q.line = p.line
  var minCol = int.high  # minimum column number in the segment
  while true:  # move tokens of segment from `p` to `q` skipping `curSym`
    case currentTok(p).kind
    of tkEof:
      break
    of tkIndent:
      if nextTok(p).kind in {tkIndent, tkEof}:
        break
      else:
        if nextTok(p).symbol[0] == '>':
          let (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
          if quoteSym == curSym:  # the segment continues
            let iTok = tokenAfterNewline(p, p.idx+1)
            if p.tok[iTok].kind notin {tkEof, tkIndent} and
                p.tok[iTok].symbol[0] != '>':
              rstMessage(p, mwRstStyle,
                  "two or more quoted lines are followed by unquoted line " &
                  $(curLine(p) + 1))
              break
            q.tok.add currentTok(p)
            var ival = currentTok(p).ival + quoteSym.len
            inc p.idx, (1 + quoteTokens)  # skip newline and > > >
            if currentTok(p).kind == tkWhite:
              ival += currentTok(p).symbol.len
              inc p.idx
            # fix up previous `tkIndent`s to ival (as if >>> were not there)
            var j = q.tok.len - 1
            while j >= 0 and q.tok[j].kind == tkIndent:
              q.tok[j].ival = ival
              dec j
          else:  # next segment started
            break
        elif currentTok(p).ival < col:
          break
        else:  # the segment continues, a case like:
               # > beginning
               # continuation
          q.tok.add currentTok(p)
          inc p.idx
    else:
      if currentTok(p).col < minCol: minCol = currentTok(p).col
      q.tok.add currentTok(p)
      inc p.idx
  q.indentStack = @[minCol]
  # if initial indentation `minCol` is > 0 then final newlines
  #   should be omitted so that parseDoc could advance to the end of tokens:
  var j = q.tok.len - 1
  # `j >= 0` covers a segment without any content (e.g. a lone `>`), where
  # `q.tok` is empty or holds nothing but `tkIndent` tokens:
  while j >= 0 and q.tok[j].kind == tkIndent: dec j
  q.tok.setLen (j+1)
  q.tok.add Token(kind: tkEof, line: currentTok(p).line)
  result = parseDoc(q)

proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
  ## Parses a Markdown block quote that starts at the current token and
  ## returns an `rnMarkdownBlockQuote` node.
  ##
  ## The quote is split into segments. Each segment becomes an
  ## `rnMarkdownBlockQuoteItem` child whose `quotationDepth` is the depth
  ## reported by `getQuoteSymbol` for the segment's prefix. A new segment is
  ## started while the next line sits at the column of the first prefix and
  ## begins with `>`.
  let startCol = currentTok(p).col
  var prefix = getQuoteSymbol(p, p.idx)
  result = newRstNodeA(p, rnMarkdownBlockQuote)
  p.idx += prefix.tokens  # skip first >
  var hasNext = true
  while hasNext:
    let segment = newRstNode(rnMarkdownBlockQuoteItem)
    segment.quotationDepth = prefix.depth
    if currentTok(p).kind == tkWhite:
      inc p.idx
    segment.add parseMarkdownQuoteSegment(p, prefix.sym, startCol)
    result.add(segment)
    hasNext =
      currentTok(p).kind == tkIndent and currentTok(p).ival == startCol and
      nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>'
    if hasNext:
      prefix = getQuoteSymbol(p, p.idx + 1)
      p.idx += 1 + prefix.tokens  # skip newline and > > >

proc parseParagraph(p: var RstParser, result: PRstNode) =
while true:
case currentTok(p).kind
Expand All @@ -2158,16 +2352,17 @@ proc parseParagraph(p: var RstParser, result: PRstNode) =
result.add newLeaf(" ")
of rnLineBlock:
result.addIfNotNil(parseLineBlock(p))
of rnMarkdownBlockQuote:
result.addIfNotNil(parseMarkdownBlockQuote(p))
else: break
else:
break
of tkPunct:
if currentTok(p).symbol == "::" and
nextTok(p).kind == tkIndent and
currInd(p) < nextTok(p).ival:
if (let literalBlockKind = whichRstLiteralBlock(p);
literalBlockKind != lbNone):
result.add newLeaf(":")
inc p.idx # skip '::'
result.add(parseLiteralBlock(p))
result.add(parseRstLiteralBlock(p, literalBlockKind))
break
else:
parseInline(p, result)
Expand Down Expand Up @@ -2257,8 +2452,6 @@ proc getColumns(p: var RstParser, cols: var IntSeq) =
# last column has no limit:
cols[L - 1] = 32000

proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}

proc parseSimpleTable(p: var RstParser): PRstNode =
var
cols: IntSeq
Expand Down Expand Up @@ -2585,6 +2778,7 @@ proc parseSection(p: var RstParser, result: PRstNode) =
a = parseLiteralBlock(p)
of rnBulletList: a = parseBulletList(p)
of rnLineBlock: a = parseLineBlock(p)
of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p)
of rnDirective: a = parseDotDot(p)
of rnEnumList: a = parseEnumList(p)
of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")
Expand Down
9 changes: 8 additions & 1 deletion lib/packages/docutils/rstast.nim
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ type
rnFieldName, # consisting of a field name ...
rnFieldBody, # ... and a field body
rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString,
rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock,
rnOptionArgument, rnDescription, rnLiteralBlock,
rnMarkdownBlockQuote, # a quote starting from punctuation like >>>
rnMarkdownBlockQuoteItem, # a quotation block, quote lines starting with
# the same number of chars
rnLineBlock, # the | thingie
rnLineBlockItem, # a son of rnLineBlock - one line inside it.
# When `RstNode` lineIndent="\n" the line's empty
Expand Down Expand Up @@ -101,6 +104,8 @@ type
of rnFootnote, rnCitation, rnOptionListItem:
order*: int ## footnote order (for auto-symbol footnotes and
## auto-numbered ones without a label)
of rnMarkdownBlockQuoteItem:
quotationDepth*: int ## number of characters in line prefix
of rnRef, rnSubstitutionReferences,
rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef:
info*: TLineInfo ## To have line/column info for warnings at
Expand Down Expand Up @@ -409,6 +414,8 @@ proc treeRepr*(node: PRstNode, indent=0): string =
result.add " level=" & $node.level
of rnFootnote, rnCitation, rnOptionListItem:
result.add (if node.order == 0: "" else: " order=" & $node.order)
of rnMarkdownBlockQuoteItem:
result.add " quotationDepth=" & $node.quotationDepth
else:
discard
result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'")
Expand Down
Loading

0 comments on commit 2a621d4

Please sign in to comment.