Skip to content

Commit

Permalink
feat(transformers): introduce matchAlgorithm option for new matchin…
Browse files Browse the repository at this point in the history
…g algorithm (#835)

Co-authored-by: Anthony Fu <[email protected]>
  • Loading branch information
fuma-nama and antfu authored Jan 20, 2025
1 parent 4ed7fa3 commit ceca984
Show file tree
Hide file tree
Showing 21 changed files with 448 additions and 105 deletions.
38 changes: 38 additions & 0 deletions docs/packages/transformers.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,44 @@ const html = await codeToHtml(code, {

Transformers only apply classes and do not come with styles; you can provide your own CSS rules to style them properly.

## Matching Algorithm

We found that the algorithm for matching comments in v1 is sometimes counterintuitive, and we are fixing it progressively. Since v1.29.0, most transformers accept a new `matchAlgorithm` option that lets you toggle between matching algorithms. Right now, the default is `v1`, which is the old algorithm, and `v3` is the new algorithm. When Shiki v3 lands, the default will become `v3`.

```ts
const html = await codeToHtml(code, {
lang: 'ts',
theme: 'nord',
transformers: [
transformerNotationDiff({
matchAlgorithm: 'v3', // [!code hl]
}),
],
})
```

### `matchAlgorithm: 'v1'`

The matching algorithm mostly affects single-line comment matching. In `v1`, the comment line itself is counted as the first line, while in `v3` counting starts from the line below the comment. So in `v1`, the range has to include the comment line:

```ts
// [\!code highlight:3]
console.log('highlighted') // [!code hl]
console.log('highlighted') // [!code hl]
console.log('not highlighted')
```

### `matchAlgorithm: 'v3'`

In `v3`, the matching algorithm will start counting from the line below the comment:

```ts
// [\!code highlight:2]
console.log('highlighted') // [!code hl]
console.log('highlighted') // [!code hl]
console.log('not highlighted')
```

## Transformers

### `transformerNotationDiff`
Expand Down
1 change: 0 additions & 1 deletion packages/transformers/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ export * from './transformers/remove-line-breaks'
export * from './transformers/remove-notation-escape'
export * from './transformers/render-whitespace'
export * from './transformers/style-to-class'
export * from './utils'
9 changes: 5 additions & 4 deletions packages/transformers/src/shared/highlight-word.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ function getTextContent(element: ElementContent): string {
* @param ignoredElement
* @param index highlight beginning index
* @param len highlight length
* @param className class name to add to highlighted nodes
*/
function highlightRange(
this: ShikiTransformerContext,
Expand Down Expand Up @@ -64,14 +65,14 @@ function highlightRange(
}
}

function hasOverlap(range1: [number, number], range2: [ number, number]): boolean {
function hasOverlap(range1: [number, number], range2: [number, number]): boolean {
return (range1[0] <= range2[1]) && (range1[1]) >= range2[0]
}

function separateToken(span: Element, textNode: Text, index: number, len: number): [
before: Element | undefined,
med: Element,
after: Element | undefined,
before: Element | undefined,
med: Element,
after: Element | undefined,
] {
const text = textNode.value

Expand Down
97 changes: 97 additions & 0 deletions packages/transformers/src/shared/notation-transformer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import type { Element, Text } from 'hast'
import type { ShikiTransformer, ShikiTransformerContext } from 'shiki'
import { parseComments, type ParsedComments, v1ClearEndCommentPrefix } from './parse-comments'

/**
* Identifier of the comment matching algorithm.
*
* With `'v1'`, a notation on a comment-only line is applied starting at the
* comment line itself; with any other value it is applied starting at the
* line after the comment.
*/
export type MatchAlgorithm = 'v1' | 'v3'

/**
* Option mixed into the transformer option interfaces that allow choosing
* the matching algorithm.
*/
export interface MatchAlgorithmOptions {
/**
* Match algorithm to use
*
* @see https://shiki.style/packages/transformers#matching-algorithm
* @default 'v1'
*/
matchAlgorithm?: MatchAlgorithm
}

/**
 * Create a transformer that looks for a notation inside the comments of a
 * code block and hands every occurrence to `onMatch`.
 *
 * When `onMatch` returns `true` the matched notation is stripped from the
 * comment. A comment that becomes empty is removed, and a line that consisted
 * only of that comment is removed together with its trailing line break.
 *
 * @param name transformer name
 * @param regex pattern that recognises the notation inside a comment's content
 * @param onMatch callback invoked (with the transformer context as `this`) for
 * each regex match; receives the match, the comment's line, the comment token,
 * all line elements and the index of the line the notation applies to.
 * Return `true` to consume the notation, `false` to leave the text untouched.
 * @param matchAlgorithm matching algorithm, defaults to `'v1'`
 */
export function createCommentNotationTransformer(
  name: string,
  regex: RegExp,
  onMatch: (
    this: ShikiTransformerContext,
    match: string[],
    line: Element,
    commentNode: Element,
    lines: Element[],
    index: number
  ) => boolean,
  matchAlgorithm: MatchAlgorithm = 'v1',
): ShikiTransformer {
  return {
    name,
    code(code) {
      const lines = code.children.filter(i => i.type === 'element')
      const linesToRemove: (Element | Text)[] = []

      code.data ??= {} as any
      const data = code.data as {
        _shiki_notation?: ParsedComments
      }

      // Parse once and cache on the node: later calls that receive the same
      // `code` node reuse the cached comments instead of parsing again.
      data._shiki_notation ??= parseComments(lines, ['jsx', 'tsx'].includes(this.options.lang), matchAlgorithm)
      const parsed = data._shiki_notation

      for (const comment of parsed) {
        // nothing left to match in this comment
        if (comment.info[1].length === 0)
          continue

        // a JSX comment is surrounded by its `{` and `}` tokens, hence 3 children
        const isLineCommentOnly = comment.line.children.length === (comment.isJsxStyle ? 3 : 1)
        let lineIdx = lines.indexOf(comment.line)
        // outside of v1, a comment-only line targets the line below it
        if (isLineCommentOnly && matchAlgorithm !== 'v1')
          lineIdx++

        let replaced = false
        comment.info[1] = comment.info[1].replace(regex, (...match) => {
          if (onMatch.call(this, match, comment.line, comment.token, lines, lineIdx)) {
            replaced = true
            return ''
          }

          // not consumed: keep the original text
          return match[0]
        })

        if (!replaced)
          continue

        if (matchAlgorithm === 'v1') {
          comment.info[1] = v1ClearEndCommentPrefix(comment.info[1])
        }

        const isEmpty = comment.info[1].trim().length === 0
        // ignore comment node
        if (isEmpty)
          comment.info[1] = ''

        if (isEmpty && isLineCommentOnly) {
          linesToRemove.push(comment.line)
        }
        else if (isEmpty && comment.isJsxStyle) {
          // drop the comment together with the tokens directly around it
          comment.line.children.splice(comment.line.children.indexOf(comment.token) - 1, 3)
        }
        else if (isEmpty) {
          comment.line.children.splice(comment.line.children.indexOf(comment.token), 1)
        }
        else {
          // write the remaining comment text back into the token
          const head = comment.token.children[0]

          if (head.type === 'text') {
            head.value = comment.info.join('')
          }
        }
      }

      for (const line of linesToRemove) {
        const index = code.children.indexOf(line)
        // already detached: `splice(-1, …)` would delete the last child instead
        if (index === -1)
          continue

        // NOTE(review): lines are presumably separated by `\n` text nodes;
        // remove the one following the line so no empty line is left behind.
        const next = code.children[index + 1]
        const removeCount = next?.type === 'text' && next.value === '\n' ? 2 : 1
        code.children.splice(index, removeCount)
      }
    },
  }
}
134 changes: 134 additions & 0 deletions packages/transformers/src/shared/parse-comments.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import type { Element, ElementContent } from 'hast'
import type { MatchAlgorithm } from './notation-transformer'

/**
* Comments collected from the lines of a code block, one entry per matched
* comment token.
*/
export type ParsedComments = {
// line element that contains the comment token
line: Element
// token element whose first text child matched a comment pattern
token: Element
// comment text split into: leading whitespace + opening marker, content,
// and (when the pattern has one) closing marker + trailing whitespace
info: [prefix: string, content: string, suffix?: string]
// true when JSX parsing is enabled and the tokens directly before and after
// the comment are `{` and `}`
isJsxStyle: boolean
}[]

/**
* Comment patterns, tried in order against the trimmed text of a token.
*
* Each pattern captures the opening marker, the content and, for delimited
* comments, the closing marker.
*
* Some comment formats have to be located at the end of the line
* (`endOfLine: true`), hence matching them is skipped for other tokens.
*/
const matchers: [re: RegExp, endOfLine: boolean][] = [
// `<!-- … -->`
[/^(<!--)(.+)(-->)$/, false],
// `/* … */`
[/^(\/\*)(.+)(\*\/)$/, false],
// line comments opened by `//`, `"`, `'`, `#`, `;` / `;;`, `%` / `%%` or `--`
[/^(\/\/|["'#]|;{1,2}|%{1,2}|--)(.*)$/, true],
/**
* continuation lines of multi-line comments that start with `*`, like this one
*/
[/^(\*)(.+)$/, true],
]

/**
 * Collect the comment tokens found in the given lines.
 *
 * @param lines line elements to scan
 * @param jsx whether to detect JSX comments wrapped in curly brackets
 * @param matchAlgorithm `'v1'` scans every token of a line; any other value
 * only inspects the trailing token (the last two when `jsx` is enabled)
 * @returns one entry per token whose first text child matches a comment pattern
 */
export function parseComments(
  lines: Element[],
  jsx: boolean,
  matchAlgorithm: MatchAlgorithm,
): ParsedComments {
  const found: ParsedComments = []

  for (const line of lines) {
    const tokens = line.children
    const lastIndex = tokens.length - 1

    // v1 walks the whole line; otherwise only the end of the line is checked,
    // one step further for JSX as the comment sits inside curly brackets
    let from: number
    if (matchAlgorithm === 'v1')
      from = 0
    else
      from = jsx ? lastIndex - 1 : lastIndex

    for (let idx = Math.max(from, 0); idx <= lastIndex; idx++) {
      const token = tokens[idx]
      if (token.type !== 'element')
        continue
      const head = token.children.at(0)
      if (head?.type !== 'text')
        continue

      const isLast = idx === lastIndex
      const info = matchToken(head.value, isLast)
      if (!info)
        continue

      // only a token with neighbours on both sides can be wrapped in `{` `}`
      const isJsxStyle = jsx
        && !isLast
        && idx !== 0
        && isValue(tokens[idx - 1], '{')
        && isValue(tokens[idx + 1], '}')

      found.push({
        info,
        line,
        token,
        isJsxStyle,
      })
    }
  }

  return found
}

/**
 * Check whether a node is an element whose first child is a text node equal
 * to `value` once surrounding whitespace is trimmed.
 *
 * @param element node to inspect
 * @param value expected trimmed text
 * @returns `false` for non-element nodes, for elements without children and
 * for elements whose first child is not a text node
 */
function isValue(element: ElementContent, value: string): boolean {
  if (element.type !== 'element')
    return false
  // an element may have no children at all, so guard the access
  const text = element.children[0]
  if (text?.type !== 'text')
    return false

  return text.value.trim() === value
}

/**
 * Split the text of a token into comment prefix, content and suffix.
 *
 * The comment patterns do not allow leading or trailing whitespace, so the
 * text is trimmed before matching. The original leading whitespace is put
 * back in front of the prefix and the original trailing whitespace after the
 * suffix, so re-joining the parts reproduces the token's own whitespace
 * characters (tabs included) instead of substituting spaces.
 *
 * @param text text value of comment node
 * @param isLast whether the token is located at the end of line
 * @returns `[prefix, content, suffix]` for the first matching pattern, with
 * `suffix` being `undefined` when the pattern has no closing marker, or
 * `undefined` when no pattern matches
 */
function matchToken(text: string, isLast: boolean): [prefix: string, content: string, suffix?: string] | undefined {
  const withoutLeading = text.trimStart()
  const leading = text.slice(0, text.length - withoutLeading.length)

  const trimmed = withoutLeading.trimEnd()
  const trailing = withoutLeading.slice(trimmed.length)

  for (const [matcher, endOfLine] of matchers) {
    // end-of-line comment formats are only tried on the last token
    if (endOfLine && !isLast)
      continue

    const result = matcher.exec(trimmed)
    if (!result)
      continue

    return [
      leading + result[1],
      result[2],
      result[3] ? result[3] + trailing : undefined,
    ]
  }

  return undefined
}

/**
 * Strip a dangling comment prefix (e.g. `// `) from the end of the text.
 *
 * The text is cut at the first comment prefix found, but only when nothing
 * except whitespace follows it; otherwise the text is returned unchanged.
 *
 * Used by matchAlgorithm v1.
 */
export function v1ClearEndCommentPrefix(text: string): string {
  const found = text.match(/(?:\/\/|["'#]|;{1,2}|%{1,2}|--)(.*)$/)

  // no prefix at all, or the prefix is followed by real content
  if (found === null || found[1].trim().length > 0)
    return text

  return text.slice(0, found.index)
}
12 changes: 8 additions & 4 deletions packages/transformers/src/transformers/meta-highlight.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ export function parseMetaHighlightString(meta: string): number[] | null {
const num = v.split('-').map(v => Number.parseInt(v, 10))
if (num.length === 1)
return [num[0]]
else
return Array.from({ length: num[1] - num[0] + 1 }, (_, i) => i + num[0])
return Array.from({ length: num[1] - num[0] + 1 }, (_, i) => i + num[0])
})
return lines
}
Expand Down Expand Up @@ -45,8 +44,13 @@ export function transformerMetaHighlight(
if (!this.options.meta?.__raw) {
return
}
;(this.meta as any)[symbol] ||= parseMetaHighlightString(this.options.meta.__raw)
const lines: number[] = (this.meta as any)[symbol] || []
const meta = this.meta as {
[symbol]: number[] | null
}

meta[symbol] ??= parseMetaHighlightString(this.options.meta.__raw)
const lines: number[] = meta[symbol] ?? []

if (lines.includes(line))
this.addClassToHast(node, className)
return node
Expand Down
4 changes: 3 additions & 1 deletion packages/transformers/src/transformers/notation-diff.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import type { ShikiTransformer } from 'shiki'
import type { MatchAlgorithmOptions } from '../shared/notation-transformer'
import { transformerNotationMap } from './notation-map'

export interface TransformerNotationDiffOptions {
export interface TransformerNotationDiffOptions extends MatchAlgorithmOptions {
/**
* Class for added lines
*/
Expand Down Expand Up @@ -35,6 +36,7 @@ export function transformerNotationDiff(
'--': classLineRemove,
},
classActivePre,
matchAlgorithm: options.matchAlgorithm,
},
'@shikijs/transformers:notation-diff',
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import type { ShikiTransformer } from 'shiki'
import type { MatchAlgorithmOptions } from '../shared/notation-transformer'
import { transformerNotationMap } from './notation-map'

export interface TransformerNotationErrorLevelOptions {
export interface TransformerNotationErrorLevelOptions extends MatchAlgorithmOptions {
classMap?: Record<string, string | string[]>
/**
* Class added to the <pre> element when the current code has diff
Expand All @@ -27,6 +28,7 @@ export function transformerNotationErrorLevel(
{
classMap,
classActivePre,
matchAlgorithm: options.matchAlgorithm,
},
'@shikijs/transformers:notation-error-level',
)
Expand Down
4 changes: 3 additions & 1 deletion packages/transformers/src/transformers/notation-focus.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import type { ShikiTransformer } from 'shiki'
import type { MatchAlgorithmOptions } from '../shared/notation-transformer'
import { transformerNotationMap } from './notation-map'

export interface TransformerNotationFocusOptions {
export interface TransformerNotationFocusOptions extends MatchAlgorithmOptions {
/**
* Class for focused lines
*/
Expand Down Expand Up @@ -29,6 +30,7 @@ export function transformerNotationFocus(
focus: classActiveLine,
},
classActivePre,
matchAlgorithm: options.matchAlgorithm,
},
'@shikijs/transformers:notation-focus',
)
Expand Down
Loading

0 comments on commit ceca984

Please sign in to comment.