From c7f7e32d0d4c56a0503e51e9e11cbad47a3c85e3 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Thu, 8 Aug 2024 11:08:21 +1000 Subject: [PATCH 1/8] Move all doc preparation to top of specs --- test/unit/htmlProcessing.test.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/unit/htmlProcessing.test.js b/test/unit/htmlProcessing.test.js index 529c375e..26e69b84 100644 --- a/test/unit/htmlProcessing.test.js +++ b/test/unit/htmlProcessing.test.js @@ -19,9 +19,20 @@ describe('HTML processing', () => { processedHTML: null } beforeAll(() => { + // General supported formats testGlobal.rawHTML = fs.readFileSync(docPath, {encoding: 'utf8'}) testGlobal.processedHTML = stubbedProcessedDoc(testGlobal.rawHTML).html testGlobal.output = cheerio.load(testGlobal.processedHTML) + + // Supported formats with inline code enabled + jest.resetModules() + process.env.ALLOW_INLINE_CODE = 'true' + // remove formatter from require cache to recognize changed env variable + delete require.cache[require.resolve('../../server/formatter')] + getProcessedDocAttributes = require('../../server/formatter').getProcessedDocAttributes + const rawHTML = fs.readFileSync(docPath, {encoding: 'utf8'}) + const processedHTML = stubbedProcessedDoc(rawHTML).html + testGlobal.codeEnabledOut = cheerio.load(processedHTML) }) it('does not throw when revision data is unavailable', () => { @@ -143,17 +154,6 @@ describe('HTML processing', () => { }) describe('with inline code enabled', () => { - beforeAll(() => { - jest.resetModules() - process.env.ALLOW_INLINE_CODE = 'true' - // remove formatter from require cache to recognize changed env variable - delete require.cache[require.resolve('../../server/formatter')] - getProcessedDocAttributes = require('../../server/formatter').getProcessedDocAttributes - const rawHTML = fs.readFileSync(docPath, {encoding: 'utf8'}) - const processedHTML = stubbedProcessedDoc(rawHTML).html - testGlobal.codeEnabledOut = 
cheerio.load(processedHTML) - }) - it('does not modify code block content', () => { const codeBlock = testGlobal.codeEnabledOut("pre:contains('codeblocks will not')") assert.match(codeBlock.html(), /<.*%-.*%>/) From 4f0a9e507b469a44a5af9bea821c90013a01a780 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Thu, 8 Aug 2024 11:16:09 +1000 Subject: [PATCH 2/8] Move loaded fixtures to object --- test/unit/htmlProcessing.test.js | 74 ++++++++++++++++---------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/test/unit/htmlProcessing.test.js b/test/unit/htmlProcessing.test.js index 26e69b84..62049565 100644 --- a/test/unit/htmlProcessing.test.js +++ b/test/unit/htmlProcessing.test.js @@ -13,16 +13,14 @@ function stubbedProcessedDoc(unprocessedHtml, editorName) { } describe('HTML processing', () => { - const testGlobal = { - rawHTML: null, - output: () => {}, - processedHTML: null - } + const testGlobal = {} + beforeAll(() => { // General supported formats - testGlobal.rawHTML = fs.readFileSync(docPath, {encoding: 'utf8'}) - testGlobal.processedHTML = stubbedProcessedDoc(testGlobal.rawHTML).html - testGlobal.output = cheerio.load(testGlobal.processedHTML) + testGlobal.general = {} + testGlobal.general.rawHTML = fs.readFileSync(docPath, {encoding: 'utf8'}) + testGlobal.general.processedHTML = stubbedProcessedDoc(testGlobal.general.rawHTML).html + testGlobal.general.output = cheerio.load(testGlobal.general.processedHTML) // Supported formats with inline code enabled jest.resetModules() @@ -30,9 +28,11 @@ describe('HTML processing', () => { // remove formatter from require cache to recognize changed env variable delete require.cache[require.resolve('../../server/formatter')] getProcessedDocAttributes = require('../../server/formatter').getProcessedDocAttributes - const rawHTML = fs.readFileSync(docPath, {encoding: 'utf8'}) - const processedHTML = stubbedProcessedDoc(rawHTML).html - testGlobal.codeEnabledOut = cheerio.load(processedHTML) + + 
testGlobal.inlineCode = {} + testGlobal.inlineCode.rawHTML = fs.readFileSync(docPath, {encoding: 'utf8'}) + testGlobal.inlineCode.processedHTML = stubbedProcessedDoc(testGlobal.inlineCode.rawHTML).html + testGlobal.inlineCode.output = cheerio.load(testGlobal.inlineCode.processedHTML) }) it('does not throw when revision data is unavailable', () => { @@ -41,38 +41,38 @@ describe('HTML processing', () => { }) it('strips unnecessary styles', () => { - const header = testGlobal.output('h2') + const header = testGlobal.general.output('h2') assert.equal(null, header.attr('style')) }) it('strips unnecessary  s', () => { - const introHTML = testGlobal.output("p:contains('Basic text format')").html() + const introHTML = testGlobal.general.output("p:contains('Basic text format')").html() assert.match(introHTML, /Text color and highlighting/) }) describe('inline formats', () => { it('preserves bolds', () => { - const boldSpan = testGlobal.output("span:contains('bold')").first() + const boldSpan = testGlobal.general.output("span:contains('bold')").first() assert.equal('font-weight:700', boldSpan.attr('style')) }) it('preserves italics', () => { - const italicSpan = testGlobal.output("span:contains('italic')").first() + const italicSpan = testGlobal.general.output("span:contains('italic')").first() assert.equal('font-style:italic', italicSpan.attr('style')) }) it('preserves underlines', () => { - const underlinedSpan = testGlobal.output("span:contains('underline')").first() + const underlinedSpan = testGlobal.general.output("span:contains('underline')").first() assert.equal('text-decoration:underline', underlinedSpan.attr('style')) }) it('preserves combined formats', () => { - const combinedSpan = testGlobal.output("span:contains('combined')").first() + const combinedSpan = testGlobal.general.output("span:contains('combined')").first() assert.equal('font-style:italic;font-weight:700;text-decoration:underline', combinedSpan.attr('style')) }) it('preserves image widths', () => { - 
const imageWidth = testGlobal.output('img').first() + const imageWidth = testGlobal.general.output('img').first() const widthMatch = imageWidth.attr('style').match('width') assert.isNotNull(widthMatch) }) @@ -80,58 +80,58 @@ describe('HTML processing', () => { describe('list handling', () => { it('preserves classing on lists', () => { - const ol = testGlobal.output('ol').first() + const ol = testGlobal.general.output('ol').first() assert.match(ol.attr('class'), /lst-/) }) it('presrves the associated style block for lists', () => { - const olClass = testGlobal.output('ol').first().attr('class').split(' ')[0] - assert.match(testGlobal.processedHTML, new RegExp(`ol.${olClass} {`)) + const olClass = testGlobal.general.output('ol').first().attr('class').split(' ')[0] + assert.match(testGlobal.general.processedHTML, new RegExp(`ol.${olClass} {`)) }) it('applies a level- class on lists to support indentation', () => { - const topLevelList = testGlobal.output("ul:contains('Item 1')").first() + const topLevelList = testGlobal.general.output("ul:contains('Item 1')").first() assert.match(topLevelList.attr('class'), / level-0/) - const nestedList = testGlobal.output("ul:contains('Item 1.1')").first() + const nestedList = testGlobal.general.output("ul:contains('Item 1.1')").first() assert.match(nestedList.attr('class'), / level-1/) }) }) describe('code block handling', () => { it('highlights registered languages', () => { - const codeBlock = testGlobal.output('pre > code[data-lang="javascript"]') + const codeBlock = testGlobal.general.output('pre > code[data-lang="javascript"]') assert.exists(codeBlock.html()) }) it('allows   as part of a code block', () => { - const codeBlock = testGlobal.output('pre > code[data-lang="javascript"]') + const codeBlock = testGlobal.general.output('pre > code[data-lang="javascript"]') assert.match(codeBlock.html(), /&nbsp/) }) it('preserves whitespace at the start of a line', () => { - const codeBlock = testGlobal.output('pre > 
code[data-lang="javascript"]') + const codeBlock = testGlobal.general.output('pre > code[data-lang="javascript"]') assert.match(codeBlock.html(), / +jQuery.fn.calcSubWidth/) }) it('scrubs smart quotes', () => { - const codeBlock = testGlobal.output('pre > code[data-lang="javascript"]') + const codeBlock = testGlobal.general.output('pre > code[data-lang="javascript"]') assert.match(codeBlock.html(), /singleQuotedStr = .*'str'/) assert.match(codeBlock.html(), /doubleQuotedStr = .*"str"/) }) it('allows unregistered languages', () => { - const codeBlock = testGlobal.output('pre') + const codeBlock = testGlobal.general.output('pre') assert.match(codeBlock.html(), /1 \+ 1 == 5/) }) it('retains code block backticks', () => { - const codeBlock = testGlobal.output('pre > code[data-lang="javascript"]') + const codeBlock = testGlobal.general.output('pre > code[data-lang="javascript"]') assert.match(codeBlock.html(), /`/) }) it('retains inline code backticks', () => { - const codeBlock = testGlobal.output("code:contains('backtick')") + const codeBlock = testGlobal.general.output("code:contains('backtick')") assert.match(codeBlock.html(), /`backtick`/) }) }) @@ -139,12 +139,12 @@ describe('HTML processing', () => { describe('inline code handling', () => { describe('with inline code disabled', () => { it('does not modify code block content', () => { - const codeBlock = testGlobal.output("pre:contains('codeblocks will not')") + const codeBlock = testGlobal.general.output("pre:contains('codeblocks will not')") assert.match(codeBlock.html(), /<.*%-.*%>/) }) it('does not unescape delimited code', () => { - const className = testGlobal.output("p:contains('.purplePapyrus')") + const className = testGlobal.general.output("p:contains('.purplePapyrus')") const styleTag = className.prev() const openingTag = styleTag.prev() @@ -155,13 +155,13 @@ describe('HTML processing', () => { describe('with inline code enabled', () => { it('does not modify code block content', () => { - const 
codeBlock = testGlobal.codeEnabledOut("pre:contains('codeblocks will not')") + const codeBlock = testGlobal.inlineCode.output("pre:contains('codeblocks will not')") assert.match(codeBlock.html(), /<.*%-.*%>/) }) it('properly unescapes delimited code', () => { - const style = testGlobal.codeEnabledOut("style:contains('.purplePapyrus')") - const styledDiv = testGlobal.codeEnabledOut('div.purplePapyrus') + const style = testGlobal.inlineCode.output("style:contains('.purplePapyrus')") + const styledDiv = testGlobal.inlineCode.output('div.purplePapyrus') assert.exists(style) assert.exists(styledDiv) @@ -173,11 +173,11 @@ describe('HTML processing', () => { describe('comment handling', () => { it('strips comments', () => { - assert.notMatch(testGlobal.processedHTML, /This comment text will not appear/) + assert.notMatch(testGlobal.general.processedHTML, /This comment text will not appear/) }) it('strips inline comment anchors', () => { - const commentAnchorParent = testGlobal.output("p:contains('will be stripped from the')") + const commentAnchorParent = testGlobal.general.output("p:contains('will be stripped from the')") assert.notMatch(commentAnchorParent, /\[a\]/) }) }) From e3f84db0e36aef0224df753093d5c4e405122f9e Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Thu, 8 Aug 2024 11:29:47 +1000 Subject: [PATCH 3/8] Extract formatCodeBlock() in formatter --- server/formatter.js | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/server/formatter.js b/server/formatter.js index 1539f63c..e3756edc 100644 --- a/server/formatter.js +++ b/server/formatter.js @@ -106,20 +106,7 @@ function normalizeHtml(html) { function formatCode(html) { // Expand code blocks html = html.replace(/```(.*?)```/ig, (match, content) => { - // strip interior

tags added by google - content = content.replace(/(?:<\/p>

|)/g, '\n').replace(/<\/?p>/g, '').trim() - // try to find language hint within text block - const [, lang] = content.match(/^(.+?)\n/) || [] - - if (lang && hljs.getLanguage(lang)) { - // if the language hint exists and contains a valid language, remove it from the code block - content = content.replace(`${lang}\n`, '') - - const textOnlyContent = cheerio.load(content).text() - const highlighted = hljs.highlight(lang, textOnlyContent, true) - return `

${formatCodeContent(highlighted.value)}
` - } - return `
${formatCodeContent(content)}
` + return formatCodeBlock(content) }) // Replace double backticks with , for supporting backticks in inline code blocks @@ -152,6 +139,23 @@ function formatCode(html) { return html } +function formatCodeBlock(content) { + // strip interior

tags added by google + content = content.replace(/(?:<\/p>

|)/g, '\n').replace(/<\/?p>/g, '').trim() + // try to find language hint within text block + const [, lang] = content.match(/^(.+?)\n/) || [] + + if (lang && hljs.getLanguage(lang)) { + // if the language hint exists and contains a valid language, remove it from the code block + content = content.replace(`${lang}\n`, '') + + const textOnlyContent = cheerio.load(content).text() + const highlighted = hljs.highlight(lang, textOnlyContent, true) + return `

${formatCodeContent(highlighted.value)}
` + } + return `
${formatCodeContent(content)}
` +} + function formatCodeContent(content) { content = content.replace(/[‘’]|ȁ[89];/g, "'").replace(/[“”]|ȁ[CD];/g, '"') // remove smart quotes content = content.replace(/`/g, '`') // remove internal cases of backticks From 823dbfff4c9c0b305d7ae0dbdabb2caf50cf4e8c Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Thu, 8 Aug 2024 11:36:23 +1000 Subject: [PATCH 4/8] Add support for formatting native code blocks --- server/formatter.js | 5 +++++ .../fixtures/supportedFormats.nativeCode.html | 1 + test/unit/htmlProcessing.test.js | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 test/fixtures/supportedFormats.nativeCode.html diff --git a/server/formatter.js b/server/formatter.js index e3756edc..6601da9e 100644 --- a/server/formatter.js +++ b/server/formatter.js @@ -109,6 +109,11 @@ function formatCode(html) { return formatCodeBlock(content) }) + // Expand native code blocks + html = html.replace(/

(.*?)<\/p>/ig, (match, content) => { + return formatCodeBlock(content) + }) + // Replace double backticks with , for supporting backticks in inline code blocks html = html.replace(/``(.+?`?)``/g, (match, content) => { return `${formatCodeContent(content)}` diff --git a/test/fixtures/supportedFormats.nativeCode.html b/test/fixtures/supportedFormats.nativeCode.html new file mode 100644 index 00000000..4691086e --- /dev/null +++ b/test/fixtures/supportedFormats.nativeCode.html @@ -0,0 +1 @@ +

Intro sentence.

// This is a code block

// Here's another line

Outro sentence.

diff --git a/test/unit/htmlProcessing.test.js b/test/unit/htmlProcessing.test.js index 62049565..587f7f93 100644 --- a/test/unit/htmlProcessing.test.js +++ b/test/unit/htmlProcessing.test.js @@ -5,6 +5,7 @@ const {assert} = require('chai') let {getProcessedDocAttributes} = require('../../server/formatter') const docPath = path.join(__dirname, '../fixtures/supportedFormats.html') +const docPathNativeCode = path.join(__dirname, '../fixtures/supportedFormats.nativeCode.html') // helper function to stub the doc and get a section of the returned document function stubbedProcessedDoc(unprocessedHtml, editorName) { @@ -22,6 +23,12 @@ describe('HTML processing', () => { testGlobal.general.processedHTML = stubbedProcessedDoc(testGlobal.general.rawHTML).html testGlobal.general.output = cheerio.load(testGlobal.general.processedHTML) + // Native code + testGlobal.native = {} + testGlobal.native.rawHTML = fs.readFileSync(docPathNativeCode, {encoding: 'utf8'}) + testGlobal.native.processedHTML = stubbedProcessedDoc(testGlobal.native.rawHTML).html + testGlobal.native.output = cheerio.load(testGlobal.native.processedHTML) + // Supported formats with inline code enabled jest.resetModules() process.env.ALLOW_INLINE_CODE = 'true' @@ -136,6 +143,18 @@ describe('HTML processing', () => { }) }) + describe('native code block handling', () => { + it('formats the code block', () => { + const codeBlock = testGlobal.native.output('pre > code') + assert.exists(codeBlock.html()) + }) + + it('removes code block marker unicode characters', () => { + assert.notInclude(testGlobal.native.processedHTML, '') + assert.notInclude(testGlobal.native.processedHTML, '') + }) + }) + describe('inline code handling', () => { describe('with inline code disabled', () => { it('does not modify code block content', () => { From 9c3240567283c463f15f6dd1ccf6139d5b494a40 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 13 Aug 2024 09:58:14 +1000 Subject: [PATCH 5/8] Correctly render tightly interleaved 
text/code blocks --- server/formatter.js | 4 ++-- test/fixtures/supportedFormats.nativeCode.html | 2 +- test/unit/htmlProcessing.test.js | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/server/formatter.js b/server/formatter.js index 6601da9e..175d2a6d 100644 --- a/server/formatter.js +++ b/server/formatter.js @@ -110,8 +110,8 @@ function formatCode(html) { }) // Expand native code blocks - html = html.replace(/

(.*?)<\/p>/ig, (match, content) => { - return formatCodeBlock(content) + html = html.replace(/

(.*?)(.*?)<\/p>/ig, (match, content, tailingPara) => { + return `${formatCodeBlock(content)}

${tailingPara}

` }) // Replace double backticks with , for supporting backticks in inline code blocks diff --git a/test/fixtures/supportedFormats.nativeCode.html b/test/fixtures/supportedFormats.nativeCode.html index 4691086e..6933e7d6 100644 --- a/test/fixtures/supportedFormats.nativeCode.html +++ b/test/fixtures/supportedFormats.nativeCode.html @@ -1 +1 @@ -

Intro sentence.

// This is a code block

// Here's another line

Outro sentence.

+

Intro sentence.

// This is a code block

// Here's another line

Middle sentence.

# A second code block

Outro sentence.

\ No newline at end of file diff --git a/test/unit/htmlProcessing.test.js b/test/unit/htmlProcessing.test.js index 587f7f93..f0688f98 100644 --- a/test/unit/htmlProcessing.test.js +++ b/test/unit/htmlProcessing.test.js @@ -144,9 +144,10 @@ describe('HTML processing', () => { }) describe('native code block handling', () => { - it('formats the code block', () => { + it('formats the code blocks', () => { const codeBlock = testGlobal.native.output('pre > code') assert.exists(codeBlock.html()) + assert.equal(codeBlock.length, 2) }) it('removes code block marker unicode characters', () => { From 2720f4bb36e41fd1dd9e7e1388162e77fe9a0115 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 13 Aug 2024 12:38:46 +1000 Subject: [PATCH 6/8] Leave tags following code blocks intact Google docs interleaves the end-of-code marker with the following tag. eg:

<p>&#xEC03;my code block</p><h2>&#xEC02;my heading</h2>

Make sure we match and retain the following tag. --- server/formatter.js | 7 +++++-- test/fixtures/supportedFormats.nativeCode.html | 2 +- test/unit/htmlProcessing.test.js | 7 ++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/server/formatter.js b/server/formatter.js index 175d2a6d..c6440590 100644 --- a/server/formatter.js +++ b/server/formatter.js @@ -110,8 +110,11 @@ function formatCode(html) { }) // Expand native code blocks - html = html.replace(/

(.*?)(.*?)<\/p>/ig, (match, content, tailingPara) => { - return `${formatCodeBlock(content)}

${tailingPara}

` + // Google docs interleaves the end-of-code marker with the following tag. eg: + //

my code block

my heading

+ // Make sure we match and retain the following tag + html = html.replace(/

(.*?)<\/p>(<[^>]*>)/ig, (match, content, followingTag) => { + return `${formatCodeBlock(content)}${followingTag}` }) // Replace double backticks with , for supporting backticks in inline code blocks diff --git a/test/fixtures/supportedFormats.nativeCode.html b/test/fixtures/supportedFormats.nativeCode.html index 6933e7d6..7f4e8d80 100644 --- a/test/fixtures/supportedFormats.nativeCode.html +++ b/test/fixtures/supportedFormats.nativeCode.html @@ -1 +1 @@ -

Intro sentence.

// This is a code block

// Here's another line

Middle sentence.

# A second code block

Outro sentence.

\ No newline at end of file +

Intro sentence.

// This is a code block

// Here's another line

Middle sentence.

# A second code block

Outro sentence.

/* Another code block */

Heading following a code block

diff --git a/test/unit/htmlProcessing.test.js b/test/unit/htmlProcessing.test.js index f0688f98..eb42c730 100644 --- a/test/unit/htmlProcessing.test.js +++ b/test/unit/htmlProcessing.test.js @@ -147,7 +147,12 @@ describe('HTML processing', () => { it('formats the code blocks', () => { const codeBlock = testGlobal.native.output('pre > code') assert.exists(codeBlock.html()) - assert.equal(codeBlock.length, 2) + assert.equal(codeBlock.length, 3) + }) + + it('leaves the trailing heading intact', () => { + const heading = testGlobal.native.output('h2') + assert.equal(heading.html(), 'Heading following a code block') }) it('removes code block marker unicode characters', () => { From 0c6ac3da0a40c16d40f2ad7bdccd37c6d75d1f18 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Thu, 15 Aug 2024 15:12:46 +1000 Subject: [PATCH 7/8] Unnest start and end markers --- server/formatter.js | 6 ++++++ test/unit/htmlProcessing.test.js | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/server/formatter.js b/server/formatter.js index c6440590..a1f49057 100644 --- a/server/formatter.js +++ b/server/formatter.js @@ -109,6 +109,12 @@ function formatCode(html) { return formatCodeBlock(content) }) + // Preformat native code blocks + // Unnest native code block start and end markers + html = html.replace(/]*>(ເ[23];)<\/span>/ig, (match, marker) => { + return marker + }) + // Expand native code blocks // Google docs interleaves the end-of-code marker with the following tag. eg: //

my code block

my heading

diff --git a/test/unit/htmlProcessing.test.js b/test/unit/htmlProcessing.test.js index eb42c730..cd325e4e 100644 --- a/test/unit/htmlProcessing.test.js +++ b/test/unit/htmlProcessing.test.js @@ -15,6 +15,7 @@ function stubbedProcessedDoc(unprocessedHtml, editorName) { describe('HTML processing', () => { const testGlobal = {} + const condenseHtml = (html) => html.replace(/\n/g, '').replace(/>\s+<') beforeAll(() => { // General supported formats @@ -159,6 +160,20 @@ describe('HTML processing', () => { assert.notInclude(testGlobal.native.processedHTML, '') assert.notInclude(testGlobal.native.processedHTML, '') }) + + it('unnests start and end markers', () => { + const html = condenseHtml(` +

+ \uEC03 + my code +

+

+ \uEC02 +

+ `) + const processedHtml = stubbedProcessedDoc(html).html + assert.equal(processedHtml, '
my code
\n

') + }) }) describe('inline code handling', () => { From 2a604798256bb25c9aaf4fe3daf85ba112076d8d Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Thu, 15 Aug 2024 15:21:08 +1000 Subject: [PATCH 8/8] Process

tags with attributes --- server/formatter.js | 4 ++-- test/unit/htmlProcessing.test.js | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/server/formatter.js b/server/formatter.js index a1f49057..fe176707 100644 --- a/server/formatter.js +++ b/server/formatter.js @@ -119,7 +119,7 @@ function formatCode(html) { // Google docs interleaves the end-of-code marker with the following tag. eg: //

my code block

my heading

// Make sure we match and retain the following tag - html = html.replace(/

(.*?)<\/p>(<[^>]*>)/ig, (match, content, followingTag) => { + html = html.replace(/]*>(.*?)<\/p>(<[^>]*>)/ig, (match, content, followingTag) => { return `${formatCodeBlock(content)}${followingTag}` }) @@ -155,7 +155,7 @@ function formatCode(html) { function formatCodeBlock(content) { // strip interior

tags added by google - content = content.replace(/(?:<\/p>

|)/g, '\n').replace(/<\/?p>/g, '').trim() + content = content.replace(/(?:<\/p>]*>|)/g, '\n').replace(/<\/?p>/g, '').trim() // try to find language hint within text block const [, lang] = content.match(/^(.+?)\n/) || [] diff --git a/test/unit/htmlProcessing.test.js b/test/unit/htmlProcessing.test.js index cd325e4e..f08355e3 100644 --- a/test/unit/htmlProcessing.test.js +++ b/test/unit/htmlProcessing.test.js @@ -174,6 +174,17 @@ describe('HTML processing', () => { const processedHtml = stubbedProcessedDoc(html).html assert.equal(processedHtml, '

my code
\n

') }) + + it('removes interior

tags with attributes', () => { + const html = condenseHtml(` +

\uEC03my code

+

more code

+

\uEC02

+ `) + console.log(html) + const processedHtml = stubbedProcessedDoc(html).html + assert.equal(processedHtml, '
my code\nmore code
\n

') + }) }) describe('inline code handling', () => {