Skip to content

Commit

Permalink
Merge branch 'main' into dev-mg-html-lexical
Browse files (browse the repository at this point in the history)
  • Loading branch information
PaulAdamDavis committed Dec 4, 2023
2 parents 34b458f + 2d600ab commit d18a0fb
Show file tree
Hide file tree
Showing 84 changed files with 1,717 additions and 406 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
node: [ '18.12.1' ]
name: Node ${{ matrix.node }}
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
- uses: actions/setup-node@v3
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
- uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node }}
- run: yarn global add lerna
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Each tool has its own detailed documentation:
- [Jekyll](https://github.com/TryGhost/migrate/tree/main/packages/mg-jekyll-export)
- [Letterdrop](https://github.com/TryGhost/migrate/tree/main/packages/mg-letterdrop)
- [Libsyn](https://github.com/TryGhost/migrate/tree/main/packages/mg-libsyn)
- [Mailchimp Members](https://github.com/TryGhost/migrate/tree/main/packages/mg-mailchimp-members)
- [Medium Content](https://github.com/TryGhost/migrate/tree/main/packages/mg-medium-export)
- [Medium Members](https://github.com/TryGhost/migrate/tree/main/packages/mg-medium-members)
- [Squarespace](https://github.com/TryGhost/migrate/tree/main/packages/mg-squarespace-xml)
Expand Down
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
},
"devDependencies": {
"@tryghost/errors": "1.2.26",
"eslint": "8.50.0",
"eslint-plugin-ghost": "3.3.2",
"eslint": "8.54.0",
"eslint-plugin-ghost": "3.4.0",
"jest": "29.7.0",
"jest-extended": "4.0.1"
"jest-extended": "4.0.2"
}
}
4 changes: 2 additions & 2 deletions packages/listr-smart-renderer/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@tryghost/listr-smart-renderer",
"version": "0.5.11",
"version": "0.5.12",
"repository": "https://github.com/TryGhost/migrate/tree/main/packages/listr-smart-renderer",
"author": "Ghost Foundation",
"license": "MIT",
Expand All @@ -27,7 +27,7 @@
},
"devDependencies": {
"jest": "29.7.0",
"jest-extended": "4.0.1"
"jest-extended": "4.0.2"
},
"dependencies": {
"chalk": "5.3.0",
Expand Down
4 changes: 2 additions & 2 deletions packages/mg-assetscraper/lib/AssetScraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import AssetCache from './AssetCache.js';
// Taken from https://github.com/TryGhost/Ghost/blob/main/ghost/core/core/shared/config/overrides.json
const knownImageTypes = ['image/jpeg', 'image/jpg', 'image/png', 'image/gif', 'image/svg+xml', 'image/x-icon', 'image/vnd.microsoft.icon', 'image/webp'];
const knownMediaTypes = ['video/mp4', 'video/webm', 'video/ogg', 'audio/mpeg', 'audio/mp3', 'audio/vnd.wav', 'audio/wave', 'audio/wav', 'audio/x-wav', 'audio/ogg', 'audio/x-m4a'];
const knownFileTypes = ['application/pdf', 'application/json', 'application/ld+json', 'application/vnd.oasis.opendocument.presentation', 'application/vnd.oasis.opendocument.spreadsheet', 'application/vnd.oasis.opendocument.text', 'application/vnd.ms-powerpoint', 'application/vnd.openxmlformats-officedocument.presentationml.presentation', 'application/rtf', 'text/plain', 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'application/xml', 'application/atom+xml'];
const knownFileTypes = ['application/pdf', 'application/json', 'application/ld+json', 'application/vnd.oasis.opendocument.presentation', 'application/vnd.oasis.opendocument.spreadsheet', 'application/vnd.oasis.opendocument.text', 'application/vnd.ms-powerpoint', 'application/vnd.openxmlformats-officedocument.presentationml.presentation', 'application/rtf', 'text/plain', 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'application/xml', 'application/atom+xml', 'application/x-msdownload'];
const knownTypes = [...knownImageTypes, ...knownMediaTypes, ...knownFileTypes];

const isValidUrlString = (string) => {
Expand Down Expand Up @@ -612,7 +612,7 @@ export default class AssetScraper {
if (res.headers) {
let theHeaders = res.headers;

if (theHeaders['content-type'] === 'application/octet-stream') {
if (theHeaders['content-type'] === 'application/octet-stream' && theHeaders['content-disposition']) {
const disposition = theHeaders['content-disposition'];
const parts = disposition.split('.');
const extension = parts.pop();
Expand Down
16 changes: 8 additions & 8 deletions packages/mg-assetscraper/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@tryghost/mg-assetscraper",
"version": "0.4.4",
"version": "0.4.6",
"repository": "https://github.com/TryGhost/migrate/tree/main/packages/mg-assetscraper",
"author": "Ghost Foundation",
"license": "MIT",
Expand All @@ -20,19 +20,19 @@
"access": "public"
},
"devDependencies": {
"eslint": "8.50.0",
"eslint": "8.54.0",
"jest": "29.7.0",
"jest-extended": "4.0.1",
"jest-extended": "4.0.2",
"listr": "0.14.3"
},
"dependencies": {
"@tryghost/listr-smart-renderer": "^0.5.11",
"@tryghost/logging": "2.4.8",
"@tryghost/string": "0.2.8",
"@tryghost/listr-smart-renderer": "^0.5.12",
"@tryghost/logging": "2.4.9",
"@tryghost/string": "0.2.10",
"cheerio": "1.0.0-rc.12",
"fast-replaceall": "1.0.2",
"file-type": "18.5.0",
"fs-extra": "11.1.1",
"file-type": "18.7.0",
"fs-extra": "11.2.0",
"got": "11.8.6",
"listr": "0.14.3",
"markdown-it": "13.0.2",
Expand Down
6 changes: 3 additions & 3 deletions packages/mg-beehiiv-members/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@tryghost/mg-beehiiv-members",
"version": "0.1.1",
"version": "0.1.3",
"repository": "https://github.com/TryGhost/migrate/tree/main/packages/mg-beehiiv-members",
"author": "Ghost Foundation",
"license": "MIT",
Expand Down Expand Up @@ -28,9 +28,9 @@
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"c8": "8.0.1",
"eslint": "8.50.0",
"eslint": "8.54.0",
"jest": "29.7.0",
"sinon": "^16.0.0",
"sinon": "^17.0.0",
"typescript": "5.2.2"
},
"dependencies": {
Expand Down
146 changes: 0 additions & 146 deletions packages/mg-beehiiv-members/src/lib/process.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,149 +52,3 @@ const processCsv = async ({csvPath}: {csvPath: string}) => {
export {
processCsv
};

// import $ from 'cheerio';
// import sanitizeHtml from 'sanitize-html';

// const getYouTubeID = (url: string) => {
// const arr = url.split(/(vi\/|v%3D|v=|\/v\/|youtu\.be\/|\/embed\/)/);
// return undefined !== arr[2] ? arr[2].split(/[^\w-]/i)[0] : arr[0];
// };

// const processHTML = ({html, postData, options}: {html: string, postData?: mappedDataObject, options: any}) => {
// // First, clean up the email HTML to remove bits we don't want or change up

// // Let's do some regexp magic to remove some beehiiv variables
// // https://support.beehiiv.com/hc/en-us/articles/7606088263191
// html = html.replace(/{{subscriber_id}}/g, '#');
// html = html.replace(/{{rp_refer_url}}/g, '#');

// let $html = $.load(html, {
// xmlMode: true,
// decodeEntities: false
// });

// // Remove hidden elements
// $html('[style*="display:none"]').remove();
// $html('[style*="display: none"]').remove();

// // Remove the share links at the top
// $html('table.mob-block').remove();

// // Remove the open tracking pixel element
// $html('div[data-open-tracking="true"]:contains("{{OPEN_TRACKING_PIXEL}}")').remove();

// $html('p:contains("{{rp_personalized_text}}")').remove();
// $html('img[src="{{rp_next_milestone_image_url}}"]').remove();
// $html(`a[href="{{rp_referral_hub_url}}"]`).remove();

// // Remove unsubscribe links, social links, & email footer
// $html('td.b').remove();

// // Remove the 'Read online' link
// $html('td.f').remove();

// // Remove the post title container, otherwise it would be a duplicate
// if (postData?.data?.title) {
// $html(`h1:contains("${postData.data.title}")`).parentsUntil('table').remove();
// }

// // Convert '...' to <hr />
// $html('p').each((i, el) => {
// const text = $html(el).text().trim();
// if (text === '...' || text === '…' || text === '&hellip;') {
// $html(el).replaceWith('<hr />');
// }
// });

// $html('a[href*="youtube.com"]').each((i, el) => {
// const imageCount = $html(el).find('img').length;
// const hasPlayIcon = $html(el).find('img[src*="youtube_play_icon.png"]').length;
// const hasThumbnail = $html(el).find('img[src*="i.ytimg.com/vi"]').length;
// const src = $html(el).attr('href');
// const captionText = $html(el).find('p')?.text()?.trim() || false;
// const captionHtml = $html(el).find('p')?.html()?.trim() || false;

// if (imageCount === 2 && hasPlayIcon && hasThumbnail && src) {
// const videoID = getYouTubeID(src);

// const $figure = $(`<figure></figure>`);
// $figure.addClass('kg-card kg-embed-card');

// const $figcaption = $(`<figcaption></figcaption>`);
// const $iframe = $(`<iframe src="https://www.youtube.com/embed/${videoID}?feature=oembed" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen=""></iframe>`);

// $figure.append($iframe);

// if (captionText && captionText.length > 0) {
// $iframe.attr('title', captionText);
// }

// if (captionHtml) {
// $figure.addClass('kg-card-hascaption');
// $figcaption.html(captionHtml);
// $figure.append($figcaption);
// }

// $html(el).replaceWith($figure);
// }
// });

// if (options?.url && options?.subscribeLink) {
// $html(`a[href^="${options.url}/subscribe"]`).each((i, el) => {
// $html(el).attr('href', options.subscribeLink);
// $html(el).removeAttr('target');
// $html(el).removeAttr('rel');
// });
// }

// // Get the cleaned HTML
// const bodyHtml = $html('body').html();

// // Pass the cleaned HTML through the sanitizer to only include specific elements
// const sanitizedHtml = sanitizeHtml(bodyHtml, {
// allowedTags: [
// 'b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'blockquote',
// 'figure', 'figcaption', 'img', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
// 'div', 'hr', 'iframe'
// ],
// allowedAttributes: {
// a: ['href', 'title', 'rel', 'target'],
// img: ['src', 'alt', 'title'],
// iframe: ['width', 'height', 'src', 'title', 'frameborder', 'allow', 'allowfullscreen'],
// figure: ['class']
// }
// });

// return sanitizedHtml.trim();
// };

// const removeDuplicateFeatureImage = ({html, featureSrc}: {html: string, featureSrc: string}) => {
// let $html = $.load(html, {
// xmlMode: true,
// decodeEntities: false
// });

// let firstElement = $html('*').first();

// if (($(firstElement).get(0) && $(firstElement).get(0).name === 'img') || $(firstElement).find('img').length) {
// let theElementItself = $(firstElement).get(0).name === 'img' ? firstElement : $(firstElement).find('img');
// let firstImgSrc: any = $(theElementItself).attr('src');

// if (featureSrc.length > 0 && firstImgSrc) {
// let normalizedFirstSrc = firstImgSrc.replace('fit=scale-down,format=auto,onerror=redirect,quality=80', 'quality=100');

// if (featureSrc === normalizedFirstSrc) {
// $(theElementItself).remove();
// }
// }
// }

// return $html.html();
// };

// export {
// getYouTubeID,
// processHTML,
// removeDuplicateFeatureImage
// };
8 changes: 4 additions & 4 deletions packages/mg-beehiiv/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@tryghost/mg-beehiiv",
"version": "0.1.1",
"version": "0.1.3",
"repository": "https://github.com/TryGhost/migrate/tree/main/packages/mg-beehiiv",
"author": "Ghost Foundation",
"license": "MIT",
Expand Down Expand Up @@ -29,16 +29,16 @@
"@typescript-eslint/parser": "^6.0.0",
"c8": "8.0.1",
"dotenv": "16.3.1",
"eslint": "8.50.0",
"eslint": "8.54.0",
"jest": "29.7.0",
"sinon": "^16.0.0",
"sinon": "^17.0.0",
"typescript": "5.2.2"
},
"dependencies": {
"@tryghost/debug": "0.1.26",
"@tryghost/errors": "1.2.26",
"@tryghost/mg-fs-utils": "0.12.14",
"@tryghost/string": "0.2.8",
"@tryghost/string": "0.2.10",
"cheerio": "1.0.0-rc.12",
"sanitize-html": "2.11.0"
}
Expand Down
20 changes: 16 additions & 4 deletions packages/mg-blogger/lib/process.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@ const getAllAttributes = function (el) {
const handleFirstImage = (args) => {
let {postData, html} = args;

const $html = $.load(html);
// const $html = $.load(html);

const $html = $.load(html, {
decodeEntities: false,
scriptingEnabled: false
}, false); // This `false` is `isDocument`. If `true`, <html>, <head>, and <body> elements are introduced

const firstContentElement = $html('*').first();

if (firstContentElement[0].name === 'img') {
Expand Down Expand Up @@ -87,7 +93,10 @@ const processHTMLContent = async (args) => {

html = autop(html);

const $html = $.load(html);
const $html = $.load(html, {
decodeEntities: false,
scriptingEnabled: false
}, false); // This `false` is `isDocument`. If `true`, <html>, <head>, and <body> elements are introduced

$html('div.separator').each((i, el) => {
$(el).replaceWith(`<hr><div>${$(el).html().trim()}</div>`);
Expand Down Expand Up @@ -183,7 +192,7 @@ const processHTMLContent = async (args) => {
$(el).replaceWith(`<p>${$(el).html().trim()}</p>`);
});

$html('div[style="style="text-align: center;"]').each((i, el) => {
$html('div[style="text-align: center;"]').each((i, el) => {
$(el).replaceWith(`<p>${$(el).html().trim()}</p>`);
});

Expand Down Expand Up @@ -216,7 +225,7 @@ const processHTMLContent = async (args) => {
});

$html('p').each((i, el) => {
if ($(el).html().trim() === '' || $(el).html() === '&#xA0;') {
if ($(el).html().trim() === '' || $(el).html() === '&#xA0;' || $(el).html().trim() === '&nbsp;') {
$(el).remove();
}
});
Expand Down Expand Up @@ -244,6 +253,9 @@ const processHTMLContent = async (args) => {
// Remove first element(s) if <hr>
html = html.replace(/^(<hr\/?> ?)+/gm, '').trim();

// Remove empty attributes
html = html.replace(/=""/g, '');

postData.html = html;

if (options?.firstImageAsFeatured) {
Expand Down
8 changes: 4 additions & 4 deletions packages/mg-blogger/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@tryghost/mg-blogger",
"version": "0.1.5",
"version": "0.1.8",
"repository": "https://github.com/TryGhost/migrate/tree/main/packages/mg-blogger",
"author": "Ghost Foundation",
"license": "MIT",
Expand All @@ -21,13 +21,13 @@
},
"devDependencies": {
"jest": "29.7.0",
"jest-extended": "4.0.1"
"jest-extended": "4.0.2"
},
"dependencies": {
"@tryghost/errors": "1.2.26",
"@tryghost/kg-default-cards": "9.1.5",
"@tryghost/kg-default-cards": "9.1.9",
"autop": "1.0.1",
"cheerio": "0.22.0",
"cheerio": "1.0.0-rc.12",
"node-fetch": "^3.3.1",
"sanitize-html": "2.11.0",
"simple-dom": "1.4.0"
Expand Down
Loading

0 comments on commit d18a0fb

Please sign in to comment.