Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added script and workflows to check broken links #2819

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
68 changes: 68 additions & 0 deletions .github/workflows/broken-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Runs the Markdown broken-link checker on every pull request update and
# posts the 404 results as a comment on that pull request.
name: Check Broken Links

on:
  pull_request:
    types: [opened, synchronize, reopened]

permissions:
  contents: read
  pull-requests: write

jobs:
  check-broken-links:
    name: Broken Link Checker
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Set Up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 18

      - name: Install Dependencies
        run: |
          cd broken-links-script
          npm install

      # The script writes broken_links_markdown.csv into its working directory.
      - name: Run Broken Link Checker
        run: |
          cd broken-links-script
          node BrokenLinkChecker.js

      - name: Parse and Comment Broken Links
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const path = './broken-links-script/broken_links_markdown.csv';
            let body;
            if (fs.existsSync(path)) {
              const data = fs.readFileSync(path, 'utf8').trim();
              const rows = data
                .split(/\r?\n/)
                .slice(1) // Skip the header line
                .map((line) => {
                  // Split from the right so commas inside the URL stay in the URL column.
                  const fields = line.split(',');
                  const status = fields.pop();
                  const file = fields.pop();
                  return { url: fields.join(','), file, status };
                });
              // Compare the status column itself instead of searching the whole line for ",404".
              const broken404Links = rows.filter((row) => row.status === '404');

              if (broken404Links.length > 0) {
                body = `### :warning: The following 404 broken links were found:\n\n${broken404Links
                  .map(({ url, file, status }) => `- **[${url}](${url})** in file \`${file}\` (Status: ${status})`)
                  .join('\n')}`;
              } else {
                body = 'No broken links with 404 status found.';
              }
            } else {
              body = 'Error: Broken link checker did not produce a CSV report.';
            }

            // Await the request so the step only finishes once the comment is
            // posted and an API failure fails the step.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });
126 changes: 126 additions & 0 deletions broken-links-script/BrokenLinkChecker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import fs from "fs";
import path from "path";
import fetch from "node-fetch";
import pLimit from "p-limit";

/**
 * Recursively collects the paths of all Markdown files below a directory.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths (joined onto `dir`) of every regular file whose
 *   name ends in ".md", in directory-listing order with subdirectories
 *   expanded in place.
 */
const getMarkdownFiles = (dir) =>
  fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      return getMarkdownFiles(entryPath);
    }
    const isMarkdown = entry.isFile() && entry.name.endsWith(".md");
    return isMarkdown ? [entryPath] : [];
  });

// Root URL that links without an http(s) scheme are resolved against.
const BASE_URL = "https://cumulocity.com/docs";

/**
 * Replacement text for the Hugo shortcodes (`{{< name >}}`) that may appear
 * inside link targets. Each shortcode is listed exactly once; shortcodes that
 * are not listed here are replaced with an empty string by the resolver.
 */
const shortcodeMapping = {
  "product-c8y-iot": "Cumulocity",
  "c8y-edge-current-version-alt": "10.18",
  "domain-c8y": "cumulocity.com",
  "link-c8y-github": "https://github.com/Cumulocity-IoT/",
  "c8y-edge-current-version": "1018",
  "c8y-support-link": "https://cumulocity.atlassian.net/servicedesk/customer/user/login?destination=portals",
  "link-apamadoc-api": "https://cumulocity.com/apama/docs/latest/related/ApamaDoc/index.html",
  // NOTE(review): looks like an unresolved Hugo template expression rather than a URL — confirm.
  "link-c8y-doc-baseurl": ".Page.Site.BaseURL",
  "link-device-portal": "https://ecosystem.cumulocity.com/devices/?filter_cumulocity_certified=yes",
  "link-apama-webhelp": "https://cumulocity.com/apama/docs/latest",
  "link-c8y-training": "https://cumulocity.moodlecloud.com/",
  "c8y-resources-server-link": "https://download.cumulocity.com/",
  "c8y-tech-community-link": "https://techcommunity.cumulocity.com/",
  // NOTE(review): the two e-mail values below look like obfuscated placeholders — confirm the real addresses.
  "c8y-support-email": "[email protected]",
  "email-c8y-info": "[email protected]",
};

/**
 * Replaces every Hugo shortcode (`{{< name >}}`) in a link with its mapped text.
 *
 * @param {string} link - Raw link target taken from a Markdown file.
 * @returns {string} The link with each shortcode substituted; shortcodes
 *   without a mapping (or mapped to null/undefined) are replaced with "".
 */
const resolveHugoShortcode = (link) =>
  link.replace(/\{\{<\s*(.*?)\s*>\}\}/g, (match, shortcode) => {
    // Only accept the mapping's own keys: a plain property read would also
    // find inherited members such as "constructor" or "toString".
    if (!Object.hasOwn(shortcodeMapping, shortcode)) {
      return "";
    }
    return shortcodeMapping[shortcode] ?? "";
  });

/**
 * Turns a raw Markdown link target into an absolute URL that can be fetched.
 *
 * @param {string} link - Link target as written in the Markdown source.
 * @returns {string|null} The absolute URL, or null for `mailto:` and `tel:`
 *   links, which are not checked. Targets without an http(s) scheme are
 *   resolved against BASE_URL.
 */
const resolveFullUrl = (link) => {
  let destination = link.trim();

  // Markdown allows a destination to be wrapped in angle brackets
  // (`[text](<url>)`). Strip them so the URL is not mistaken for a relative
  // path; the closing bracket may already be missing after link extraction.
  if (destination.startsWith("<")) {
    destination = destination.slice(1).replace(/>$/, "");
  }

  const resolvedLink = resolveHugoShortcode(destination).trim();

  if (resolvedLink.startsWith("mailto:") || resolvedLink.startsWith("tel:")) {
    return null;
  }

  if (resolvedLink.startsWith("http://") || resolvedLink.startsWith("https://")) {
    return resolvedLink;
  }

  // Join with exactly one slash between the base URL and the relative path.
  return `${BASE_URL.replace(/\/$/, "")}/${resolvedLink.replace(/^\//, "")}`;
};

/**
 * Checks whether a single Markdown link is reachable.
 *
 * @param {string} link - Link target as written in the Markdown source.
 * @param {string} mdFile - Path of the Markdown file containing the link.
 * @param {number} [timeoutMs=30000] - Maximum time to wait for each request.
 * @returns {Promise<{url: string, file: string, status: (number|string)}|null>}
 *   null when the link is skipped or reachable; otherwise a record with the
 *   resolved URL, the file, and the HTTP status (or "Error" when the request
 *   itself failed or timed out).
 */
const checkLink = async (link, mdFile, timeoutMs = 30000) => {
  const fullUrl = resolveFullUrl(link);
  if (!fullUrl) {
    return null;
  }

  // Issues one request that is aborted after timeoutMs, so a server that
  // never answers cannot stall the whole run.
  const request = async (method) => {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      return await fetch(fullUrl, { method, signal: controller.signal });
    } finally {
      clearTimeout(timer);
    }
  };

  try {
    let response = await request("HEAD");
    if (!response.ok) {
      // Some servers reject or mishandle HEAD (405, but also 403/404/501)
      // for pages that exist, so confirm every HEAD failure with a GET.
      response = await request("GET");
    }

    if (!response.ok) {
      return { url: fullUrl, file: mdFile, status: response.status };
    }
  } catch (error) {
    // Network failures and timeouts are reported without an HTTP status.
    return { url: fullUrl, file: mdFile, status: "Error" };
  }

  return null;
};

/**
 * Entry point: scans every Markdown file under the content directory, checks
 * each inline link with at most 10 requests in flight, and writes the links
 * that returned HTTP 404 to broken_links_markdown.csv in the current working
 * directory.
 */
(async () => {
  // Resolved against the current working directory, so the script has to be
  // started from inside its own folder (the workflow does `cd broken-links-script`).
  const projectDir = ".././content";
  const markdownFiles = getMarkdownFiles(projectDir);

  const limit = pLimit(10);

  const tasks = markdownFiles.flatMap((mdFile) => {
    const content = fs.readFileSync(mdFile, "utf8");

    // Inline links `[text](target)`; the lookbehind skips images `![alt](src)`.
    const links = [...content.matchAll(/(?<!\!)\[.*?\]\((.+?)\)/g)].map(([, target]) =>
      // The lazy match stops at the first ")", so a target that itself
      // contains "(...)" loses its closing bracket; put it back.
      target.includes("(") && !target.endsWith(")") ? `${target})` : target
    );

    return links.map((link) => limit(() => checkLink(link, mdFile)));
  });

  // Use the resolved values instead of pushing into a shared array from the
  // concurrent tasks: the report then follows file/link order on every run
  // rather than request completion order.
  const results = await Promise.all(tasks);

  const filteredBrokenLinks = results.filter((result) => result?.status === 404);

  // Fields are written as-is (no CSV quoting), one broken link per line.
  const csvData =
    "URL,File Path,Status Code\n" +
    filteredBrokenLinks.map((link) => `${link.url},${link.file},${link.status}`).join("\n");

  fs.writeFileSync("broken_links_markdown.csv", csvData);
  console.log("Broken links in markdown files saved to broken_links_markdown.csv");
  // NOTE(review): presumably forces termination even if connections are still open — confirm.
  process.exit(0);
})().catch((error) => {
  // Report unexpected failures (e.g. unreadable content directory) explicitly
  // and exit non-zero instead of relying on unhandled-rejection behaviour.
  console.error("Broken link check failed:", error);
  process.exit(1);
});
7 changes: 7 additions & 0 deletions broken-links-script/broken_links_markdown.csv
logu1411 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
URL,File Path,Status Code
https://github.com/Cumulocity-IoT//cumulocity-examples/snmp,..\content\change-logs\device-management\SNMP-removed-from-docs.md,404
https://openmobilealliance.org/iot/lightweight-m2m-lwm2m,..\content\protocol-integration\lwm2m-bundle\introduction.md,404
https://cumulocity.com/apama/docs/latest/related/ApamaDoc/index.htmlcom/apama/cumulocity/package-summary.html,..\content\streaming-analytics\epl-apps-bundle\basic-functionality.md,404
https://cumulocity.com/apama/docs/latest/related/ApamaDoc/index.html/com/apama/cumulocity/package-summary.html,..\content\streaming-analytics\epl-apps-bundle\basic-functionality.md,404
https://cumulocity.com/apama/docs/latest/related/ApamaDoc/index.htmlcom/softwareag/connectivity/httpclient/package-summary.html,..\content\streaming-analytics\epl-apps-bundle\microservices.md,404
https://cumulocity.com/docs/<https://en.wikipedia.org/wiki/Glob_(programming),..\content\web\upgrade-bundle\c8y-cli.md,404
Loading
Loading