datopian · rufuspollock · Nov 27, 2023 · Nov 25, 2023 · Nov 25, 2023 · Nov 25, 2023
diff --git a/.changeset/five-dots-speak.md b/.changeset/five-dots-speak.md
@@ -0,0 +1,6 @@
+---
+"mddb": minor
+---
+
+[ #60 , extract tasks ]
+Add task extraction from files, e.g. `- [ ] task`.
diff --git a/__mocks__/content/index.mdx b/__mocks__/content/index.mdx
@@ -6,3 +6,9 @@ tags: tag1, tag2, tag3
 # Welcome
 
 [link](blog0.mdx)
+
+- [] uncompleted task 1
+- [ ] uncompleted task 2
+
+- [x] completed task 1
+- [X] completed task 2
diff --git a/src/lib/databaseUtils.ts b/src/lib/databaseUtils.ts
@@ -1,11 +1,5 @@
 import { Knex } from "knex";
-import {
-  MddbFile,
-  MddbTag,
-  MddbLink,
-  MddbFileTag,
-  File,
-} from "./schema.js";
+import { MddbFile, MddbTag, MddbLink, MddbFileTag, File } from "./schema.js";
 import path from "path";
 import { WikiLink } from "./parseFile.js";
 
@@ -18,12 +12,12 @@
  }
 }

 export function mapFileToInsert(file: any) {
  const { _id, file_path, extension, url_path, filetype, metadata } = file;
  return { _id, file_path, extension, url_path, filetype, metadata };
 }

 export function mapLinksToInsert(filesToInsert: File[], file: any) {
  return file.links.map((link: WikiLink) => {
    let to: string | undefined;
    if (!link.internal) {
@@ -51,12 +45,12 @@
  });
 }

 export function isLinkToDefined(link: any) {
  return link.to !== undefined;
 }

 export function mapFileTagsToInsert(file: any) {
  return file.tags.map((tag: any) => ({
    file: file._id,
    tag: tag as unknown as string,
  }));

diff --git a/src/lib/parseFile.ts b/src/lib/parseFile.ts
@@ -24,6 +24,9 @@
   // Links
   const links = extractWikiLinks(ast, options);
 
+  const tasks = extractTasks(ast);
+  metadata.tasks = tasks;
+
   return {
     metadata,
     links,
@@ -61,7 +64,7 @@

  const nodes = selectAll("*", ast);
  for (let index = 0; index < nodes.length; index++) {
    const node: any = nodes[index];
    if (node.value) {
      const textTags = node.value.match(/(?:^|\s)(#(\w+|\/|-|_)+)/g);
      if (textTags) {
@@ -74,7 +77,7 @@
 };

 export interface LinkExtractors {
  [test: string]: (node: any) => WikiLink;
 }

 export interface WikiLink {
@@ -93,7 +96,7 @@
  const directory = path.dirname(from);

  const extractors: LinkExtractors = {
    link: (node: any) => {
      const to = !node.url.startsWith("http")
        ? path.posix.join(directory, node.url)
        : node.url;
@@ -106,7 +109,7 @@
        internal: !node.url.startsWith("http"),
      };
    },
    image: (node: any) => ({
      from: from,
      to: path.posix.join(directory, node.url),
      toRaw: node.url,
@@ -155,6 +158,68 @@
   return wikiLinks;
 };
 
+/** A task-list item (`- [ ] todo` / `- [x] done`) extracted from a document. */
+export interface Task {
+  /** Text of the list item with surrounding whitespace trimmed. */
+  description: string;
+  /** `true` when the checkbox is ticked, `false` when it is empty. */
+  checked: boolean;
+}
+
+/**
+ * Collects every task-list item in the document.
+ *
+ * A list item counts as a task only when its `checked` field is a boolean;
+ * per the mdast spec the field is `null` or absent on ordinary list items,
+ * so both cases are skipped.
+ *
+ * @param ast - Parsed markdown tree to scan.
+ * @returns One entry per task, in the order `selectAll` yields list items.
+ */
+export const extractTasks = (ast: Root): Task[] => {
+  const tasks: Task[] = [];
+  for (const node of selectAll("listItem", ast) as any[]) {
+    if (typeof node.checked !== "boolean") {
+      continue;
+    }
+    tasks.push({
+      description: recursivelyExtractText(node).trim(),
+      checked: node.checked,
+    });
+  }
+  return tasks;
+};
+
+// Parents whose children are inline content: their text is concatenated
+// as-is. Children of any other parent are separated by a single space.
+const INLINE_PARENT_TYPES = new Set([
+  "paragraph",
+  "heading",
+  "emphasis",
+  "strong",
+  "delete",
+  "link",
+  "linkReference",
+  "tableCell",
+]);
+
+/**
+ * Returns the text under `node`: the node's own string `value` when it has
+ * one, otherwise the text of its children, otherwise an empty string.
+ * Nested list items are included, so a parent task's text also contains the
+ * text of its sub-items.
+ */
+function recursivelyExtractText(node: any): string {
+  if (typeof node.value === "string") {
+    return node.value;
+  }
+  if (Array.isArray(node.children)) {
+    const separator = INLINE_PARENT_TYPES.has(node.type) ? "" : " ";
+    return node.children.map(recursivelyExtractText).join(separator);
+  }
+  return "";
+}
+
 // links = extractWikiLinks({
 //   source,
 //   // TODO pass slug instead of file path as hrefs/srcs are sluggified too

diff --git a/src/tests/extractTasks.spec.ts b/src/tests/extractTasks.spec.ts
@@ -0,0 +1,72 @@
+import { extractTasks, processAST } from "../lib/parseFile";
+
+const getTasksFromSource = (source: string) => { // Test helper: parse `source` with processAST (empty options) and return what extractTasks finds.
+  const ast = processAST(source, {});
+  const tasks = extractTasks(ast);
+  return tasks;
+};
+
+describe("extractTasks", () => { // Behavior checks for extractTasks, driven through processAST on inline markdown.
+  test("should extract uncompleted tasks from body", () => { // the input also has a "- []" item (no space in the brackets); it is not expected in the result
+    const tasks = getTasksFromSource(
+      "- [] uncompleted task 1\n- [ ] uncompleted task 2"
+    );
+    const expectedTasks = [
+      { description: "uncompleted task 2", checked: false },
+    ];
+    expect(tasks).toEqual(expectedTasks);
+  });
+
+  test("should extract completed tasks from body", () => {
+    const tasks = getTasksFromSource(
+      "- [x] completed task 1\n- [X] completed task 2"
+    );
+    const expectedTasks = [
+      { description: "completed task 1", checked: true },
+      { description: "completed task 2", checked: true },
+    ];
+    expect(tasks).toEqual(expectedTasks);
+  });
+
+  test("should handle mixed completed and uncompleted tasks", () => {
+    const tasks = getTasksFromSource(
+      "- [x] completed task\n- [ ] uncompleted task"
+    );
+    const expectedTasks = [
+      { description: "completed task", checked: true },
+      { description: "uncompleted task", checked: false },
+    ];
+    expect(tasks).toEqual(expectedTasks);
+  });
+
+  test("should handle tasks with leading and trailing spaces", () => { // extra spaces around the task text must not appear in the description
+    const tasks = getTasksFromSource(
+      "- [x]  completed task  \n- [ ]  uncompleted task  "
+    );
+    const expectedTasks = [
+      { description: "completed task", checked: true },
+      { description: "uncompleted task", checked: false },
+    ];
+    expect(tasks).toEqual(expectedTasks);
+  });
+
+  test("should handle tasks with different checkbox formats", () => { // lowercase "x" and uppercase "X" are both expected to count as checked
+    const tasks = getTasksFromSource(
+      "- [x] task 1\n- [X] task 2\n- [ ] task 3"
+    );
+    const expectedTasks = [
+      { description: "task 1", checked: true },
+      { description: "task 2", checked: true },
+      { description: "task 3", checked: false },
+    ];
+    expect(tasks).toEqual(expectedTasks);
+  });
+
+  test("should handle tasks with special characters", () => {
+    const tasks = getTasksFromSource("- [x] task with $pecial character$");
+    const expectedTasks = [
+      { description: "task with $pecial character$", checked: true },
+    ];
+    expect(tasks).toEqual(expectedTasks);
+  });
+});
diff --git a/src/tests/parseFile.spec.ts b/src/tests/parseFile.spec.ts
@@ -10,6 +10,8 @@ tags: a, b, c
 [[blog/Some Other Link]]
 [[blog/Some Other Link|Page Alias]]
 ![[Some Image.png]]
+- [ ] uncompleted task
+- [x] completed task
 `;
 
 describe("parseFile", () => {
@@ -18,6 +20,10 @@ describe("parseFile", () => {
       title: "Hello World",
       authors: ["John Doe", "Jane Doe"],
       tags: ["a", "b", "c"],
+      tasks: [
+        { description: "uncompleted task", checked: false },
+        { description: "completed task", checked: true },
+      ],
     };
     const expectedLinks = [
       {
@@ -63,6 +69,10 @@ describe("parseFile", () => {
       title: "Hello World",
       authors: ["John Doe", "Jane Doe"],
       tags: ["a", "b", "c"],
+      tasks: [
+        { description: "uncompleted task", checked: false },
+        { description: "completed task", checked: true },
+      ],
     };
     const expectedLinks = [
       {

diff --git a/src/tests/process.spec.ts b/src/tests/process.spec.ts
@@ -21,6 +21,20 @@ describe("Can parse a file and get file info", () => {
     expect(fileInfo.metadata).toEqual({
       title: "Homepage",
       tags: ["tag1", "tag2", "tag3"],
+      tasks: [
+        {
+          checked: false,
+          description: "uncompleted task 2",
+        },
+        {
+          checked: true,
+          description: "completed task 1",
+        },
+        {
+          checked: true,
+          description: "completed task 2",
+        },
+      ],
     });
     expect(fileInfo.links).toEqual([
       {