diff --git a/internal/collector/collector.go b/internal/collector/collector.go index cae0fdb..ca5c2b1 100644 --- a/internal/collector/collector.go +++ b/internal/collector/collector.go @@ -49,6 +49,7 @@ func (c *Collector) CollectMetrics(root fs.FS, collectionTime time.Time) error { func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { var noteCount uint var linkCount uint + var wordCount uint notes := make(map[string]metrics.NoteMetrics) err := fs.WalkDir(root, ".", func(path string, dir fs.DirEntry, err error) error { @@ -83,6 +84,7 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { metrics := CollectNoteMetrics(content) notes[path] = metrics linkCount += metrics.LinkCount + wordCount += metrics.WordCount noteCount += 1 slog.Debug("collected metrics from file", slog.String("path", path), slog.Any("d", dir), slog.Any("err", err)) @@ -95,5 +97,5 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { return metrics.Metrics{}, err } - return metrics.Metrics{NoteCount: noteCount, LinkCount: linkCount, Notes: notes}, nil + return metrics.Metrics{NoteCount: noteCount, LinkCount: linkCount, WordCount: wordCount, Notes: notes}, nil } diff --git a/internal/collector/collector_test.go b/internal/collector/collector_test.go index 34b4149..6540054 100644 --- a/internal/collector/collector_test.go +++ b/internal/collector/collector_test.go @@ -53,22 +53,31 @@ Link to [one](./one.md) and also a full link [[./dir1/dir2/three]] and a [[./dir expected := metrics.Metrics{ NoteCount: 4, LinkCount: 8, + WordCount: 43, Notes: map[string]metrics.NoteMetrics{ "zettel/one.md": { - Links: map[string]uint{"./dir1/two.md": 2}, - LinkCount: 2, + Links: map[string]uint{"./dir1/two.md": 2}, + LinkCount: 2, + WordCount: 13, + BacklinkCount: 0, }, "zettel/dir1/two.md": { - Links: map[string]uint{"one": 1}, - LinkCount: 1, + Links: map[string]uint{"one": 1}, + LinkCount: 1, + WordCount: 5, + BacklinkCount: 0, }, 
"zettel/dir1/dir2/three.md": { - Links: map[string]uint{"one": 1, "two": 1}, - LinkCount: 2, + Links: map[string]uint{"one": 1, "two": 1}, + LinkCount: 2, + WordCount: 10, + BacklinkCount: 0, }, "zettel/four.md": { - Links: map[string]uint{"./one.md": 1, "./dir1/dir2/three": 1, "./dir1/two.md": 1}, - LinkCount: 3, + Links: map[string]uint{"./one.md": 1, "./dir1/dir2/three": 1, "./dir1/two.md": 1}, + LinkCount: 3, + WordCount: 15, + BacklinkCount: 0, }, }, } diff --git a/internal/collector/note.go b/internal/collector/note.go index c83eb2a..9e3ae8e 100644 --- a/internal/collector/note.go +++ b/internal/collector/note.go @@ -3,7 +3,9 @@ package collector import ( "log/slog" "net/url" - "slices" + "path/filepath" + "strings" + "unicode" "github.com/luissimas/zettelkasten-exporter/internal/metrics" "github.com/yuin/goldmark" @@ -19,54 +21,70 @@ var md = goldmark.New( ) func CollectNoteMetrics(content []byte) metrics.NoteMetrics { - var linkCount uint - links := collectLinks(content) - for _, v := range links { - linkCount += v + noteMetrics := metrics.NoteMetrics{ + Links: make(map[string]uint), + LinkCount: 0, + WordCount: 0, + BacklinkCount: 0, } - return metrics.NoteMetrics{Links: links, LinkCount: linkCount} -} - -func collectLinks(content []byte) map[string]uint { - linkKinds := []ast.NodeKind{ast.KindLink, wikilink.Kind} reader := text.NewReader(content) root := md.Parser().Parse(reader) - links := make(map[string]uint) err := ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) { - if entering && slices.Contains(linkKinds, n.Kind()) { - var target string - switch v := n.(type) { - case *ast.Link: - target = string(v.Destination) - case *wikilink.Node: - if v.Embed { - return ast.WalkContinue, nil - } - target = string(v.Target) - default: - return ast.WalkContinue, nil - } + if !entering { + return ast.WalkContinue, nil + } + + linkTarget := "" + + switch v := n.(type) { + case *ast.Link: + linkTarget = string(v.Destination) + case 
*wikilink.Node: + linkTarget = string(v.Target) + case *ast.Paragraph, *ast.ListItem: + text := string(n.Text(content)) + fields := strings.FieldsFunc(string(text), func(r rune) bool { return unicode.IsSpace(r) || r == '\n' }) + noteMetrics.WordCount += uint(len(fields)) + default: + return ast.WalkContinue, nil + } - if isUrl(target) { - return ast.WalkContinue, nil - } + if !isNoteTarget(linkTarget) { + return ast.WalkContinue, nil + } - v, ok := links[target] - if !ok { - links[target] = 0 - } - links[target] = v + 1 + v, ok := noteMetrics.Links[linkTarget] + if !ok { + noteMetrics.Links[linkTarget] = 0 } + noteMetrics.Links[linkTarget] = v + 1 return ast.WalkContinue, nil }) if err != nil { slog.Error("Error walking note AST", slog.Any("error", err)) } - slog.Debug("Collected links", slog.Any("links", links)) - return links + for _, linkCount := range noteMetrics.Links { + noteMetrics.LinkCount += linkCount + } + return noteMetrics } -func isUrl(s string) bool { - u, err := url.Parse(s) - return err == nil && u.Scheme != "" && u.Host != "" +// isNoteTarget determines whether a link target points to a markdown note. 
// isNoteTarget reports whether a link target points to a markdown note.
//
// A target is accepted when all of the following hold:
//   - its path part (everything before the first "#") is not empty, so empty
//     targets and fragment-only targets such as "#heading" are rejected;
//   - it does not parse as a URL with both a scheme and a host;
//   - its path part has no file extension or has the ".md" extension.
//
// The fragment is ignored when looking at the extension so that a link to a
// section of a note ("note.md#heading") is still recognised as a note link.
func isNoteTarget(target string) bool {
	// The fragment addresses a location inside the document, not the document
	// itself, so it must not take part in the extension check.
	notePath, _, _ := strings.Cut(target, "#")
	if notePath == "" {
		return false
	}

	// Targets with both a scheme and a host are external URLs, not notes.
	if u, err := url.Parse(target); err == nil && u.Scheme != "" && u.Host != "" {
		return false
	}

	switch filepath.Ext(notePath) {
	case "", ".md":
		return true
	default:
		return false
	}
}
BacklinkCount: 0, }, }, { - name: "repeated links", - content: "[[target.md|link]]\n[link](target.md)\n[[link]]", + name: "ignore links to non markdown files", + content: "![[note.md]] [[test.pdf]] ![[target.png]] ![](another.jpeg) [[link]] [](link)", expected: metrics.NoteMetrics{ - Links: map[string]uint{"target.md": 2, "link": 1}, - LinkCount: 3, + Links: map[string]uint{"link": 2, "note.md": 1}, + LinkCount: 3, + WordCount: 4, + BacklinkCount: 0, }, }, { - name: "ignore embeddedlinks", - content: "![[target.png]]\n![](another.jpeg)\n[[link]]", + name: "ignore http links", + content: "[[one]] [this is an http link](https://go.dev/) [[not/an/http/link]]", expected: metrics.NoteMetrics{ - Links: map[string]uint{"link": 1}, - LinkCount: 1, + Links: map[string]uint{"one": 1, "not/an/http/link": 1}, + LinkCount: 2, + WordCount: 7, + BacklinkCount: 0, }, }, { - name: "ignore http links", - content: "[[one]][this is an http link](https://go.dev/)[[not/an/http/link]]", + name: "mixed links", + content: ` +Ok [Link](target.md). + +Another paragraph **bold text** and [[linked]] /test/ [[another|link]]. + +> Quote in [test](yet-another.md) + +A list + +- One [[link-unordered.md]] +- Two + +Another list: + +1. First +2. Second [link](link-ordered.md)`, + expected: metrics.NoteMetrics{ + Links: map[string]uint{"target.md": 1, "linked": 1, "another": 1, "yet-another.md": 1, "link-unordered.md": 1, "link-ordered.md": 1}, + LinkCount: 6, + WordCount: 23, + BacklinkCount: 0, + }, + }, + { + name: "long note", + content: ` +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. 
Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. + +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. + +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. + +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. 
Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. + +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. 
Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis.`, expected: metrics.NoteMetrics{ - Links: map[string]uint{"one": 1, "not/an/http/link": 1}, - LinkCount: 1, + Links: map[string]uint{}, + LinkCount: 0, + WordCount: 525, + BacklinkCount: 0, }, }, } @@ -74,7 +125,7 @@ func TestCollectNoteMetrics(t *testing.T) { for _, d := range data { t.Run(d.name, func(t *testing.T) { result := CollectNoteMetrics([]byte(d.content)) - assert.Equal(t, d.expected.Links, result.Links) + assert.Equal(t, d.expected, result) }) } } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 71fc36d..f6279b6 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -3,10 +3,13 @@ package metrics type Metrics struct { NoteCount uint LinkCount uint + WordCount uint Notes map[string]NoteMetrics } type NoteMetrics struct { - Links map[string]uint - LinkCount uint + Links map[string]uint + LinkCount uint + WordCount uint + BacklinkCount uint } diff --git a/internal/storage/influxdb.go b/internal/storage/influxdb.go index d7c4cf2..755a0f5 100644 --- a/internal/storage/influxdb.go +++ b/internal/storage/influxdb.go @@ -29,7 +29,11 @@ func (i InfluxDBStorage) WriteMetric(noteName string, metric metrics.NoteMetrics point := influxdb2.NewPoint( measurementName, map[string]string{"name": noteName}, - map[string]interface{}{"link_count": metric.LinkCount}, + map[string]interface{}{ + "link_count": metric.LinkCount, + "word_count": metric.WordCount, + "backlink_count": metric.BacklinkCount, + }, timestamp, ) i.writeAPI.WritePoint(point)