Skip to content

Commit

Permalink
feat: extract word count metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
luissimas committed Jul 2, 2024
1 parent bed96ae commit c68fc9c
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 74 deletions.
4 changes: 3 additions & 1 deletion internal/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func (c *Collector) CollectMetrics(root fs.FS, collectionTime time.Time) error {
func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) {
var noteCount uint
var linkCount uint
var wordCount uint
notes := make(map[string]metrics.NoteMetrics)

err := fs.WalkDir(root, ".", func(path string, dir fs.DirEntry, err error) error {
Expand Down Expand Up @@ -83,6 +84,7 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) {
metrics := CollectNoteMetrics(content)
notes[path] = metrics
linkCount += metrics.LinkCount
wordCount += metrics.WordCount
noteCount += 1

slog.Debug("collected metrics from file", slog.String("path", path), slog.Any("d", dir), slog.Any("err", err))
Expand All @@ -95,5 +97,5 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) {
return metrics.Metrics{}, err
}

return metrics.Metrics{NoteCount: noteCount, LinkCount: linkCount, Notes: notes}, nil
return metrics.Metrics{NoteCount: noteCount, LinkCount: linkCount, WordCount: wordCount, Notes: notes}, nil
}
25 changes: 17 additions & 8 deletions internal/collector/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,31 @@ Link to [one](./one.md) and also a full link [[./dir1/dir2/three]] and a [[./dir
expected := metrics.Metrics{
NoteCount: 4,
LinkCount: 8,
WordCount: 43,
Notes: map[string]metrics.NoteMetrics{
"zettel/one.md": {
Links: map[string]uint{"./dir1/two.md": 2},
LinkCount: 2,
Links: map[string]uint{"./dir1/two.md": 2},
LinkCount: 2,
WordCount: 13,
BacklinkCount: 0,
},
"zettel/dir1/two.md": {
Links: map[string]uint{"one": 1},
LinkCount: 1,
Links: map[string]uint{"one": 1},
LinkCount: 1,
WordCount: 5,
BacklinkCount: 0,
},
"zettel/dir1/dir2/three.md": {
Links: map[string]uint{"one": 1, "two": 1},
LinkCount: 2,
Links: map[string]uint{"one": 1, "two": 1},
LinkCount: 2,
WordCount: 10,
BacklinkCount: 0,
},
"zettel/four.md": {
Links: map[string]uint{"./one.md": 1, "./dir1/dir2/three": 1, "./dir1/two.md": 1},
LinkCount: 3,
Links: map[string]uint{"./one.md": 1, "./dir1/dir2/three": 1, "./dir1/two.md": 1},
LinkCount: 3,
WordCount: 15,
BacklinkCount: 0,
},
},
}
Expand Down
92 changes: 55 additions & 37 deletions internal/collector/note.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package collector
import (
"log/slog"
"net/url"
"slices"
"path/filepath"
"strings"
"unicode"

"github.com/luissimas/zettelkasten-exporter/internal/metrics"
"github.com/yuin/goldmark"
Expand All @@ -19,54 +21,70 @@ var md = goldmark.New(
)

func CollectNoteMetrics(content []byte) metrics.NoteMetrics {
var linkCount uint
links := collectLinks(content)
for _, v := range links {
linkCount += v
noteMetrics := metrics.NoteMetrics{
Links: make(map[string]uint),
LinkCount: 0,
WordCount: 0,
BacklinkCount: 0,
}
return metrics.NoteMetrics{Links: links, LinkCount: linkCount}
}

func collectLinks(content []byte) map[string]uint {
linkKinds := []ast.NodeKind{ast.KindLink, wikilink.Kind}
reader := text.NewReader(content)
root := md.Parser().Parse(reader)
links := make(map[string]uint)
err := ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
if entering && slices.Contains(linkKinds, n.Kind()) {
var target string
switch v := n.(type) {
case *ast.Link:
target = string(v.Destination)
case *wikilink.Node:
if v.Embed {
return ast.WalkContinue, nil
}
target = string(v.Target)
default:
return ast.WalkContinue, nil
}
if !entering {
return ast.WalkContinue, nil
}

linkTarget := ""

switch v := n.(type) {
case *ast.Link:
linkTarget = string(v.Destination)
case *wikilink.Node:
linkTarget = string(v.Target)
case *ast.Paragraph, *ast.ListItem:
text := string(n.Text(content))
fields := strings.FieldsFunc(string(text), func(r rune) bool { return unicode.IsSpace(r) || r == '\n' })
noteMetrics.WordCount += uint(len(fields))
default:
return ast.WalkContinue, nil
}

if isUrl(target) {
return ast.WalkContinue, nil
}
if !isNoteTarget(linkTarget) {
return ast.WalkContinue, nil
}

v, ok := links[target]
if !ok {
links[target] = 0
}
links[target] = v + 1
v, ok := noteMetrics.Links[linkTarget]
if !ok {
noteMetrics.Links[linkTarget] = 0
}
noteMetrics.Links[linkTarget] = v + 1
return ast.WalkContinue, nil
})
if err != nil {
slog.Error("Error walking note AST", slog.Any("error", err))
}
slog.Debug("Collected links", slog.Any("links", links))
return links
for _, linkCount := range noteMetrics.Links {
noteMetrics.LinkCount += linkCount
}
return noteMetrics
}

func isUrl(s string) bool {
u, err := url.Parse(s)
return err == nil && u.Scheme != "" && u.Host != ""
// isNoteTarget reports whether a link target refers to a note, as opposed to
// an external URL or a non-markdown file.
//
// A target is rejected when it is empty, when it parses as a URL that carries
// both a scheme and a host, or when its extension is anything other than
// ".md". Targets with no extension at all are accepted.
func isNoteTarget(target string) bool {
	if target == "" {
		return false
	}

	// A parseable target with both scheme and host is treated as a URL and
	// is therefore not a note.
	if parsed, err := url.Parse(target); err == nil && parsed.Scheme != "" && parsed.Host != "" {
		return false
	}

	// Only extension-less targets and explicit markdown files qualify.
	switch filepath.Ext(target) {
	case "", ".md":
		return true
	default:
		return false
	}
}
101 changes: 76 additions & 25 deletions internal/collector/note_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,64 +17,115 @@ func TestCollectNoteMetrics(t *testing.T) {
name: "empty file",
content: "",
expected: metrics.NoteMetrics{
Links: map[string]uint{},
LinkCount: 0,
Links: map[string]uint{},
LinkCount: 0,
WordCount: 0,
BacklinkCount: 0,
},
},
{
name: "wiki links",
content: "[[Link]]aksdjf[[something|another]]\n[[link]]",
name: "wiki links",
content: `
[[Link]] some words [[something|another]]
another [[link]]`,
expected: metrics.NoteMetrics{
Links: map[string]uint{"Link": 1, "something": 1, "link": 1},
LinkCount: 3,
Links: map[string]uint{"Link": 1, "something": 1, "link": 1},
LinkCount: 3,
WordCount: 6,
BacklinkCount: 0,
},
},
{
name: "markdown link",
content: "[Link](target.md)",
expected: metrics.NoteMetrics{
Links: map[string]uint{"target.md": 1},
LinkCount: 1,
Links: map[string]uint{"target.md": 1},
LinkCount: 1,
WordCount: 1,
BacklinkCount: 0,
},
},
{
name: "mixed links",
content: "okok[Link](target.md)\n**ddk**[[linked]]`test`[[another|link]]\n\n[test](yet-another.md)",
name: "repeated links",
content: "[[target.md|link]] [link](target.md) [[link]]",
expected: metrics.NoteMetrics{
Links: map[string]uint{"target.md": 1, "linked": 1, "another": 1, "yet-another.md": 1},
LinkCount: 4,
Links: map[string]uint{"target.md": 2, "link": 1},
LinkCount: 3,
WordCount: 3,
BacklinkCount: 0,
},
},
{
name: "repeated links",
content: "[[target.md|link]]\n[link](target.md)\n[[link]]",
name: "ignore links to non markdown files",
content: "![[note.md]] [[test.pdf]] ![[target.png]] ![](another.jpeg) [[link]] [](link)",
expected: metrics.NoteMetrics{
Links: map[string]uint{"target.md": 2, "link": 1},
LinkCount: 3,
Links: map[string]uint{"link": 2, "note.md": 1},
LinkCount: 3,
WordCount: 4,
BacklinkCount: 0,
},
},
{
name: "ignore embeddedlinks",
content: "![[target.png]]\n![](another.jpeg)\n[[link]]",
name: "ignore http links",
content: "[[one]] [this is an http link](https://go.dev/) [[not/an/http/link]]",
expected: metrics.NoteMetrics{
Links: map[string]uint{"link": 1},
LinkCount: 1,
Links: map[string]uint{"one": 1, "not/an/http/link": 1},
LinkCount: 2,
WordCount: 7,
BacklinkCount: 0,
},
},
{
name: "ignore http links",
content: "[[one]][this is an http link](https://go.dev/)[[not/an/http/link]]",
name: "mixed links",
content: `
Ok [Link](target.md).
Another paragraph **bold text** and [[linked]] /test/ [[another|link]].
> Quote in [test](yet-another.md)
A list
- One [[link-unordered.md]]
- Two
Another list:
1. First
2. Second [link](link-ordered.md)`,
expected: metrics.NoteMetrics{
Links: map[string]uint{"target.md": 1, "linked": 1, "another": 1, "yet-another.md": 1, "link-unordered.md": 1, "link-ordered.md": 1},
LinkCount: 6,
WordCount: 23,
BacklinkCount: 0,
},
},
{
name: "long note",
content: `
Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis.
Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis.
Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis.
Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis.
Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis.`,
expected: metrics.NoteMetrics{
Links: map[string]uint{"one": 1, "not/an/http/link": 1},
LinkCount: 1,
Links: map[string]uint{},
LinkCount: 0,
WordCount: 525,
BacklinkCount: 0,
},
},
}

for _, d := range data {
t.Run(d.name, func(t *testing.T) {
result := CollectNoteMetrics([]byte(d.content))
assert.Equal(t, d.expected.Links, result.Links)
assert.Equal(t, d.expected, result)
})
}
}
7 changes: 5 additions & 2 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ package metrics
// Metrics holds the aggregate figures produced by one collection pass,
// together with the per-note breakdown they were summed from.
type Metrics struct {
// NoteCount is the number of notes that were collected.
NoteCount uint
// LinkCount is the sum of LinkCount over all collected notes.
LinkCount uint
// WordCount is the sum of WordCount over all collected notes.
WordCount uint
// Notes maps each collected note's path to that note's metrics.
Notes map[string]NoteMetrics
}

type NoteMetrics struct {
Links map[string]uint
LinkCount uint
Links map[string]uint
LinkCount uint
WordCount uint
BacklinkCount uint
}
6 changes: 5 additions & 1 deletion internal/storage/influxdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@ func (i InfluxDBStorage) WriteMetric(noteName string, metric metrics.NoteMetrics
point := influxdb2.NewPoint(
measurementName,
map[string]string{"name": noteName},
map[string]interface{}{"link_count": metric.LinkCount},
map[string]interface{}{
"link_count": metric.LinkCount,
"word_count": metric.WordCount,
"backlink_count": metric.BacklinkCount,
},
timestamp,
)
i.writeAPI.WritePoint(point)
Expand Down

0 comments on commit c68fc9c

Please sign in to comment.