broken but better

robinovitch61 · Jan 20, 2025 · 282b9e6 · 282b9e6
1 parent 225ba5b
commit 282b9e6
Show file tree

Hide file tree

Showing 6 changed files with 284 additions and 85 deletions.
diff --git a/cpu.prof b/cpu.prof
diff --git a/internal/viewport/linebuffer/linebuffer.go b/internal/viewport/linebuffer/linebuffer.go
@@ -9,16 +9,16 @@ import (
 )
 
 // LineBuffer provides functionality to get sequential strings of a specified terminal width, accounting
-// for the ansi escape codes styling the content.
+// for the ansi escape codes styling the Content.
 type LineBuffer struct {
 	Content             string  // underlying string with ansi codes. utf-8 bytes
 	leftRuneIdx         int     // left plaintext rune idx to start next PopLeft result from
-	lineNoAnsi          string  // line without ansi codes. utf-8 bytes
+	lineNoAnsi          string  // Content without ansi codes. utf-8 bytes
 	lineNoAnsiRunes     []rune  // runes of lineNoAnsi. len(lineNoAnsiRunes) == len(lineNoAnsiWidths)
 	runeIdxToByteOffset []int   // idx of lineNoAnsiRunes to byte offset. len(runeIdxToByteOffset) == len(lineNoAnsiRunes)
 	lineNoAnsiWidths    []int   // terminal cell widths of lineNoAnsi. len(lineNoAnsiWidths) == len(lineNoAnsiRunes)
 	lineNoAnsiCumWidths []int   // cumulative lineNoAnsiWidths
-	ansiCodeIndexes     [][]int // slice of startByte, endByte indexes of ansi codes in the line
+	ansiCodeIndexes     [][]int // slice of startByte, endByte indexes of ansi codes in the Content
 }
 
 func New(line string) LineBuffer {
@@ -27,31 +27,70 @@ func New(line string) LineBuffer {
 		leftRuneIdx: 0,
 	}
 
-	lb.ansiCodeIndexes = constants.AnsiRegex.FindAllStringIndex(line, -1)
-	lb.lineNoAnsi = stripAnsi(line)
+	lb.ansiCodeIndexes = findAnsiRanges(line)
 
-	lb.lineNoAnsiRunes = []rune(lb.lineNoAnsi)
-	n := len(lb.lineNoAnsiRunes)
-	lb.runeIdxToByteOffset = make([]int, n+1)
-	lb.lineNoAnsiWidths = make([]int, n)
-	lb.lineNoAnsiCumWidths = make([]int, n)
+	if len(lb.ansiCodeIndexes) > 0 {
+		totalLen := len(line)
+		for _, r := range lb.ansiCodeIndexes {
+			totalLen -= r[1] - r[0]
+		}
+
+		buf := make([]byte, 0, totalLen)
+		lastPos := 0
+		for _, r := range lb.ansiCodeIndexes {
+			buf = append(buf, line[lastPos:r[0]]...)
+			lastPos = r[1]
+		}
+		buf = append(buf, line[lastPos:]...)
+		lb.lineNoAnsi = string(buf)
+	} else {
+		lb.lineNoAnsi = line
+	}
+
+	n := utf8.RuneCountInString(lb.lineNoAnsi)
+
+	// single allocation for all integer slices
+	combined := make([]int, n+1+n+n)
+	lb.runeIdxToByteOffset = combined[:n+1]
+	lb.lineNoAnsiWidths = combined[n+1 : n+1+n]
+	lb.lineNoAnsiCumWidths = combined[n+1+n:]
+
+	lb.lineNoAnsiRunes = make([]rune, n)
 
 	currentOffset := 0
 	cumWidth := 0
-	for i, r := range lb.lineNoAnsiRunes {
+	i := 0
+	for _, r := range lb.lineNoAnsi {
 		lb.runeIdxToByteOffset[i] = currentOffset
 		currentOffset += utf8.RuneLen(r)
-
+		lb.lineNoAnsiRunes[i] = r
 		width := runewidth.RuneWidth(r)
 		lb.lineNoAnsiWidths[i] = width
 		cumWidth += width
 		lb.lineNoAnsiCumWidths[i] = cumWidth
+		i++
 	}
 	lb.runeIdxToByteOffset[n] = currentOffset
 
 	return lb
 }
 
+// ToLineBuffers builds one LineBuffer per input string via New, preserving order.
+func ToLineBuffers(lines []string) []LineBuffer {
+	buffers := make([]LineBuffer, len(lines))
+	for idx := range lines {
+		buffers[idx] = New(lines[idx])
+	}
+	return buffers
+}
+
+// ToStrings returns the Content of each LineBuffer, preserving order.
+func ToStrings(lbs []LineBuffer) []string {
+	contents := make([]string, 0, len(lbs))
+	// index instead of ranging by value so each LineBuffer is not copied
+	for idx := range lbs {
+		contents = append(contents, lbs[idx].Content)
+	}
+	return contents
+}
+
 func (l LineBuffer) TotalLines(width int) int {
 	if width == 0 {
 		return 0
@@ -68,7 +107,7 @@ func (l *LineBuffer) SeekToLine(n, width int) {
 }
 
 func (l *LineBuffer) SeekToWidth(width int) {
-	// width can go past end, in which case PopLeft() returns "". Required when e.g. panning past line's end.
+	// width can go past end, in which case PopLeft() returns "". Required when e.g. panning past Content's end.
 	if width <= 0 {
 		l.leftRuneIdx = 0
 		return
@@ -137,7 +176,7 @@ func (l *LineBuffer) PopLeft(width int, continuation, toHighlight string, highli
 
 	res := result.String()
 
-	// apply left/right line continuation indicators
+	// apply left/right continuation indicators when Content extends beyond the popped segment
 	if len(continuation) > 0 && (startRuneIdx > 0 || l.leftRuneIdx < len(l.lineNoAnsiRunes)) {
 		continuationRunes := []rune(continuation)
 
@@ -274,8 +313,8 @@ func reapplyAnsi(original, truncated string, truncByteOffset int, ansiCodeIndexe
 	return string(result)
 }
 
-// highlightLine highlights a string in a line that potentially has ansi codes in it without disrupting them
-// start and end are the byte offsets for which highlighting is considered in the line, not counting ansi codes
+// highlightLine highlights a string within Content that potentially has ansi codes in it, without disrupting them.
+// start and end are the byte offsets within Content, not counting ansi codes, between which highlighting is considered.
 func highlightLine(line, highlight string, highlightStyle lipgloss.Style, start, end int) string {
 	if line == "" || highlight == "" {
 		return line
@@ -339,7 +378,28 @@ func highlightLine(line, highlight string, highlightStyle lipgloss.Style, start,
 }
 
 func stripAnsi(input string) string {
-	return constants.AnsiRegex.ReplaceAllString(input, "")
+	ranges := findAnsiRanges(input)
+	if len(ranges) == 0 {
+		return input
+	}
+
+	totalAnsiLen := 0
+	for _, r := range ranges {
+		totalAnsiLen += r[1] - r[0]
+	}
+
+	finalLen := len(input) - totalAnsiLen
+	var builder strings.Builder
+	builder.Grow(finalLen)
+
+	lastPos := 0
+	for _, r := range ranges {
+		builder.WriteString(input[lastPos:r[0]])
+		lastPos = r[1]
+	}
+
+	builder.WriteString(input[lastPos:])
+	return builder.String()
 }
 
 func simplifyAnsiCodes(ansis []string) []string {
@@ -377,18 +437,6 @@ func simplifyAnsiCodes(ansis []string) []string {
 	return ansis
 }
 
-func initByteOffsets(runes []rune) []int {
-	offsets := make([]int, len(runes)+1)
-	currentOffset := 0
-	for i, r := range runes {
-		offsets[i] = currentOffset
-		runeLen := utf8.RuneLen(r)
-		currentOffset += runeLen
-	}
-	offsets[len(runes)] = currentOffset
-	return offsets
-}
-
 // overflowsLeft checks if a substring overflows a string on the left if the string were to start at startByteIdx inclusive.
 // assumes s has no ansi codes.
 // It performs a case-sensitive comparison and returns two values:
@@ -577,3 +625,41 @@ func replaceRuneWithRunes(rs []rune, idxToReplace int, replaceWith []rune) []run
 	copy(result[idxToReplace+len(replaceWith):], rs[idxToReplace+1:])
 	return result
 }
+
+// findAnsiRanges returns the [startByte, endByte) index pairs of the ansi style codes in s: every
+// "\x1b[" followed by zero or more digits or ';' and terminated by 'm'.
+// It returns nil when s contains no "\x1b[" at all, and an empty slice when none of the candidates
+// is a well-formed, terminated code.
+// NOTE(review): presumably this should match the same sequences as constants.AnsiRegex — confirm.
+func findAnsiRanges(s string) [][]int {
+	// the number of "\x1b[" prefixes is an upper bound on the number of codes, so both
+	// slices can be allocated exactly once
+	maxCount := strings.Count(s, "\x1b[")
+	if maxCount == 0 {
+		return nil
+	}
+
+	flat := make([]int, maxCount*2)
+	ranges := make([][]int, 0, maxCount)
+
+	// a code needs at least two bytes, so the final byte can never start one
+	for i := 0; i+1 < len(s); {
+		if s[i] != '\x1b' || s[i+1] != '[' {
+			i++
+			continue
+		}
+		start := i
+		i += 2 // skip \x1b[
+
+		// consume parameter bytes only. Scanning blindly for the next 'm' would swallow visible
+		// text after a non-style or unterminated sequence, e.g. "\x1b[2Kfoo m" or "\x1b[31 am"
+		for i < len(s) && (s[i] == ';' || (s[i] >= '0' && s[i] <= '9')) {
+			i++
+		}
+
+		if i < len(s) && s[i] == 'm' {
+			i++
+			n := len(ranges)
+			pair := flat[n*2 : n*2+2 : n*2+2]
+			pair[0], pair[1] = start, i
+			ranges = append(ranges, pair)
+		}
+		// otherwise this was not a style code: resume at the offending byte without skipping it,
+		// since it may itself be the \x1b that starts the next code
+	}
+	return ranges
+}
diff --git a/internal/viewport/linebuffer/linebuffer_bench_test.go b/internal/viewport/linebuffer/linebuffer_bench_test.go
@@ -0,0 +1,105 @@
+package linebuffer
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+)
+
+// Example of interpreting output of `go test -v -bench=. -run=^$ -benchmem ./internal/viewport/linebuffer`
+// BenchmarkNewLongLine-8    7842	    152640 ns/op	  904063 B/op	       8 allocs/op
+// - 7842: benchmark ran 7,842 iterations to get a stable measurement
+// - 152640 ns/op: each call to New() takes about 153 microseconds
+// - 904063 B/op: each operation allocates about 904KB of memory
+// - 8 allocs/op: each call to New() makes 8 distinct memory allocations
+
+// BenchmarkNewLongLine measures New on a single long plain-ascii line
+// (a 29-byte phrase repeated 1000 times).
+func BenchmarkNewLongLine(b *testing.B) {
+	longLine := strings.Repeat("hi there random words woohoo ", 1000)
+
+	// report allocation statistics for this benchmark even without -benchmem
+	b.ReportAllocs()
+
+	// exclude the setup above from the measured time
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		// the result is discarded; only the cost of constructing it is of interest
+		_ = New(longLine)
+	}
+}
+
+// BenchmarkMemoryComparisonAscii compares a plain string against a LineBuffer built from it,
+// for single-byte ascii content of increasing length.
+func BenchmarkMemoryComparisonAscii(b *testing.B) {
+	benchmarkMemoryComparison(b, "h")
+}
+
+// BenchmarkMemoryComparisonAsciiWithAnsi compares a plain string against a LineBuffer built from it,
+// for content in which every ascii character is wrapped in ansi style codes.
+func BenchmarkMemoryComparisonAsciiWithAnsi(b *testing.B) {
+	benchmarkMemoryComparison(b, "\x1b[31mh\x1b[0m")
+}
+
+// BenchmarkMemoryComparisonAsciiWithUnicode compares a plain string against a LineBuffer built from it,
+// for content made of a repeated multi-byte rune.
+func BenchmarkMemoryComparisonAsciiWithUnicode(b *testing.B) {
+	benchmarkMemoryComparison(b, "世")
+}
+
+// benchmarkMemoryComparison runs two sub-benchmarks for each size, on unit repeated size times:
+// "String_<size>" only assigns the string (the baseline), and "LineBuffer_<size>" calls New on it.
+// Running several sizes shows how memory usage scales with input length.
+func benchmarkMemoryComparison(b *testing.B, unit string) {
+	sizes := []int{10, 100, 1000, 10000}
+
+	for _, size := range sizes {
+		// declared inside the loop body so each closure below captures its own copy
+		baseString := strings.Repeat(unit, size)
+
+		b.Run(fmt.Sprintf("String_%d", size), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				s := baseString
+				_ = s
+			}
+		})
+
+		b.Run(fmt.Sprintf("LineBuffer_%d", size), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				lb := New(baseString)
+				_ = lb
+			}
+		})
+	}
+}