Skip to content

Commit

Permalink
broken but better
Browse files Browse the repository at this point in the history
  • Loading branch information
robinovitch61 committed Jan 20, 2025
1 parent 225ba5b commit 282b9e6
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 85 deletions.
Binary file added cpu.prof
Binary file not shown.
144 changes: 115 additions & 29 deletions internal/viewport/linebuffer/linebuffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ import (
)

// LineBuffer provides functionality to get sequential strings of a specified terminal width, accounting
// for the ansi escape codes styling the content.
// for the ansi escape codes styling the Content.
type LineBuffer struct {
Content string // underlying string with ansi codes. utf-8 bytes
leftRuneIdx int // left plaintext rune idx to start next PopLeft result from
lineNoAnsi string // line without ansi codes. utf-8 bytes
lineNoAnsi string // Content without ansi codes. utf-8 bytes
lineNoAnsiRunes []rune // runes of lineNoAnsi. len(lineNoAnsiRunes) == len(lineNoAnsiWidths)
runeIdxToByteOffset []int // idx of lineNoAnsiRunes to byte offset. len(runeIdxToByteOffset) == len(lineNoAnsiRunes)
lineNoAnsiWidths []int // terminal cell widths of lineNoAnsi. len(lineNoAnsiWidths) == len(lineNoAnsiRunes)
lineNoAnsiCumWidths []int // cumulative lineNoAnsiWidths
ansiCodeIndexes [][]int // slice of startByte, endByte indexes of ansi codes in the line
ansiCodeIndexes [][]int // slice of startByte, endByte indexes of ansi codes in the Content
}

func New(line string) LineBuffer {
Expand All @@ -27,31 +27,70 @@ func New(line string) LineBuffer {
leftRuneIdx: 0,
}

lb.ansiCodeIndexes = constants.AnsiRegex.FindAllStringIndex(line, -1)
lb.lineNoAnsi = stripAnsi(line)
lb.ansiCodeIndexes = findAnsiRanges(line)

lb.lineNoAnsiRunes = []rune(lb.lineNoAnsi)
n := len(lb.lineNoAnsiRunes)
lb.runeIdxToByteOffset = make([]int, n+1)
lb.lineNoAnsiWidths = make([]int, n)
lb.lineNoAnsiCumWidths = make([]int, n)
if len(lb.ansiCodeIndexes) > 0 {
totalLen := len(line)
for _, r := range lb.ansiCodeIndexes {
totalLen -= r[1] - r[0]
}

buf := make([]byte, 0, totalLen)
lastPos := 0
for _, r := range lb.ansiCodeIndexes {
buf = append(buf, line[lastPos:r[0]]...)
lastPos = r[1]
}
buf = append(buf, line[lastPos:]...)
lb.lineNoAnsi = string(buf)
} else {
lb.lineNoAnsi = line
}

n := utf8.RuneCountInString(lb.lineNoAnsi)

// single allocation for all integer slices
combined := make([]int, n+1+n+n)
lb.runeIdxToByteOffset = combined[:n+1]
lb.lineNoAnsiWidths = combined[n+1 : n+1+n]
lb.lineNoAnsiCumWidths = combined[n+1+n:]

lb.lineNoAnsiRunes = make([]rune, n)

currentOffset := 0
cumWidth := 0
for i, r := range lb.lineNoAnsiRunes {
i := 0
for _, r := range lb.lineNoAnsi {
lb.runeIdxToByteOffset[i] = currentOffset
currentOffset += utf8.RuneLen(r)

lb.lineNoAnsiRunes[i] = r
width := runewidth.RuneWidth(r)
lb.lineNoAnsiWidths[i] = width
cumWidth += width
lb.lineNoAnsiCumWidths[i] = cumWidth
i++
}
lb.runeIdxToByteOffset[n] = currentOffset

return lb
}

// ToLineBuffers builds one LineBuffer per input string via New, preserving order.
// The result always has len(lines) elements.
func ToLineBuffers(lines []string) []LineBuffer {
	buffers := make([]LineBuffer, 0, len(lines))
	for _, content := range lines {
		buffers = append(buffers, New(content))
	}
	return buffers
}

// ToStrings returns the Content field of each LineBuffer in lbs, preserving order.
// The result always has len(lbs) elements.
func ToStrings(lbs []LineBuffer) []string {
	contents := make([]string, 0, len(lbs))
	for idx := range lbs {
		contents = append(contents, lbs[idx].Content)
	}
	return contents
}

func (l LineBuffer) TotalLines(width int) int {
if width == 0 {
return 0
Expand All @@ -68,7 +107,7 @@ func (l *LineBuffer) SeekToLine(n, width int) {
}

func (l *LineBuffer) SeekToWidth(width int) {
// width can go past end, in which case PopLeft() returns "". Required when e.g. panning past line's end.
// width can go past end, in which case PopLeft() returns "". Required when e.g. panning past Content's end.
if width <= 0 {
l.leftRuneIdx = 0
return
Expand Down Expand Up @@ -137,7 +176,7 @@ func (l *LineBuffer) PopLeft(width int, continuation, toHighlight string, highli

res := result.String()

// apply left/right line continuation indicators
// apply left/right Content continuation indicators
if len(continuation) > 0 && (startRuneIdx > 0 || l.leftRuneIdx < len(l.lineNoAnsiRunes)) {
continuationRunes := []rune(continuation)

Expand Down Expand Up @@ -274,8 +313,8 @@ func reapplyAnsi(original, truncated string, truncByteOffset int, ansiCodeIndexe
return string(result)
}

// highlightLine highlights a string in a line that potentially has ansi codes in it without disrupting them
// start and end are the byte offsets for which highlighting is considered in the line, not counting ansi codes
// highlightLine highlights a string in a Content that potentially has ansi codes in it without disrupting them
// start and end are the byte offsets for which highlighting is considered in the Content, not counting ansi codes
func highlightLine(line, highlight string, highlightStyle lipgloss.Style, start, end int) string {
if line == "" || highlight == "" {
return line
Expand Down Expand Up @@ -339,7 +378,28 @@ func highlightLine(line, highlight string, highlightStyle lipgloss.Style, start,
}

func stripAnsi(input string) string {
return constants.AnsiRegex.ReplaceAllString(input, "")
ranges := findAnsiRanges(input)
if len(ranges) == 0 {
return input
}

totalAnsiLen := 0
for _, r := range ranges {
totalAnsiLen += r[1] - r[0]
}

finalLen := len(input) - totalAnsiLen
var builder strings.Builder
builder.Grow(finalLen)

lastPos := 0
for _, r := range ranges {
builder.WriteString(input[lastPos:r[0]])
lastPos = r[1]
}

builder.WriteString(input[lastPos:])
return builder.String()
}

func simplifyAnsiCodes(ansis []string) []string {
Expand Down Expand Up @@ -377,18 +437,6 @@ func simplifyAnsiCodes(ansis []string) []string {
return ansis
}

// initByteOffsets maps each rune index to a running byte offset computed from utf8.RuneLen.
// The returned slice has len(runes)+1 entries: entry i is the sum of utf8.RuneLen over
// runes[:i], so entry 0 is always 0 and the last entry is the sum over all runes.
func initByteOffsets(runes []rune) []int {
	n := len(runes)
	byteOffsets := make([]int, n+1)
	for idx := 0; idx < n; idx++ {
		// each offset is the previous one advanced by the previous rune's encoded size
		byteOffsets[idx+1] = byteOffsets[idx] + utf8.RuneLen(runes[idx])
	}
	return byteOffsets
}

// overflowsLeft checks if a substring overflows a string on the left if the string were to start at startByteIdx inclusive.
// assumes s has no ansi codes.
// It performs a case-sensitive comparison and returns two values:
Expand Down Expand Up @@ -577,3 +625,41 @@ func replaceRuneWithRunes(rs []rune, idxToReplace int, replaceWith []rune) []run
copy(result[idxToReplace+len(replaceWith):], rs[idxToReplace+1:])
return result
}

// findAnsiRanges returns the [startByte, endByte) index pairs of every ANSI SGR escape
// sequence in s, in order of appearance. A sequence is "\x1b[", then zero or more
// parameter bytes (digits or ';'), then a terminating 'm'.
//
// Anything else that begins with "\x1b[" (e.g. "\x1b[2K", or an unterminated sequence)
// is not reported, so the visible text following it is never swallowed.
// NOTE(review): this is presumably the same shape constants.AnsiRegex matches — confirm.
//
// Returns nil when s contains no "\x1b[" at all, and an empty slice when it contains
// "\x1b[" but no complete sequence.
func findAnsiRanges(s string) [][]int {
	// every sequence starts with "\x1b[", so this count is an upper bound on the result size
	count := strings.Count(s, "\x1b[")
	if count == 0 {
		return nil
	}

	// one backing array holds all pairs; each pair is capped so an append by a caller
	// cannot overwrite the next pair
	allRanges := make([]int, count*2)
	ranges := make([][]int, count)
	for i := 0; i < count; i++ {
		ranges[i] = allRanges[i*2 : i*2+2 : i*2+2]
	}

	rangeIdx := 0
	for i := 0; i < len(s); {
		if i+1 >= len(s) || s[i] != '\x1b' || s[i+1] != '[' {
			i++
			continue
		}

		start := i
		i += 2 // skip \x1b[

		// consume parameter bytes only; any other byte ends the candidate sequence
		for i < len(s) && (s[i] == ';' || (s[i] >= '0' && s[i] <= '9')) {
			i++
		}

		if i < len(s) && s[i] == 'm' {
			i++ // include the terminating 'm'
			allRanges[rangeIdx*2] = start
			allRanges[rangeIdx*2+1] = i
			rangeIdx++
		}
		// otherwise this was not a complete sequence: resume at i without skipping it,
		// because s[i] may itself be the '\x1b' that starts the next sequence
	}
	return ranges[:rangeIdx]
}
105 changes: 105 additions & 0 deletions internal/viewport/linebuffer/linebuffer_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package linebuffer

import (
"fmt"
"strings"
"testing"
)

// Example of interpreting output of `go test -v -bench=. -run=^$ -benchmem ./internal/viewport/linebuffer`
// BenchmarkNewLongLine-8 7842 152640 ns/op 904063 B/op 8 allocs/op
// - 7842: benchmark ran 7,842 iterations to get a stable measurement
// - 152640 ns/op: each call to New() takes about 153 microseconds
// - 904063 B/op: each operation allocates about 904KB of memory
// - 8 allocs/op: each call to New() makes 8 distinct memory allocations

// BenchmarkNewLongLine measures New on a single long plain-text line built by
// repeating a short phrase 1000 times.
func BenchmarkNewLongLine(b *testing.B) {
	longLine := strings.Repeat("hi there random words woohoo ", 1000)

	// keep the setup above out of the measurement, and report allocation stats
	b.ResetTimer()
	b.ReportAllocs()

	for iter := 0; iter < b.N; iter++ {
		// assign the result so the call's value is not simply dropped
		_ = New(longLine)
	}
}

// BenchmarkMemoryComparisonAscii compares, across several input lengths, a plain string
// assignment against building a LineBuffer with New from the same ascii-only input.
func BenchmarkMemoryComparisonAscii(b *testing.B) {
	// sweep input lengths to see how memory usage scales
	for _, length := range []int{10, 100, 1000, 10000} {
		input := strings.Repeat("h", length)
		stringName := fmt.Sprintf("String_%d", length)
		bufferName := fmt.Sprintf("LineBuffer_%d", length)

		// baseline: just assign the string
		b.Run(stringName, func(sb *testing.B) {
			sb.ReportAllocs()
			for iter := 0; iter < sb.N; iter++ {
				s := input
				_ = s
			}
		})

		// same input wrapped in a LineBuffer
		b.Run(bufferName, func(sb *testing.B) {
			sb.ReportAllocs()
			for iter := 0; iter < sb.N; iter++ {
				_ = New(input)
			}
		})
	}
}

// BenchmarkMemoryComparisonAsciiWithAnsi compares, across several repeat counts, a plain
// string assignment against building a LineBuffer with New, where every visible character
// of the input is wrapped in its own pair of ansi codes.
func BenchmarkMemoryComparisonAsciiWithAnsi(b *testing.B) {
	// sweep repeat counts to see how memory usage scales
	for _, length := range []int{10, 100, 1000, 10000} {
		input := strings.Repeat("\x1b[31mh\x1b[0m", length)
		stringName := fmt.Sprintf("String_%d", length)
		bufferName := fmt.Sprintf("LineBuffer_%d", length)

		// baseline: just assign the string
		b.Run(stringName, func(sb *testing.B) {
			sb.ReportAllocs()
			for iter := 0; iter < sb.N; iter++ {
				s := input
				_ = s
			}
		})

		// same input wrapped in a LineBuffer
		b.Run(bufferName, func(sb *testing.B) {
			sb.ReportAllocs()
			for iter := 0; iter < sb.N; iter++ {
				_ = New(input)
			}
		})
	}
}

// BenchmarkMemoryComparisonAsciiWithUnicode compares, across several repeat counts, a plain
// string assignment against building a LineBuffer with New, where the input is a
// multi-byte rune ("世") repeated.
func BenchmarkMemoryComparisonAsciiWithUnicode(b *testing.B) {
	// sweep repeat counts to see how memory usage scales
	for _, length := range []int{10, 100, 1000, 10000} {
		input := strings.Repeat("世", length)
		stringName := fmt.Sprintf("String_%d", length)
		bufferName := fmt.Sprintf("LineBuffer_%d", length)

		// baseline: just assign the string
		b.Run(stringName, func(sb *testing.B) {
			sb.ReportAllocs()
			for iter := 0; iter < sb.N; iter++ {
				s := input
				_ = s
			}
		})

		// same input wrapped in a LineBuffer
		b.Run(bufferName, func(sb *testing.B) {
			sb.ReportAllocs()
			for iter := 0; iter < sb.N; iter++ {
				_ = New(input)
			}
		})
	}
}
Loading

0 comments on commit 282b9e6

Please sign in to comment.