Skip to content

Commit

Permalink
deflate: Improve cost estimate, level 7-9 (#458)
Browse files Browse the repository at this point in the history
* deflate: Improve cost estimate, level 7-9
* Move alloc tests.
  • Loading branch information
klauspost authored Dec 1, 2021
1 parent 901aaf2 commit 25adde5
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 85 deletions.
67 changes: 0 additions & 67 deletions compressible_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,7 @@ package compress
import (
"crypto/rand"
"encoding/base32"
"io/ioutil"
"strconv"
"strings"
"testing"

"github.com/klauspost/compress/flate"
"github.com/klauspost/compress/gzip"
)

func BenchmarkEstimate(b *testing.B) {
Expand Down Expand Up @@ -239,64 +233,3 @@ Thoughts?`)
b.Log(ShannonEntropyBits(testData))
})
}

// BenchmarkCompressAllocations reports allocations for compressing a small
// payload with the flate and gzip writers at every level from -2 through 9.
// Each iteration creates a fresh writer, writes the payload and closes the
// writer, so the writer setup is part of what is measured.
func BenchmarkCompressAllocations(b *testing.B) {
	payload := []byte(strings.Repeat("Tiny payload", 20))
	for j := -2; j <= 9; j++ {
		b.Run("level("+strconv.Itoa(j)+")", func(b *testing.B) {
			b.Run("flate", func(b *testing.B) {
				b.ReportAllocs()

				for i := 0; i < b.N; i++ {
					w, err := flate.NewWriter(ioutil.Discard, j)
					if err != nil {
						b.Fatal(err)
					}
					// Fail loudly instead of benchmarking a broken writer.
					if _, err := w.Write(payload); err != nil {
						b.Fatal(err)
					}
					if err := w.Close(); err != nil {
						b.Fatal(err)
					}
				}
			})
			b.Run("gzip", func(b *testing.B) {
				b.ReportAllocs()

				for i := 0; i < b.N; i++ {
					w, err := gzip.NewWriterLevel(ioutil.Discard, j)
					if err != nil {
						b.Fatal(err)
					}
					// Fail loudly instead of benchmarking a broken writer.
					if _, err := w.Write(payload); err != nil {
						b.Fatal(err)
					}
					if err := w.Close(); err != nil {
						b.Fatal(err)
					}
				}
			})
		})
	}
}

// BenchmarkCompressAllocationsSingle reports allocations for compressing a
// small payload with the flate and gzip writers at a single fixed level (2).
// Each iteration creates a fresh writer, writes the payload and closes the
// writer, so the writer setup is part of what is measured.
func BenchmarkCompressAllocationsSingle(b *testing.B) {
	payload := []byte(strings.Repeat("Tiny payload", 20))
	const level = 2
	b.Run("flate", func(b *testing.B) {
		b.ReportAllocs()

		for i := 0; i < b.N; i++ {
			w, err := flate.NewWriter(ioutil.Discard, level)
			if err != nil {
				b.Fatal(err)
			}
			// Fail loudly instead of benchmarking a broken writer.
			if _, err := w.Write(payload); err != nil {
				b.Fatal(err)
			}
			if err := w.Close(); err != nil {
				b.Fatal(err)
			}
		}
	})
	b.Run("gzip", func(b *testing.B) {
		b.ReportAllocs()

		for i := 0; i < b.N; i++ {
			w, err := gzip.NewWriterLevel(ioutil.Discard, level)
			if err != nil {
				b.Fatal(err)
			}
			// Fail loudly instead of benchmarking a broken writer.
			if _, err := w.Write(payload); err != nil {
				b.Fatal(err)
			}
			if err := w.Close(); err != nil {
				b.Fatal(err)
			}
		}
	})
}
50 changes: 32 additions & 18 deletions flate/deflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"io"
"math"
"math/bits"

comp "github.com/klauspost/compress"
)

const (
Expand Down Expand Up @@ -95,8 +97,9 @@ type advancedState struct {
hashOffset int

// input window: unprocessed data is window[index:windowEnd]
index int
hashMatch [maxMatchLength + minMatchLength]uint32
index int
estBitsPerByte int
hashMatch [maxMatchLength + minMatchLength]uint32

hash uint32
ii uint16 // position of last match, intended to overflow to reset.
Expand Down Expand Up @@ -265,7 +268,7 @@ func (d *compressor) fillWindow(b []byte) {
// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) {
func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (length, offset int, ok bool) {
minMatchLook := maxMatchLength
if lookahead < minMatchLook {
minMatchLook = lookahead
Expand All @@ -290,22 +293,26 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
minIndex = 0
}
offset = 0
const assumeBits = 8
cGain := 0

// Base is 4 bytes with an additional cost.
// Matches must be better than this.
cGain := minMatchLength*bpb - 12
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
n := matchLen(win[i:i+minMatchLook], wPos)
newGain := n*assumeBits - bits.Len32(uint32(pos-i))
if n >= minMatchLength && newGain > cGain {
length = n
offset = pos - i
cGain = newGain
ok = true
if n >= nice {
// The match is good enough that we don't try to find a better one.
break
if n > length {
newGain := n*bpb - bits.Len32(uint32(pos-i)) - 1
if newGain > cGain {
length = n
offset = pos - i
cGain = newGain
ok = true
if n >= nice {
// The match is good enough that we don't try to find a better one.
break
}
wEnd = win[pos+n]
}
wEnd = win[pos+n]
}
}
if i <= minIndex {
Expand Down Expand Up @@ -379,6 +386,11 @@ func (d *compressor) deflateLazy() {
if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
return
}
s.estBitsPerByte = 8
if !d.sync {
s.estBitsPerByte = comp.ShannonEntropyBits(d.window[s.index:d.windowEnd])
s.estBitsPerByte = int(1 + float64(s.estBitsPerByte)/float64(d.windowEnd-s.index))
}

s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
if s.index < s.maxInsertIndex {
Expand Down Expand Up @@ -431,7 +443,7 @@ func (d *compressor) deflateLazy() {
}

if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok {
if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead, s.estBitsPerByte); ok {
s.length = newLength
s.offset = newOffset
}
Expand All @@ -444,7 +456,7 @@ func (d *compressor) deflateLazy() {
// Offset of 2 seems to yield best results.
const checkOff = 2
prevIndex := s.index - 1
if prevLength < d.nice && prevIndex+prevLength+checkOff < s.maxInsertIndex {
if prevIndex+prevLength+checkOff < s.maxInsertIndex {
end := lookahead
if lookahead > maxMatchLength {
end = maxMatchLength
Expand Down Expand Up @@ -642,7 +654,9 @@ func (d *compressor) write(b []byte) (n int, err error) {
}
n = len(b)
for len(b) > 0 {
d.step(d)
if d.windowEnd == len(d.window) || d.sync {
d.step(d)
}
b = b[d.fill(d, b):]
if d.err != nil {
return 0, d.err
Expand Down
37 changes: 37 additions & 0 deletions flate/writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,40 @@ func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err er
}
return written, err
}

// BenchmarkCompressAllocations reports allocations for compressing a small
// payload with a flate writer at every level from -2 through 9.
// Each iteration creates a fresh writer, writes the payload and closes the
// writer, so the writer setup is part of what is measured.
func BenchmarkCompressAllocations(b *testing.B) {
	payload := []byte(strings.Repeat("Tiny payload", 20))
	for j := -2; j <= 9; j++ {
		b.Run("level("+strconv.Itoa(j)+")", func(b *testing.B) {
			b.Run("flate", func(b *testing.B) {
				b.ReportAllocs()

				for i := 0; i < b.N; i++ {
					w, err := NewWriter(ioutil.Discard, j)
					if err != nil {
						b.Fatal(err)
					}
					// Fail loudly instead of benchmarking a broken writer.
					if _, err := w.Write(payload); err != nil {
						b.Fatal(err)
					}
					if err := w.Close(); err != nil {
						b.Fatal(err)
					}
				}
			})
		})
	}
}

// BenchmarkCompressAllocationsSingle reports allocations for compressing a
// small payload with a flate writer at a single fixed level (2).
// Each iteration creates a fresh writer, writes the payload and closes the
// writer, so the writer setup is part of what is measured.
func BenchmarkCompressAllocationsSingle(b *testing.B) {
	payload := []byte(strings.Repeat("Tiny payload", 20))
	const level = 2
	b.Run("flate", func(b *testing.B) {
		b.ReportAllocs()

		for i := 0; i < b.N; i++ {
			w, err := NewWriter(ioutil.Discard, level)
			if err != nil {
				b.Fatal(err)
			}
			// Fail loudly instead of benchmarking a broken writer.
			if _, err := w.Write(payload); err != nil {
				b.Fatal(err)
			}
			if err := w.Close(); err != nil {
				b.Fatal(err)
			}
		}
	})
}
40 changes: 40 additions & 0 deletions gzip/gzip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"io"
"io/ioutil"
"math/rand"
"strconv"
"strings"
"testing"
"time"
)
Expand Down Expand Up @@ -524,3 +526,41 @@ func benchmarkOldGzipN(b *testing.B, level int) {
}
*/

// BenchmarkCompressAllocations reports allocations for compressing a small
// payload with a gzip writer at every level from -2 through 9.
// Each iteration creates a fresh writer, writes the payload and closes the
// writer, so the writer setup is part of what is measured.
func BenchmarkCompressAllocations(b *testing.B) {
	payload := []byte(strings.Repeat("Tiny payload", 20))
	for j := -2; j <= 9; j++ {
		b.Run("level("+strconv.Itoa(j)+")", func(b *testing.B) {
			b.Run("gzip", func(b *testing.B) {
				b.ReportAllocs()

				for i := 0; i < b.N; i++ {
					w, err := NewWriterLevel(ioutil.Discard, j)
					if err != nil {
						b.Fatal(err)
					}
					// Fail loudly instead of benchmarking a broken writer.
					if _, err := w.Write(payload); err != nil {
						b.Fatal(err)
					}
					if err := w.Close(); err != nil {
						b.Fatal(err)
					}
				}
			})
		})
	}
}

// BenchmarkCompressAllocationsSingle reports allocations for compressing a
// small payload with a gzip writer at a single fixed level (2).
// Each iteration creates a fresh writer, writes the payload and closes the
// writer, so the writer setup is part of what is measured.
func BenchmarkCompressAllocationsSingle(b *testing.B) {
	payload := []byte(strings.Repeat("Tiny payload", 20))
	const level = 2

	b.Run("gzip", func(b *testing.B) {
		b.ReportAllocs()

		for i := 0; i < b.N; i++ {
			w, err := NewWriterLevel(ioutil.Discard, level)
			if err != nil {
				b.Fatal(err)
			}
			// Fail loudly instead of benchmarking a broken writer.
			if _, err := w.Write(payload); err != nil {
				b.Fatal(err)
			}
			if err := w.Close(); err != nil {
				b.Fatal(err)
			}
		}
	})
}

0 comments on commit 25adde5

Please sign in to comment.