diff --git a/go.mod b/go.mod index af2bb60..72fe818 100644 --- a/go.mod +++ b/go.mod @@ -1,31 +1,30 @@ module github.com/CorentinB/warc -go 1.21 +go 1.22 require ( github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 - github.com/klauspost/compress v1.17.7 - github.com/klauspost/pgzip v1.2.6 + github.com/klauspost/compress v1.17.9 github.com/paulbellamy/ratecounter v0.2.0 - github.com/refraction-networking/utls v1.6.3 + github.com/refraction-networking/utls v1.6.7 github.com/remeh/sizedwaitgroup v1.0.0 github.com/satori/go.uuid v1.2.0 github.com/sirupsen/logrus v1.9.3 - github.com/spf13/cobra v1.8.0 + github.com/spf13/cobra v1.8.1 github.com/ulikunitz/xz v0.5.12 go.uber.org/goleak v1.3.0 - golang.org/x/net v0.22.0 - golang.org/x/sync v0.6.0 + golang.org/x/net v0.28.0 + golang.org/x/sync v0.8.0 ) require ( github.com/andybalholm/brotli v1.1.0 // indirect - github.com/cloudflare/circl v1.3.7 // indirect + github.com/cloudflare/circl v1.3.9 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/kr/pretty v0.3.0 // indirect - github.com/quic-go/quic-go v0.41.0 // indirect + github.com/quic-go/quic-go v0.46.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - golang.org/x/crypto v0.21.0 // indirect - golang.org/x/sys v0.18.0 // indirect + golang.org/x/crypto v0.26.0 // indirect + golang.org/x/sys v0.24.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect ) diff --git a/go.sum b/go.sum index 07a7b92..fcf71f6 100644 --- a/go.sum +++ b/go.sum @@ -4,7 +4,10 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU= github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= +github.com/cloudflare/circl v1.3.9 h1:QFrlgFYf2Qpi8bSpVPK1HBvWpx16v/1TZivyo7pGuBE= +github.com/cloudflare/circl v1.3.9/go.mod h1:PDRU+oXvdD7KCtgKxW95M5Z8BpSCJXQORiZFnBQS5QU= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -21,8 +24,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= -github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= -github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= @@ -41,8 +44,12 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/quic-go/quic-go v0.41.0 h1:aD8MmHfgqTURWNJy48IYFg2OnxwHT3JL7ahGs73lb4k= github.com/quic-go/quic-go v0.41.0/go.mod h1:qCkNjqczPEvgsOnxZ0eCD14lv+B2LHlFAB++CNOh9hA= +github.com/quic-go/quic-go v0.46.0 h1:uuwLClEEyk1DNvchH8uCByQVjo3yKL9opKulExNDs7Y= +github.com/quic-go/quic-go v0.46.0/go.mod h1:1dLehS7TIR64+vxGR70GDcatWTOtMX2PUtnKsjbTurI= github.com/refraction-networking/utls v1.6.3 h1:MFOfRN35sSx6K5AZNIoESsBuBxS2LCgRilRIdHb6fDc= github.com/refraction-networking/utls v1.6.3/go.mod h1:yil9+7qSl+gBwJqztoQseO6Pr3h62pQoY1lXiNR/FPs= +github.com/refraction-networking/utls v1.6.7 h1:zVJ7sP1dJx/WtVuITug3qYUq034cDq9B2MR1K67ULZM= +github.com/refraction-networking/utls v1.6.7/go.mod h1:BC3O4vQzye5hqpmDTWUqi4P5DDhzJfkV1tdqtawQIH0= github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E= github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo= github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= @@ -54,6 +61,8 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -66,15 +75,24 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/utils.go b/utils.go index c083f89..d1d2bea 100644 --- a/utils.go +++ b/utils.go @@ -17,7 +17,6 @@ import ( "time" gzip "github.com/klauspost/compress/gzip" - "github.com/klauspost/pgzip" "github.com/klauspost/compress/zstd" ) @@ -117,26 +116,8 @@ func isLineStartingWithHTTPMethod(line string) bool { func NewWriter(writer io.Writer, fileName string, compression string, contentLengthHeader string) (*Writer, error) { if compression != "" { if compression == "GZIP" { - var gzipWriter *gzip.Writer - // If the record's Content-Length is bigger than a megabyte, we use the parallel gzip library - if contentLengthHeader != "" { - contentLength, err := strconv.Atoi(contentLengthHeader) - if err != nil { - return nil, err - } - - if contentLength > 1000000 { - pgzipWriter := pgzip.NewWriter(writer) - return &Writer{ - FileName: fileName, - Compression: compression, - PGZIPWriter: pgzipWriter, - FileWriter: bufio.NewWriter(pgzipWriter), - }, nil - } - } + gzipWriter := gzip.NewWriter(writer) - gzipWriter = gzip.NewWriter(writer) return &Writer{ FileName: fileName, Compression: compression, @@ -155,7 +136,7 @@ func NewWriter(writer io.Writer, fileName string, compression string, contentLen FileWriter: bufio.NewWriter(zstdWriter), }, nil } - return nil, errors.New("Invalid compression algorithm: " + compression) + return nil, errors.New("invalid compression algorithm: " + compression) } return &Writer{ @@ -238,7 +219,7 @@ func checkRotatorSettings(settings *RotatorSettings) (err error) { // Check if the specified compression algorithm is valid if settings.Compression != "" && settings.Compression != "GZIP" && settings.Compression != "ZSTD" { - return errors.New("Invalid compression algorithm: " + settings.Compression) + return errors.New("invalid compression algorithm: " + settings.Compression) } // Add few headers to the warcinfo payload, to not have it empty diff --git a/warc.go b/warc.go index 979ffa6..4241a76 100644 --- a/warc.go +++ b/warc.go @@ -76,8 +76,6 @@ func (s *RotatorSettings) NewWARCRotator() (recordWriterChan chan *RecordBatch, func (w *Writer) CloseCompressedWriter() { if w.GZIPWriter != nil { w.GZIPWriter.Close() - } else if w.PGZIPWriter != nil { - w.PGZIPWriter.Close() } else if w.ZSTDWriter != nil { w.ZSTDWriter.Close() } diff --git a/write.go b/write.go index 0494711..67db1d1 100644 --- a/write.go +++ b/write.go @@ -9,7 +9,6 @@ import ( "time" "github.com/klauspost/compress/gzip" - "github.com/klauspost/pgzip" "github.com/klauspost/compress/zstd" uuid "github.com/satori/go.uuid" @@ -20,7 +19,6 @@ type Writer struct { FileName string Compression string GZIPWriter *gzip.Writer - PGZIPWriter *pgzip.Writer ZSTDWriter *zstd.Encoder FileWriter *bufio.Writer ParallelGZIP bool