From 4cd2b464ae2f51394436314a81233913cc6949f5 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Sun, 22 Dec 2024 13:35:35 +0100 Subject: [PATCH] Rename Entity to Reader Entity doesn't make it clear that the message can only be consumed once and is inconsistent with Writer. --- entity.go | 258 +---------------------------- example_test.go | 4 +- mail/reader.go | 6 +- multipart.go | 12 +- reader.go | 267 +++++++++++++++++++++++++++++++ entity_test.go => reader_test.go | 36 ++--- 6 files changed, 299 insertions(+), 284 deletions(-) create mode 100644 reader.go rename entity_test.go => reader_test.go (92%) diff --git a/entity.go b/entity.go index d0aadb60..5ec4cf8a 100644 --- a/entity.go +++ b/entity.go @@ -1,267 +1,15 @@ package message import ( - "bufio" - "errors" "io" - "math" - "strings" - - "github.com/emersion/go-message/textproto" ) -// An Entity is either a whole message or a one of the parts in the body of a -// multipart entity. -// -// An Entity can only be consumed once: after its body is read, it can't be -// used anymore. -type Entity struct { - Header Header // The entity's header. - Body io.Reader // The decoded entity's body. - - mediaType string - mediaParams map[string]string -} +type Entity = Reader -// New makes a new message with the provided header and body. The entity's -// transfer encoding and charset are automatically decoded to UTF-8. -// -// If the message uses an unknown transfer encoding or charset, New returns an -// error that verifies IsUnknownCharset, but also returns an Entity that can -// be read. func New(header Header, body io.Reader) (*Entity, error) { - var err error - - mediaType, mediaParams, _ := header.ContentType() - - // QUIRK: RFC 2045 section 6.4 specifies that multipart messages can't have - // a Content-Transfer-Encoding other than "7bit", "8bit" or "binary". - // However some messages in the wild are non-conformant and have it set to - // e.g. "quoted-printable". So we just ignore it for multipart. 
- // See https://github.com/emersion/go-message/issues/48 - if !strings.HasPrefix(mediaType, "multipart/") { - enc := header.Get("Content-Transfer-Encoding") - if decoded, encErr := encodingReader(enc, body); encErr != nil { - err = UnknownEncodingError{encErr} - } else { - body = decoded - } - } - - // RFC 2046 section 4.1.2: charset only applies to text/* - if strings.HasPrefix(mediaType, "text/") { - if ch, ok := mediaParams["charset"]; ok { - if converted, charsetErr := charsetReader(ch, body); charsetErr != nil { - err = UnknownCharsetError{charsetErr} - } else { - body = converted - } - } - } - - return &Entity{ - Header: header, - Body: body, - mediaType: mediaType, - mediaParams: mediaParams, - }, err + return NewReader(header, body) } -// NewMultipart makes a new multipart message with the provided header and -// parts. The Content-Type header must begin with "multipart/". -// -// If the message uses an unknown transfer encoding, NewMultipart returns an -// error that verifies IsUnknownCharset, but also returns an Entity that can -// be read. func NewMultipart(header Header, parts []*Entity) (*Entity, error) { - r := &multipartBody{ - header: header, - parts: parts, - } - - return New(header, r) -} - -const defaultMaxHeaderBytes = 1 << 20 // 1 MB - -var errHeaderTooBig = errors.New("message: header exceeds maximum size") - -// limitedReader is the same as io.LimitedReader, but returns a custom error. -type limitedReader struct { - R io.Reader - N int64 -} - -func (lr *limitedReader) Read(p []byte) (int, error) { - if lr.N <= 0 { - return 0, errHeaderTooBig - } - if int64(len(p)) > lr.N { - p = p[0:lr.N] - } - n, err := lr.R.Read(p) - lr.N -= int64(n) - return n, err -} - -// ReadOptions are options for ReadWithOptions. -type ReadOptions struct { - // MaxHeaderBytes limits the maximum permissible size of a message header - // block. If exceeded, an error will be returned. - // - // Set to -1 for no limit, set to 0 for the default value (1MB). 
- MaxHeaderBytes int64 -} - -// withDefaults returns a sanitised version of the options with defaults/special -// values accounted for. -func (o *ReadOptions) withDefaults() *ReadOptions { - var out ReadOptions - if o != nil { - out = *o - } - if out.MaxHeaderBytes == 0 { - out.MaxHeaderBytes = defaultMaxHeaderBytes - } else if out.MaxHeaderBytes < 0 { - out.MaxHeaderBytes = math.MaxInt64 - } - return &out -} - -// ReadWithOptions see Read, but allows overriding some parameters with -// ReadOptions. -// -// If the message uses an unknown transfer encoding or charset, ReadWithOptions -// returns an error that verifies IsUnknownCharset or IsUnknownEncoding, but -// also returns an Entity that can be read. -func ReadWithOptions(r io.Reader, opts *ReadOptions) (*Entity, error) { - opts = opts.withDefaults() - - lr := &limitedReader{R: r, N: opts.MaxHeaderBytes} - br := bufio.NewReader(lr) - - h, err := textproto.ReadHeader(br) - if err != nil { - return nil, err - } - - lr.N = math.MaxInt64 - - return New(Header{h}, br) -} - -// Read reads a message from r. The message's encoding and charset are -// automatically decoded to raw UTF-8. Note that this function only reads the -// message header. -// -// If the message uses an unknown transfer encoding or charset, Read returns an -// error that verifies IsUnknownCharset or IsUnknownEncoding, but also returns -// an Entity that can be read. -func Read(r io.Reader) (*Entity, error) { - return ReadWithOptions(r, nil) -} - -// MultipartReader returns a MultipartReader that reads parts from this entity's -// body. If this entity is not multipart, it returns nil. -func (e *Entity) MultipartReader() MultipartReader { - if !strings.HasPrefix(e.mediaType, "multipart/") { - return nil - } - if mb, ok := e.Body.(*multipartBody); ok { - return mb - } - return &multipartReader{textproto.NewMultipartReader(e.Body, e.mediaParams["boundary"])} -} - -// writeBodyTo writes this entity's body to w (without the header). 
-func (e *Entity) writeBodyTo(w *Writer) error { - var err error - if mb, ok := e.Body.(*multipartBody); ok { - err = mb.writeBodyTo(w) - } else { - _, err = io.Copy(w, e.Body) - } - return err -} - -// WriteTo writes this entity's header and body to w. -func (e *Entity) WriteTo(w io.Writer) error { - ew, err := CreateWriter(w, e.Header) - if err != nil { - return err - } - - if err := e.writeBodyTo(ew); err != nil { - ew.Close() - return err - } - - return ew.Close() -} - -// WalkFunc is the type of the function called for each part visited by Walk. -// -// The path argument is a list of multipart indices leading to the part. The -// root part has a nil path. -// -// If there was an encoding error walking to a part, the incoming error will -// describe the problem and the function can decide how to handle that error. -// -// Unlike IMAP part paths, indices start from 0 (instead of 1) and a -// non-multipart message has a nil path (instead of {1}). -// -// If an error is returned, processing stops. -type WalkFunc func(path []int, entity *Entity, err error) error - -// Walk walks the entity's multipart tree, calling walkFunc for each part in -// the tree, including the root entity. -// -// Walk consumes the entity. 
-func (e *Entity) Walk(walkFunc WalkFunc) error { - var multipartReaders []MultipartReader - var path []int - part := e - for { - var err error - if part == nil { - if len(multipartReaders) == 0 { - break - } - - // Get the next part from the last multipart reader - mr := multipartReaders[len(multipartReaders)-1] - part, err = mr.NextPart() - if err == io.EOF { - multipartReaders = multipartReaders[:len(multipartReaders)-1] - path = path[:len(path)-1] - continue - } else if IsUnknownEncoding(err) || IsUnknownCharset(err) { - // Forward the error to walkFunc - } else if err != nil { - return err - } - - path[len(path)-1]++ - } - - // Copy the path since we'll mutate it on the next iteration - var pathCopy []int - if len(path) > 0 { - pathCopy = make([]int, len(path)) - copy(pathCopy, path) - } - - if err := walkFunc(pathCopy, part, err); err != nil { - return err - } - - if mr := part.MultipartReader(); mr != nil { - multipartReaders = append(multipartReaders, mr) - path = append(path, -1) - } - - part = nil - } - - return nil + return NewMultipartReader(header, parts) } diff --git a/example_test.go b/example_test.go index e44de23d..85bbf280 100644 --- a/example_test.go +++ b/example_test.go @@ -95,8 +95,8 @@ func Example_transform() { } // Define a function that transforms message. - var transform func(w *message.Writer, e *message.Entity) error - transform = func(w *message.Writer, e *message.Entity) error { + var transform func(w *message.Writer, e *message.Reader) error + transform = func(w *message.Writer, e *message.Reader) error { if mr := e.MultipartReader(); mr != nil { // This is a multipart entity, transform each of its parts for { diff --git a/mail/reader.go b/mail/reader.go index c23f30c8..2d7dd216 100644 --- a/mail/reader.go +++ b/mail/reader.go @@ -36,19 +36,19 @@ type Part struct { type Reader struct { Header Header - e *message.Entity + e *message.Reader readers *list.List } // NewReader creates a new mail reader. 
-func NewReader(e *message.Entity) *Reader { +func NewReader(e *message.Reader) *Reader { mr := e.MultipartReader() if mr == nil { // Artificially create a multipart entity // With this header, no error will be returned by message.NewMultipart var h message.Header h.Set("Content-Type", "multipart/mixed") - me, _ := message.NewMultipart(h, []*message.Entity{e}) + me, _ := message.NewMultipartReader(h, []*message.Reader{e}) mr = me.MultipartReader() } diff --git a/multipart.go b/multipart.go index c406a311..d5f8533e 100644 --- a/multipart.go +++ b/multipart.go @@ -13,9 +13,9 @@ type MultipartReader interface { // NextPart returns the next part in the multipart or an error. When there are // no more parts, the error io.EOF is returned. // - // Entity.Body must be read completely before the next call to NextPart, + // Reader.Body must be read completely before the next call to NextPart, // otherwise it will be discarded. - NextPart() (*Entity, error) + NextPart() (*Reader, error) } type multipartReader struct { @@ -23,12 +23,12 @@ type multipartReader struct { } // NextPart implements MultipartReader. -func (r *multipartReader) NextPart() (*Entity, error) { +func (r *multipartReader) NextPart() (*Reader, error) { p, err := r.r.NextPart() if err != nil { return nil, err } - return New(Header{p.Header}, p) + return NewReader(Header{p.Header}, p) } // Close implements io.Closer. @@ -38,7 +38,7 @@ func (r *multipartReader) Close() error { type multipartBody struct { header Header - parts []*Entity + parts []*Reader r *io.PipeReader w *Writer @@ -88,7 +88,7 @@ func (m *multipartBody) Close() error { } // NextPart implements MultipartReader. 
-func (m *multipartBody) NextPart() (*Entity, error) { +func (m *multipartBody) NextPart() (*Reader, error) { if m.i >= len(m.parts) { return nil, io.EOF } diff --git a/reader.go b/reader.go new file mode 100644 index 00000000..1d13f7c7 --- /dev/null +++ b/reader.go @@ -0,0 +1,267 @@ +package message + +import ( + "bufio" + "errors" + "io" + "math" + "strings" + + "github.com/emersion/go-message/textproto" +) + +// A Reader is either a whole message or one of the parts in the body of a +// multipart entity. +// +// A Reader can only be consumed once: after its body is read, it can't be +// used anymore. +type Reader struct { + Header Header // The entity's header. + Body io.Reader // The decoded entity's body. + + mediaType string + mediaParams map[string]string +} + +// NewReader makes a new message with the provided header and body. The entity's +// transfer encoding and charset are automatically decoded to UTF-8. +// +// If the message uses an unknown transfer encoding or charset, NewReader +// returns an error that verifies IsUnknownCharset or IsUnknownEncoding, but +// also returns a Reader that can be read. +func NewReader(header Header, body io.Reader) (*Reader, error) { + var err error + + mediaType, mediaParams, _ := header.ContentType() + + // QUIRK: RFC 2045 section 6.4 specifies that multipart messages can't have + // a Content-Transfer-Encoding other than "7bit", "8bit" or "binary". + // However some messages in the wild are non-conformant and have it set to + // e.g. "quoted-printable". So we just ignore it for multipart. 
+ // See https://github.com/emersion/go-message/issues/48 + if !strings.HasPrefix(mediaType, "multipart/") { + enc := header.Get("Content-Transfer-Encoding") + if decoded, encErr := encodingReader(enc, body); encErr != nil { + err = UnknownEncodingError{encErr} + } else { + body = decoded + } + } + + // RFC 2046 section 4.1.2: charset only applies to text/* + if strings.HasPrefix(mediaType, "text/") { + if ch, ok := mediaParams["charset"]; ok { + if converted, charsetErr := charsetReader(ch, body); charsetErr != nil { + err = UnknownCharsetError{charsetErr} + } else { + body = converted + } + } + } + + return &Reader{ + Header: header, + Body: body, + mediaType: mediaType, + mediaParams: mediaParams, + }, err +} + +// NewMultipartReader makes a new multipart message with the provided header and +// parts. The Content-Type header must begin with "multipart/". +// +// If the message uses an unknown transfer encoding, NewMultipartReader returns +// an error that verifies IsUnknownEncoding, but also returns a Reader that can +// be read. +func NewMultipartReader(header Header, parts []*Reader) (*Reader, error) { + r := &multipartBody{ + header: header, + parts: parts, + } + + return NewReader(header, r) +} + +const defaultMaxHeaderBytes = 1 << 20 // 1 MB + +var errHeaderTooBig = errors.New("message: header exceeds maximum size") + +// limitedReader is the same as io.LimitedReader, but returns a custom error. +type limitedReader struct { + R io.Reader + N int64 +} + +func (lr *limitedReader) Read(p []byte) (int, error) { + if lr.N <= 0 { + return 0, errHeaderTooBig + } + if int64(len(p)) > lr.N { + p = p[0:lr.N] + } + n, err := lr.R.Read(p) + lr.N -= int64(n) + return n, err +} + +// ReadOptions are options for ReadWithOptions. +type ReadOptions struct { + // MaxHeaderBytes limits the maximum permissible size of a message header + // block. If exceeded, an error will be returned. + // + // Set to -1 for no limit, set to 0 for the default value (1MB). 
+ MaxHeaderBytes int64 +} + +// withDefaults returns a sanitised version of the options with defaults/special +// values accounted for. +func (o *ReadOptions) withDefaults() *ReadOptions { + var out ReadOptions + if o != nil { + out = *o + } + if out.MaxHeaderBytes == 0 { + out.MaxHeaderBytes = defaultMaxHeaderBytes + } else if out.MaxHeaderBytes < 0 { + out.MaxHeaderBytes = math.MaxInt64 + } + return &out +} + +// ReadWithOptions is like Read, but allows overriding some parameters with +// ReadOptions. +// +// If the message uses an unknown transfer encoding or charset, ReadWithOptions +// returns an error that verifies IsUnknownCharset or IsUnknownEncoding, but +// also returns a Reader that can be read. +func ReadWithOptions(r io.Reader, opts *ReadOptions) (*Reader, error) { + opts = opts.withDefaults() + + lr := &limitedReader{R: r, N: opts.MaxHeaderBytes} + br := bufio.NewReader(lr) + + h, err := textproto.ReadHeader(br) + if err != nil { + return nil, err + } + + lr.N = math.MaxInt64 + + return NewReader(Header{h}, br) +} + +// Read reads a message from r. The message's encoding and charset are +// automatically decoded to raw UTF-8. Note that this function only reads the +// message header. +// +// If the message uses an unknown transfer encoding or charset, Read returns an +// error that verifies IsUnknownCharset or IsUnknownEncoding, but also returns +// a Reader that can be read. +func Read(r io.Reader) (*Reader, error) { + return ReadWithOptions(r, nil) +} + +// MultipartReader returns a MultipartReader that reads parts from this entity's +// body. If this entity is not multipart, it returns nil. +func (e *Reader) MultipartReader() MultipartReader { + if !strings.HasPrefix(e.mediaType, "multipart/") { + return nil + } + if mb, ok := e.Body.(*multipartBody); ok { + return mb + } + return &multipartReader{textproto.NewMultipartReader(e.Body, e.mediaParams["boundary"])} +} + +// writeBodyTo writes this entity's body to w (without the header). 
+func (e *Reader) writeBodyTo(w *Writer) error { + var err error + if mb, ok := e.Body.(*multipartBody); ok { + err = mb.writeBodyTo(w) + } else { + _, err = io.Copy(w, e.Body) + } + return err +} + +// WriteTo writes this entity's header and body to w. +func (e *Reader) WriteTo(w io.Writer) error { + ew, err := CreateWriter(w, e.Header) + if err != nil { + return err + } + + if err := e.writeBodyTo(ew); err != nil { + ew.Close() + return err + } + + return ew.Close() +} + +// WalkFunc is the type of the function called for each part visited by Walk. +// +// The path argument is a list of multipart indices leading to the part. The +// root part has a nil path. +// +// If there was an encoding error walking to a part, the incoming error will +// describe the problem and the function can decide how to handle that error. +// +// Unlike IMAP part paths, indices start from 0 (instead of 1) and a +// non-multipart message has a nil path (instead of {1}). +// +// If an error is returned, processing stops. +type WalkFunc func(path []int, entity *Reader, err error) error + +// Walk walks the entity's multipart tree, calling walkFunc for each part in +// the tree, including the root entity. +// +// Walk consumes the entity. 
+func (e *Reader) Walk(walkFunc WalkFunc) error { + var multipartReaders []MultipartReader + var path []int + part := e + for { + var err error + if part == nil { + if len(multipartReaders) == 0 { + break + } + + // Get the next part from the last multipart reader + mr := multipartReaders[len(multipartReaders)-1] + part, err = mr.NextPart() + if err == io.EOF { + multipartReaders = multipartReaders[:len(multipartReaders)-1] + path = path[:len(path)-1] + continue + } else if IsUnknownEncoding(err) || IsUnknownCharset(err) { + // Forward the error to walkFunc + } else if err != nil { + return err + } + + path[len(path)-1]++ + } + + // Copy the path since we'll mutate it on the next iteration + var pathCopy []int + if len(path) > 0 { + pathCopy = make([]int, len(path)) + copy(pathCopy, path) + } + + if err := walkFunc(pathCopy, part, err); err != nil { + return err + } + + if mr := part.MultipartReader(); mr != nil { + multipartReaders = append(multipartReaders, mr) + path = append(path, -1) + } + + part = nil + } + + return nil +} diff --git a/entity_test.go b/reader_test.go similarity index 92% rename from entity_test.go rename to reader_test.go index 7c01c74a..fc751c88 100644 --- a/entity_test.go +++ b/reader_test.go @@ -11,14 +11,14 @@ import ( "testing" ) -func testMakeEntity() *Entity { +func testMakeEntity() *Reader { var h Header h.Set("Content-Type", "text/plain; charset=US-ASCII") h.Set("Content-Transfer-Encoding", "base64") r := strings.NewReader("Y2Mgc2F2YQ==") - e, _ := New(h, r) + e, _ := NewReader(h, r) return e } @@ -33,20 +33,20 @@ func TestNewEntity(t *testing.T) { } } -func testMakeMultipart() *Entity { +func testMakeMultipart() *Reader { var h1 Header h1.Set("Content-Type", "text/plain") r1 := strings.NewReader("Text part") - e1, _ := New(h1, r1) + e1, _ := NewReader(h1, r1) var h2 Header h2.Set("Content-Type", "text/html") r2 := strings.NewReader("

HTML part

") - e2, _ := New(h2, r2) + e2, _ := NewReader(h2, r2) var h Header h.Set("Content-Type", "multipart/alternative; boundary=IMTHEBOUNDARY") - e, _ := NewMultipart(h, []*Entity{e1, e2}) + e, _ := NewMultipartReader(h, []*Reader{e1, e2}) return e } @@ -69,7 +69,7 @@ const testSingleText = "Content-Type: text/plain\r\n" + "\r\n" + "Message body" -func testMultipart(t *testing.T, e *Entity) { +func testMultipart(t *testing.T, e *Reader) { mr := e.MultipartReader() if mr == nil { t.Fatalf("Expected MultipartReader not to return nil") @@ -266,7 +266,7 @@ func TestEntity_WriteTo_convert(t *testing.T) { h.Set("Content-Type", "text/plain; charset=utf-8") h.Set("Content-Transfer-Encoding", "base64") r := strings.NewReader("Qm9uam91ciDDoCB0b3Vz") - e, _ := New(h, r) + e, _ := NewReader(h, r) e.Header.Set("Content-Transfer-Encoding", "quoted-printable") @@ -306,15 +306,15 @@ func TestNew_unknownTransferEncoding(t *testing.T) { expected := "hey there" r := strings.NewReader(expected) - e, err := New(h, r) + e, err := NewReader(h, r) if err == nil { - t.Fatal("New(unknown transfer encoding): expected an error") + t.Fatal("NewReader(unknown transfer encoding): expected an error") } if !IsUnknownEncoding(err) { - t.Fatal("New(unknown transfer encoding): expected an error that verifies IsUnknownEncoding") + t.Fatal("NewReader(unknown transfer encoding): expected an error that verifies IsUnknownEncoding") } if !errors.As(err, &UnknownEncodingError{}) { - t.Fatal("New(unknown transfer encoding): expected an error that verifies errors.As(err, &EncodingError{})") + t.Fatal("NewReader(unknown transfer encoding): expected an error that verifies errors.As(err, &EncodingError{})") } if b, err := ioutil.ReadAll(e.Body); err != nil { @@ -331,12 +331,12 @@ func TestNew_unknownCharset(t *testing.T) { expected := "hey there" r := strings.NewReader(expected) - e, err := New(h, r) + e, err := NewReader(h, r) if err == nil { - t.Fatal("New(unknown charset): expected an error") + 
t.Fatal("NewReader(unknown charset): expected an error") } if !IsUnknownCharset(err) { - t.Fatal("New(unknown charset): expected an error that verifies IsUnknownCharset") + t.Fatal("NewReader(unknown charset): expected an error that verifies IsUnknownCharset") } if b, err := ioutil.ReadAll(e.Body); err != nil { @@ -372,7 +372,7 @@ func TestNew_paddedBase64(t *testing.T) { e, err := Read(strings.NewReader(testPartRaw)) if err != nil { - t.Fatal("New(padded Base64): expected no error, got", err) + t.Fatal("Read(padded Base64): expected no error, got", err) } if b, err := ioutil.ReadAll(e.Body); err != nil { @@ -398,9 +398,9 @@ type testWalkPart struct { err error } -func walkCollect(e *Entity) ([]testWalkPart, error) { +func walkCollect(e *Reader) ([]testWalkPart, error) { var l []testWalkPart - err := e.Walk(func(path []int, part *Entity, err error) error { + err := e.Walk(func(path []int, part *Reader, err error) error { var body string if part.MultipartReader() == nil { b, err := ioutil.ReadAll(part.Body)