Skip to content

Commit

Permalink
fix exception in MsgInfo.msg_id when email body contains 8 bit characters
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixSchwarz committed Aug 5, 2024
1 parent 8b20644 commit a0c94c6
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 20 deletions.
25 changes: 5 additions & 20 deletions schwarz/mailqueue/message_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import re
from datetime import datetime as DateTime, timedelta as TimeDelta
from email.header import decode_header
from email.parser import FeedParser, HeaderParser
from io import BytesIO, TextIOWrapper
from email.parser import BytesHeaderParser, FeedParser
from io import BytesIO
from typing import BinaryIO, NamedTuple, Optional, Sequence

from boltons.timeutils import ConstantTZInfo, LocalTZ
Expand Down Expand Up @@ -92,14 +92,11 @@ def __new__(cls, from_addr, to_addrs, msg_fp, queue_date=None, last=None, retrie
def msg_id(self):
    """Return the ``Message-ID`` header value without angle brackets.

    The headers are parsed directly from the binary ``self.msg_fp`` so
    that 8-bit characters in the message body do not break parsing.
    The read position of ``self.msg_fp`` is restored before returning,
    even when parsing raises.
    """
    old_pos = self.msg_fp.tell()
    self.msg_fp.seek(0)
    try:
        # Parse bytes instead of wrapping the stream in an ASCII
        # TextIOWrapper: strict ASCII decoding raises as soon as the
        # message contains 8-bit data, and TextIOWrapper also closes
        # the wrapped file (https://bugs.python.org/issue21363).
        msg_headers = BytesHeaderParser().parse(self.msg_fp, headersonly=True)
    finally:
        # Always hand the stream back at the position the caller left it.
        self.msg_fp.seek(old_pos)
    # NOTE(review): a missing header yields None here; how strip_brackets
    # treats None is not visible from this block -- confirm.
    msg_id_value = msg_headers['Message-ID']
    # message ids are usually enclosed in angle brackets but these do NOT
    # belong to the message id.
    return strip_brackets(msg_id_value)

@property
Expand All @@ -111,18 +108,6 @@ def msg_bytes(self):
return data


class UnclosableWrapper(object):
    """Proxy for a file-like object whose ``close()`` does nothing.

    Every attribute other than ``close`` is looked up on the wrapped
    instance, so the proxy can be handed to code that insists on closing
    the file it was given while the underlying object stays open.
    """

    def __init__(self, wrapped_instance):
        """Store the object that all attribute access is forwarded to."""
        self.wrapped_instance = wrapped_instance

    def __getattr__(self, name):
        """Forward attribute lookups that failed on the proxy itself."""
        # __getattr__ only runs after normal lookup failed. If the missing
        # attribute is 'wrapped_instance' itself (instance created without
        # __init__, e.g. via __new__/copy/unpickle), delegating would
        # re-enter __getattr__ forever and end in a RecursionError.
        if name == 'wrapped_instance':
            raise AttributeError(name)
        return getattr(self.wrapped_instance, name)

    def close(self):
        """Ignore the close request; the wrapped object stays open."""
        pass



# Matches a whole bytes value with an optional leading "<" and an optional
# trailing ">"; group 1 is the (non-empty, lazily matched) text in between.
_re_angle_brackets = re.compile(br'^<?(.+?)>?$')
# Same pattern as above, but for str input instead of bytes.
_re_angle_brackets_str = re.compile('^<?(.+?)>?$')
# Matches a comma together with any whitespace around it.
# NOTE(review): presumably used to split comma-separated header values --
# confirm at the call sites.
_re_header_list = re.compile(r'\s*,\s*')
Expand Down
20 changes: 20 additions & 0 deletions tests/message_parsing_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-License-Identifier: MIT

from datetime import datetime as DateTime
from email.message import Message
from io import BytesIO

from boltons.timeutils import LocalTZ
Expand Down Expand Up @@ -43,6 +44,25 @@ def test_can_parse_encoded_header():
assert msg_info.from_addr == '[email protected]'
assert msg_info.to_addrs == ('[email protected]',)

def test_can_parse_message_with_utf8_data():
    """Envelope and message id are readable when the body has 8-bit bytes.

    The payload contains UTF-8 encoded guillemets and is declared as
    ``8bit``, so the serialized message is not pure ASCII.
    """
    message = Message()
    header_items = (
        ('Message-ID', '<[email protected]>'),
        ('Mime-Version', '1.0'),
        ('Content-Transfer-Encoding', '8bit'),
        ('Content-Type', 'text/plain; charset=UTF-8'),
    )
    for header_name, header_value in header_items:
        message[header_name] = header_value
    message.set_payload(b'some \xc2\xbbnon-ascii\xc2\xab text')

    queued_fp = build_queued_message(
        sender='[email protected]',
        recipient='[email protected]',
        msg=message,
    )
    envelope = parse_message_envelope(queued_fp)

    # Envelope data comes from the queue metadata, not from the message.
    assert envelope.from_addr == '[email protected]'
    assert envelope.to_addrs == ('[email protected]',)
    # Reading the id must work despite the non-ASCII body ...
    assert envelope.msg_id == '[email protected]'
    # ... and the remaining stream must still be the untouched message.
    assert envelope.msg_fp.read() == message.as_bytes()

def test_can_parse_queue_metadata():
queue_date = DateTime(2020, 10, 1, hour=15, minute=42, second=21, tzinfo=LocalTZ)
last_attempt = DateTime(2020, 10, 1, hour=16, minute=0, tzinfo=LocalTZ)
Expand Down

0 comments on commit a0c94c6

Please sign in to comment.