From 96bae795a57c757df22e8877e00ac9a0e83c753f Mon Sep 17 00:00:00 2001 From: Hemna Date: Tue, 15 Dec 2020 17:59:17 -0500 Subject: [PATCH] Fix unknown characterset emails Some emails have an unknown character set. When that happens we can't decode the body of the message properly, so the default body string was used instead, and the code then attempted to decode it. Only byte strings can be decoded, so the default string is now a bytes literal. --- aprsd/main.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/aprsd/main.py b/aprsd/main.py index 0c6c2a2..ec05d59 100644 --- a/aprsd/main.py +++ b/aprsd/main.py @@ -220,25 +220,27 @@ def parse_email(msgid, data, server): text = "" html = None # default in case body somehow isn't set below - happened once - body = "* unreadable msg received" + body = b"* unreadable msg received" # this uses the last text or html part in the email, phone companies often put content in an attachment for part in msg.get_payload(): - if ( - part.get_content_charset() is None - ): # or BREAK when we hit a text or html? + if part.get_content_charset() is None: + # or BREAK when we hit a text or html? 
# We cannot know the character set, # so return decoded "something" + LOG.debug("Email got unknown content type") text = part.get_payload(decode=True) continue charset = part.get_content_charset() if part.get_content_type() == "text/plain": + LOG.debug("Email got text/plain") text = six.text_type( part.get_payload(decode=True), str(charset), "ignore" ).encode("utf8", "replace") if part.get_content_type() == "text/html": + LOG.debug("Email got text/html") html = six.text_type( part.get_payload(decode=True), str(charset), "ignore" ).encode("utf8", "replace") @@ -250,6 +252,7 @@ def parse_email(msgid, data, server): body = html.strip() else: # message is not multipart # email.uscc.net sends no charset, blows up unicode function below + LOG.debug("Email is not multipart") if msg.get_content_charset() is None: text = six.text_type( msg.get_payload(decode=True), "US-ASCII", "ignore"