mirror of
https://github.com/craigerl/aprsd.git
synced 2024-11-10 10:33:31 -05:00
Fix unknown characterset emails
Some emails had an unknown character set. When that happens we can't decode the body of the message properly, so the default body string was used instead, and the code then attempted to decode it. Only byte strings can be decoded, so the default string is now declared as a bytes literal.
This commit is contained in:
parent
b4526f3a0e
commit
96bae795a5
@ -220,25 +220,27 @@ def parse_email(msgid, data, server):
|
||||
text = ""
|
||||
html = None
|
||||
# default in case body somehow isn't set below - happened once
|
||||
body = "* unreadable msg received"
|
||||
body = b"* unreadable msg received"
|
||||
# this uses the last text or html part in the email, phone companies often put content in an attachment
|
||||
for part in msg.get_payload():
|
||||
if (
|
||||
part.get_content_charset() is None
|
||||
): # or BREAK when we hit a text or html?
|
||||
if part.get_content_charset() is None:
|
||||
# or BREAK when we hit a text or html?
|
||||
# We cannot know the character set,
|
||||
# so return decoded "something"
|
||||
LOG.debug("Email got unknown content type")
|
||||
text = part.get_payload(decode=True)
|
||||
continue
|
||||
|
||||
charset = part.get_content_charset()
|
||||
|
||||
if part.get_content_type() == "text/plain":
|
||||
LOG.debug("Email got text/plain")
|
||||
text = six.text_type(
|
||||
part.get_payload(decode=True), str(charset), "ignore"
|
||||
).encode("utf8", "replace")
|
||||
|
||||
if part.get_content_type() == "text/html":
|
||||
LOG.debug("Email got text/html")
|
||||
html = six.text_type(
|
||||
part.get_payload(decode=True), str(charset), "ignore"
|
||||
).encode("utf8", "replace")
|
||||
@ -250,6 +252,7 @@ def parse_email(msgid, data, server):
|
||||
body = html.strip()
|
||||
else: # message is not multipart
|
||||
# email.uscc.net sends no charset, blows up unicode function below
|
||||
LOG.debug("Email is not multipart")
|
||||
if msg.get_content_charset() is None:
|
||||
text = six.text_type(
|
||||
msg.get_payload(decode=True), "US-ASCII", "ignore"
|
||||
|
Loading…
Reference in New Issue
Block a user