import re

# Elements whose mere presence means the document carries content, even when
# they contain no text at all (an image-only or table-only report, for example).
_CONTENT_TAG_RE = re.compile(
    r"<(table|img|svg|pre|ul|ol|li|iframe|object|embed)\b", re.IGNORECASE
)
# Inner content of the first <body>...</body> pair.
_BODY_RE = re.compile(r"<body\b[^>]*>(.*?)</body>", re.IGNORECASE | re.DOTALL)
# Parts of a document that never render as visible text.
_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)
_SCRIPT_RE = re.compile(r"<script\b[^>]*>.*?</script>", re.IGNORECASE | re.DOTALL)
_STYLE_RE = re.compile(r"<style\b[^>]*>.*?</style>", re.IGNORECASE | re.DOTALL)
_TAG_RE = re.compile(r"<[^>]+>")
# Named and numeric character references for the non-breaking space.
_NBSP_ENTITY_RE = re.compile(r"&(?:nbsp|#0*160|#x0*a0);", re.IGNORECASE)
_WHITESPACE_RE = re.compile(r"\s+")


def is_effectively_blank_html(value: str | None) -> bool:
    """Return True when an HTML body is effectively empty.

    Some sources produce Graph bodies that are non-empty strings but contain
    only an empty HTML skeleton (e.g. ``<html><body></body></html>``) or
    whitespace. Such a body is treated as empty so the caller can fall back to
    an HTML report attachment stored in the EML.

    Args:
        value: The HTML body to inspect. ``None`` counts as blank.

    Returns:
        True if ``value`` is ``None``, an empty or whitespace-only string, or
        HTML whose body has no visible text and no content-bearing element.
        False for any non-string value other than ``None``.
    """
    if value is None:
        return True
    if not isinstance(value, str):
        # Unknown payload type: do not claim it is blank.
        return False

    raw = value.strip()
    if not raw:
        return True

    # Fast path: content-bearing elements mean the body is not blank, even if
    # stripping tags below would leave no text (avoids false positives for
    # report-like HTML).
    if _CONTENT_TAG_RE.search(raw):
        return False

    # Isolate the body content; without a <body> pair evaluate the full string.
    match = _BODY_RE.search(raw)
    body = match.group(1) if match else raw

    # Drop comments, scripts, and styles: none of them is visible text.
    for pattern in (_COMMENT_RE, _SCRIPT_RE, _STYLE_RE):
        body = pattern.sub("", body)

    # Strip the remaining tags, then non-breaking spaces in entity or literal form.
    text = _TAG_RE.sub("", body)
    text = _NBSP_ENTITY_RE.sub(" ", text).replace("\xa0", " ")

    return _WHITESPACE_RE.sub("", text) == ""
- if _is_blank(mail.html_body) and _is_blank(mail.text_body) and mail.eml_blob: + if is_effectively_blank_html(mail.html_body) and _is_blank_text(mail.text_body) and mail.eml_blob: attachment_html = extract_best_html_from_eml(mail.eml_blob) if attachment_html: mail.html_body = attachment_html diff --git a/containers/backupchecks/src/backend/app/main/routes_inbox.py b/containers/backupchecks/src/backend/app/main/routes_inbox.py index de3bcc8..7caee45 100644 --- a/containers/backupchecks/src/backend/app/main/routes_inbox.py +++ b/containers/backupchecks/src/backend/app/main/routes_inbox.py @@ -1,7 +1,7 @@ from .routes_shared import * # noqa: F401,F403 from .routes_shared import _format_datetime, _log_admin_event, _send_mail_message_eml_download -from ..email_utils import extract_best_html_from_eml +from ..email_utils import extract_best_html_from_eml, is_effectively_blank_html import time @@ -113,7 +113,7 @@ def inbox_message_detail(message_id: int): ), } - def _is_blank(s): + def _is_blank_text(s): return s is None or (isinstance(s, str) and s.strip() == "") html_body = getattr(msg, "html_body", None) @@ -121,14 +121,14 @@ def inbox_message_detail(message_id: int): # For legacy messages: if the Graph body is empty/whitespace but the real report # is an HTML attachment in the stored EML, extract and render it. 
- if _is_blank(html_body) and _is_blank(text_body) and getattr(msg, "eml_blob", None): + if is_effectively_blank_html(html_body) and _is_blank_text(text_body) and getattr(msg, "eml_blob", None): extracted = extract_best_html_from_eml(getattr(msg, "eml_blob", None)) if extracted: html_body = extracted - if not _is_blank(html_body): + if not is_effectively_blank_html(html_body): body_html = html_body - elif not _is_blank(text_body): + elif not _is_blank_text(text_body): escaped = ( text_body.replace("&", "&") .replace("<", "<") diff --git a/docs/changelog.md b/docs/changelog.md index 0cda6df..034ccf0 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -31,13 +31,20 @@ --- -## v20260108-29-customer-delete-ticket-remark-scopes +## v20260108-30-customer-delete-ticket-remark-scopes - Updated customer deletion logic to allow removal of customers that have linked tickets or remarks. - Added explicit cleanup of related TicketScope and RemarkScope records before deleting the customer. - Ensured jobs linked to the customer are safely unassigned to prevent foreign key constraint errors. - Prevented deletion failures caused by existing ticket and remark relationships. +--- + +## v20260108-31-inbox-empty-body-attachment-render +- Detect “effectively empty” HTML bodies (e.g. empty HTML skeleton from Graph) and treat them as blank. +- Inbox import: when the Graph body is effectively empty and an EML is stored, extract the first HTML attachment and store it as html_body. +- Inbox message detail: for already-stored messages with effectively empty bodies, dynamically fall back to extracting the HTML attachment from the stored EML so the Inbox popup shows the report without requiring a reset. + ================================================================================================================================================ ## v0.1.18