Auto-commit local changes before build (2026-01-08 14:18:20) #66

Merged
ivooskamp merged 1 commits from v20260108-31-inbox-empty-body-attachment-render into main 2026-01-13 11:26:19 +01:00
5 changed files with 57 additions and 10 deletions

View File

@ -1 +1 @@
v20260108-30-customer-delete-ticket-remark-scopes v20260108-31-inbox-empty-body-attachment-render

View File

@ -3,6 +3,7 @@ from __future__ import annotations
from email import policy from email import policy
from email.parser import BytesParser from email.parser import BytesParser
from email.utils import parseaddr from email.utils import parseaddr
import re
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
@ -125,3 +126,42 @@ def extract_best_html_from_eml(
return None return None
_fn, html_text = items[0] _fn, html_text = items[0]
return html_text or None return html_text or None
def is_effectively_blank_html(value: str | None) -> bool:
"""Return True when an HTML body is effectively empty.
Some sources produce Graph bodies that are non-empty strings but contain only
an empty HTML skeleton (e.g. <html><body></body></html>) or whitespace.
In those cases we want to treat the body as empty so we can fall back to an
HTML report attachment stored in the EML.
"""
if value is None:
return True
if not isinstance(value, str):
return False
raw = value.strip()
if raw == "":
return True
# Fast path: if we clearly have content-bearing elements, it is not blank.
# (This avoids false positives for report-like HTML.)
if re.search(r"<(table|img|svg|pre|ul|ol|li|iframe|object|embed)\b", raw, re.IGNORECASE):
return False
# Try to isolate the body content; if no body tag is present, evaluate the full string.
m = re.search(r"<body\b[^>]*>(.*?)</body>", raw, re.IGNORECASE | re.DOTALL)
body = m.group(1) if m else raw
# Remove comments, scripts, and styles.
body = re.sub(r"<!--.*?-->", "", body, flags=re.DOTALL)
body = re.sub(r"<script\b[^>]*>.*?</script>", "", body, flags=re.IGNORECASE | re.DOTALL)
body = re.sub(r"<style\b[^>]*>.*?</style>", "", body, flags=re.IGNORECASE | re.DOTALL)
# Strip tags and common non-breaking whitespace entities.
text = re.sub(r"<[^>]+>", "", body)
text = text.replace("&nbsp;", " ").replace("\xa0", " ")
text = re.sub(r"\s+", "", text)
return text == ""

View File

@ -13,7 +13,7 @@ from sqlalchemy import func
from . import db from . import db
from .models import MailMessage, SystemSettings, Job, JobRun from .models import MailMessage, SystemSettings, Job, JobRun
from .parsers import parse_mail_message from .parsers import parse_mail_message
from .email_utils import normalize_from_address, extract_best_html_from_eml from .email_utils import normalize_from_address, extract_best_html_from_eml, is_effectively_blank_html
from .job_matching import find_matching_job from .job_matching import find_matching_job
@ -230,13 +230,13 @@ def _store_messages(settings: SystemSettings, messages):
# Some systems send empty bodies and put the actual report in an HTML attachment. # Some systems send empty bodies and put the actual report in an HTML attachment.
# Graph may still return a body that only contains whitespace/newlines; treat that # Graph may still return a body that only contains whitespace/newlines; treat that
# as empty so we can fall back to the attachment. # as empty so we can fall back to the attachment.
def _is_blank(s): def _is_blank_text(s):
return s is None or (isinstance(s, str) and s.strip() == "") return s is None or (isinstance(s, str) and s.strip() == "")
# If we have raw EML bytes and no meaningful body content, extract the first # If we have raw EML bytes and no meaningful body content, extract the first
# HTML attachment and use it as the HTML body so parsers and the inbox preview # HTML attachment and use it as the HTML body so parsers and the inbox preview
# can work. # can work.
if _is_blank(mail.html_body) and _is_blank(mail.text_body) and mail.eml_blob: if is_effectively_blank_html(mail.html_body) and _is_blank_text(mail.text_body) and mail.eml_blob:
attachment_html = extract_best_html_from_eml(mail.eml_blob) attachment_html = extract_best_html_from_eml(mail.eml_blob)
if attachment_html: if attachment_html:
mail.html_body = attachment_html mail.html_body = attachment_html

View File

@ -1,7 +1,7 @@
from .routes_shared import * # noqa: F401,F403 from .routes_shared import * # noqa: F401,F403
from .routes_shared import _format_datetime, _log_admin_event, _send_mail_message_eml_download from .routes_shared import _format_datetime, _log_admin_event, _send_mail_message_eml_download
from ..email_utils import extract_best_html_from_eml from ..email_utils import extract_best_html_from_eml, is_effectively_blank_html
import time import time
@ -113,7 +113,7 @@ def inbox_message_detail(message_id: int):
), ),
} }
def _is_blank(s): def _is_blank_text(s):
return s is None or (isinstance(s, str) and s.strip() == "") return s is None or (isinstance(s, str) and s.strip() == "")
html_body = getattr(msg, "html_body", None) html_body = getattr(msg, "html_body", None)
@ -121,14 +121,14 @@ def inbox_message_detail(message_id: int):
# For legacy messages: if the Graph body is empty/whitespace but the real report # For legacy messages: if the Graph body is empty/whitespace but the real report
# is an HTML attachment in the stored EML, extract and render it. # is an HTML attachment in the stored EML, extract and render it.
if _is_blank(html_body) and _is_blank(text_body) and getattr(msg, "eml_blob", None): if is_effectively_blank_html(html_body) and _is_blank_text(text_body) and getattr(msg, "eml_blob", None):
extracted = extract_best_html_from_eml(getattr(msg, "eml_blob", None)) extracted = extract_best_html_from_eml(getattr(msg, "eml_blob", None))
if extracted: if extracted:
html_body = extracted html_body = extracted
if not _is_blank(html_body): if not is_effectively_blank_html(html_body):
body_html = html_body body_html = html_body
elif not _is_blank(text_body): elif not _is_blank_text(text_body):
escaped = ( escaped = (
text_body.replace("&", "&amp;") text_body.replace("&", "&amp;")
.replace("<", "&lt;") .replace("<", "&lt;")

View File

@ -31,13 +31,20 @@
--- ---
## v20260108-29-customer-delete-ticket-remark-scopes ## v20260108-30-customer-delete-ticket-remark-scopes
- Updated customer deletion logic to allow removal of customers that have linked tickets or remarks. - Updated customer deletion logic to allow removal of customers that have linked tickets or remarks.
- Added explicit cleanup of related TicketScope and RemarkScope records before deleting the customer. - Added explicit cleanup of related TicketScope and RemarkScope records before deleting the customer.
- Ensured jobs linked to the customer are safely unassigned to prevent foreign key constraint errors. - Ensured jobs linked to the customer are safely unassigned to prevent foreign key constraint errors.
- Prevented deletion failures caused by existing ticket and remark relationships. - Prevented deletion failures caused by existing ticket and remark relationships.
---
## v20260108-31-inbox-empty-body-attachment-render
- Detect “effectively empty” HTML bodies (e.g. empty HTML skeleton from Graph) and treat them as blank.
- Inbox import: when the Graph body is effectively empty and an EML is stored, extract the first HTML attachment and store it as html_body.
- Inbox message detail: for already-stored messages with effectively empty bodies, dynamically fall back to extracting the HTML attachment from the stored EML so the Inbox popup shows the report without requiring a reset.
================================================================================================================================================ ================================================================================================================================================
## v0.1.18 ## v0.1.18