From 62d65d20ad1157474a4948c4acbc4be22e40a6fa Mon Sep 17 00:00:00 2001 From: Ivo Oskamp Date: Fri, 9 Jan 2026 09:35:39 +0100 Subject: [PATCH] Auto-commit local changes before build (2026-01-09 09:35:39) --- .last-branch | 2 +- .../src/backend/app/parsers/veeam.py | 34 +++++++++++++++---- docs/changelog.md | 5 +++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/.last-branch b/.last-branch index b538399..d5e3c52 100644 --- a/.last-branch +++ b/.last-branch @@ -1 +1 @@ -v20260108-39-changelog-0.1.19 +v20260109-01-veeam-m365-overall-message diff --git a/containers/backupchecks/src/backend/app/parsers/veeam.py b/containers/backupchecks/src/backend/app/parsers/veeam.py index 2b54e5e..4e3ff6d 100644 --- a/containers/backupchecks/src/backend/app/parsers/veeam.py +++ b/containers/backupchecks/src/backend/app/parsers/veeam.py @@ -347,12 +347,34 @@ def _extract_m365_overall_details_message(html: str) -> Optional[str]: if not html: return None - # Look for the summary "Details" cell (typically a header_td with rowspan). - candidates = re.findall( - r']*rowspan\s*=\s*["\']?\s*2\s*["\']?[^>]*>(.*?)', - html, - flags=re.IGNORECASE | re.DOTALL, - ) + html = _normalize_html(html) + + # Strategy 1 (preferred): locate the "Details" header cell and then scan a small + # window after it for a rowspan cell that contains the overall message. + # + # We intentionally avoid a single giant regex over the entire HTML body to keep + # parsing fast and prevent worst-case backtracking on large messages. + candidates: List[str] = [] + + hdr = re.search(r'(?is)]*>\s*\s*Details\s*\s*', html) + if hdr: + window = html[hdr.end() : hdr.end() + 6000] + m = re.search( + r'(?is)]*rowspan\s*=\s*["\']?\s*(?:2|3|4|5|6|7|8|9|10)\s*["\']?[^>]*>(.*?)', + window, + ) + if m: + candidates = [m.group(1)] + + # Strategy 2 (fallback): look for rowspan cells with rowspan >= 2. + if not candidates: + all_rowspans = re.findall( + r'(?is)]*rowspan\s*=\s*["\']?\s*([2-9]|10)\s*["\']?[^>]*>(.*?)', + html, + ) + # re.findall above returns tuples (rowspan, content) + candidates = [c[1] for c in all_rowspans] if all_rowspans else [] + if not candidates: return None diff --git a/docs/changelog.md b/docs/changelog.md index 3d435e7..b9e897a 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,3 +1,8 @@ +## v20260109-01-veeam-m365-overall-message + +- Fixed Veeam Backup for Microsoft 365 mail parsing where the overall summary message was not stored. +- Improved extraction of the overall details message from the mail content, ensuring permission and role warnings are correctly captured. +- Ensured the extracted overall message is consistently available in job details, run checks, and reporting views. ================================================================================================================================================