Auto-commit local changes before build (2026-01-12 12:41:47)

2026-01-12 12:41:47 +01:00 · 2026-01-12 12:41:47 +01:00 · ae61c563b8
commit ae61c563b8
parent b1522cef2f
3 changed files with 142 additions and 149 deletions
--- a/.last-branch
+++ b/.last-branch
@@ -1 +1 @@
-v20260112-06-veeam-spc-alarm-summary-parser
+v20260112-07-veeam-vspc-active-alarms-parser
--- a/containers/backupchecks/src/backend/app/parsers/veeam.py
+++ b/containers/backupchecks/src/backend/app/parsers/veeam.py
@@ -22,144 +22,141 @@ VEEAM_BACKUP_TYPES = [
 ]
-def _parse_vspc_summary_daily_alarm_report_from_html(html: str) -> Tuple[List[Dict], str, Optional[str]]:
+def _parse_vspc_active_alarms_from_html(html: str) -> Tuple[List[Dict], str, Optional[str]]:
-    """Parse Veeam Service Provider Console (VSPC) *summary daily* alarm notifications.
+    """Parse Veeam Service Provider Console (VSPC) Active Alarms summary emails.
-    The VSPC daily summary mail can contain alarms for multiple client companies.
+    The VSPC summary email can contain multiple companies. We keep this as a
-    To make this data actionable per customer in Backupchecks, we aggregate alarms
+    single Backupchecks run, but we prefix object names with the company name
-    per company and emit one object per company.
+    so alarms remain attributable per customer.
    Expected content typically includes blocks like:
      Company: <Name> (alarms: <N>)
      <Alarm details...>
    Since templates vary by VSPC version and localization, the parser uses a
    tolerant text-first strategy:
      1) Convert HTML -> text while preserving meaningful line breaks.
      2) Split into company blocks.
      3) For each company, extract alarm lines and derive a worst-case status.
    Returns: (objects, overall_status, overall_message)
    """
-    text = _html_to_text_preserve_lines(html)
+    html = _normalize_html(html)
-    if not text:
+    if not html:
        return [], "Success", None
-    lines = [ln.strip() for ln in text.split("\n") if (ln or "").strip()]
+    html_lower = html.lower()
-    if not lines:
+    if "veeam service provider console" not in html_lower or "company" not in html_lower:
        return [], "Success", None
-    # Identify company block headers.
+    # Extract each company block and its first alarm table.
-    company_re = re.compile(
+    company_header_re = re.compile(r"(?is)company:\s*([^<\r\n]+?)\s*\(\s*alarms\s*:\s*(\d+)\s*\)")
-        r"(?i)^company\s*:\s*(?P<name>.+?)(?:\s*\(\s*alarms?\s*:\s*(?P<count>\d+)\s*\)\s*)?$"
    )
-    blocks: List[Tuple[str, Optional[int], List[str]]] = []
+    # Build spans using HTML positions.
-    cur_name: Optional[str] = None
+    headers = [(m.start(), m.end(), (m.group(1) or "").strip(), m.group(2)) for m in company_header_re.finditer(html)]
-    cur_count: Optional[int] = None
+    if not headers:
    cur_lines: List[str] = []
    def _flush():
        nonlocal cur_name, cur_count, cur_lines
        if cur_name:
            blocks.append((cur_name, cur_count, cur_lines))
        cur_name, cur_count, cur_lines = None, None, []
    for ln in lines:
        m = company_re.match(ln)
        if m:
            _flush()
            cur_name = (m.group("name") or "").strip()
            try:
                cur_count = int(m.group("count")) if m.group("count") else None
            except Exception:
                cur_count = None
            continue
        if cur_name:
            cur_lines.append(ln)
    _flush()
    if not blocks:
        return [], "Success", None
    objects: List[Dict] = []
    saw_failed = False
    saw_warning = False
    total_alarms = 0
    total_companies = 0
-    # Heuristics for severity classification.
+    for idx, (h_start, h_end, company_name, alarms_raw) in enumerate(headers):
-    failed_kw = re.compile(r"(?i)\b(critical|error|failed|failure)\b")
+        seg_start = h_end
-    warning_kw = re.compile(r"(?i)\b(warn|warning)\b")
+        seg_end = headers[idx + 1][0] if idx + 1 < len(headers) else len(html)
        segment_html = html[seg_start:seg_end]
-    for company_name, alarms_count, company_lines in blocks:
+        # Find the first table that looks like the Active Alarms table.
-        total_companies += 1
+        m_table = re.search(r"(?is)<table[^>]*>.*?(Current\s*State).*?</table>", segment_html)
-        if alarms_count is not None:
+        if not m_table:
-            total_alarms += alarms_count
+            continue
        table_html = m_table.group(0)
-        company_failed = False
+        # Parse rows and cells.
-        company_warning = False
+        row_re = re.compile(r"(?is)<tr[^>]*>(.*?)</tr>")
        cell_re = re.compile(r"(?is)<t[dh][^>]*>(.*?)</t[dh]>")
-        # Attempt to detect per-alarm object/message pairs.
+        rows = row_re.findall(table_html)
-        current_object: Optional[str] = None
+        if not rows:
-        alarm_entries: List[str] = []
+            continue
-        for ln in company_lines:
+        # Determine column indexes from header row.
-            # Skip boilerplate separators.
+        colmap = {}
-            if ln.strip("-–— ") == "":
+        header_cells = [_strip_html_tags(c).strip().lower() for c in cell_re.findall(rows[0])]
        for i, c in enumerate(header_cells):
            if c in {"current state", "currentstate"}:
                colmap["current_state"] = i
            elif c in {"object"}:
                colmap["object"] = i
            elif c in {"object type", "objecttype"}:
                colmap["object_type"] = i
            elif c in {"hostname"}:
                colmap["hostname"] = i
            elif c in {"time"}:
                colmap["time"] = i
            elif c in {"alarm name", "alarmname"}:
                colmap["alarm_name"] = i
            elif c in {"n. of repeats", "n.of repeats", "repeats"}:
                colmap["repeats"] = i
        # Basic validation: needs at least object + current state
        if "object" not in colmap or "current_state" not in colmap:
            continue
        # Convert the entire company segment to text once for details matching.
        seg_text = _html_to_text_preserve_lines(segment_html)
        seg_lines = [ln.strip() for ln in (seg_text or "").splitlines() if ln.strip()]
        for r in rows[1:]:
            cells = cell_re.findall(r)
            if not cells:
                continue
            plain = [_strip_html_tags(c).strip() for c in cells]
            obj_name = plain[colmap["object"]].strip() if colmap["object"] < len(plain) else ""
            if not obj_name:
                continue
-            m_obj = re.match(r"(?i)^(?:object|host|repository|vm)\s*[:\-]\s*(.+)$", ln)
+            current_state = plain[colmap["current_state"]].strip() if colmap["current_state"] < len(plain) else ""
-            if m_obj:
+            obj_type = plain[colmap.get("object_type", -1)].strip() if colmap.get("object_type", -1) >= 0 and colmap.get("object_type", -1) < len(plain) else ""
-                current_object = (m_obj.group(1) or "").strip() or None
+            hostname = plain[colmap.get("hostname", -1)].strip() if colmap.get("hostname", -1) >= 0 and colmap.get("hostname", -1) < len(plain) else ""
-                continue
+            at_time = plain[colmap.get("time", -1)].strip() if colmap.get("time", -1) >= 0 and colmap.get("time", -1) < len(plain) else ""
            alarm_name = plain[colmap.get("alarm_name", -1)].strip() if colmap.get("alarm_name", -1) >= 0 and colmap.get("alarm_name", -1) < len(plain) else ""
            repeats = plain[colmap.get("repeats", -1)].strip() if colmap.get("repeats", -1) >= 0 and colmap.get("repeats", -1) < len(plain) else ""
-            # Some templates format as "<Object> - <Message>".
+            state_lower = (current_state or "").lower()
-            if " - " in ln and not ln.lower().startswith("http"):
+            status = "Success"
-                left, right = ln.split(" - ", 1)
+            if state_lower in {"failed", "error", "critical"}:
-                left = left.strip()
+                status = "Failed"
-                right = right.strip()
+                saw_failed = True
-                if left and right and (current_object is None):
+            elif state_lower in {"warning", "warn"}:
-                    current_object = left
+                status = "Warning"
-                    alarm_entries.append(f"{left}: {right}")
+                saw_warning = True
-                    current_object = None
+
            # Try to find a more descriptive detail line in the company text.
            detail_line = None
            # Prefer lines that mention the object or alarm name and are long enough to be a real description.
            needles = [n for n in [obj_name, alarm_name] if n]
            for ln in seg_lines:
                if len(ln) < 25:
                    continue
                if any(n.lower() in ln.lower() for n in needles):
                    detail_line = ln
                    break
-            if current_object:
+            if not detail_line and alarm_name:
-                alarm_entries.append(f"{current_object}: {ln}")
+                # fallback: use alarm name with context
-                current_object = None
+                parts = [alarm_name]
-            else:
+                ctx = []
-                alarm_entries.append(ln)
+                if hostname:
                    ctx.append(f"Host: {hostname}")
                if at_time:
                    ctx.append(f"Time: {at_time}")
                if repeats:
                    ctx.append(f"Repeats: {repeats}")
                if ctx:
                    parts.append("(" + ", ".join(ctx) + ")")
                detail_line = " ".join(parts).strip() or None
-            # Severity inference.
+            objects.append(
-            if failed_kw.search(ln):
+                {
-                company_failed = True
+                    "name": f"{company_name} | {obj_name}" if company_name else obj_name,
-            elif warning_kw.search(ln):
+                    "type": obj_type or "Alarm",
-                company_warning = True
+                    "status": status,
-
+                    "error_message": detail_line,
-        # If we did not find explicit severity, fall back to alarm count > 0.
+                }
-        if not company_failed and not company_warning:
+            )
            if (alarms_count or 0) > 0:
                company_warning = True
        status = "Success"
        if company_failed:
            status = "Failed"
            saw_failed = True
        elif company_warning:
            status = "Warning"
            saw_warning = True
        objects.append(
            {
                "name": company_name,
                "type": "Company",
                "status": status,
                "error_message": "\n".join([e for e in alarm_entries if e]).strip() or None,
            }
        )
    overall_status = "Success"
    if saw_failed:
@@ -168,12 +165,10 @@ def _parse_vspc_summary_daily_alarm_report_from_html(html: str) -> Tuple[List[Di
        overall_status = "Warning"
    overall_message = None
    if total_companies and total_alarms:
        overall_message = f"Companies with alarms: {total_companies}, Total alarms: {total_alarms}".strip()
    return objects, overall_status, overall_message
 def _parse_cloud_connect_report_from_html(html: str) -> Tuple[List[Dict], str]:
    """Parse Veeam Cloud Connect daily report (provider) HTML.
@@ -1026,48 +1021,24 @@ def try_parse_veeam(msg: MailMessage) -> Tuple[bool, Dict, List[Dict]]:
        and "infrastructure status" in html_lower
    )
    # Veeam Service Provider Console (VSPC) summary daily alarm notification.
    # These mails can contain multiple client companies.
    # Detection is intentionally tolerant: company blocks are typically formatted
    # as "Company: <name> (alarms: N)".
    text_body = _html_to_text_preserve_lines(html_body)
    text_lower = (text_body or "").lower()
    is_vspc_daily_alarm_summary = (
        ("company:" in text_lower and "alarms" in text_lower)
        and (
            "service provider" in text_lower
            or "availability console" in text_lower
            or "vac" in text_lower
            or "veeam" in subject.lower()
        )
    )
    # Special-case: Veeam Backup for Microsoft 365 mails can come without a
    # subject marker. Detect via HTML and extract status from the banner.
    is_m365 = "veeam backup for microsoft 365" in html_lower
    # VSPC Active Alarms summary (no [Success]/[Warning] marker).
    is_vspc_active_alarms = (
        ("veeam service provider console" in html_lower)
        and ("active alarms" in html_lower or "active alarms summary" in subject.lower())
        and ("company:" in html_lower and "alarms" in html_lower)
    )
    # If we cannot detect a status marker and this is not an M365 report,
    # we still try to parse when the subject strongly indicates a Veeam report.
-    if not m_status and not m_finished and not is_m365 and not is_cloud_connect_report and not is_vspc_daily_alarm_summary:
+    if not m_status and not m_finished and not is_m365 and not is_cloud_connect_report and not is_vspc_active_alarms:
        lowered = subject.lower()
        if not any(k in lowered for k in ["veeam", "cloud connect", "backup job", "backup copy job", "replica job", "configuration backup", "health check"]):
            return False, {}, []
    # Handle VSPC daily alarm summary early.
    if is_vspc_daily_alarm_summary:
        objects, overall_status, overall_message = _parse_vspc_summary_daily_alarm_report_from_html(html_body)
        result = {
            "backup_software": "Veeam",
            "backup_type": "Service Provider Console",
            "job_name": "Daily alarms",
            "overall_status": overall_status,
        }
        if overall_message:
            result["overall_message"] = overall_message
        return True, result, objects
    # Handle Cloud Connect daily report early: overall status is derived from row colours.
    if is_cloud_connect_report:
        objects, overall_status = _parse_cloud_connect_report_from_html(html_body)
@@ -1089,6 +1060,21 @@ def try_parse_veeam(msg: MailMessage) -> Tuple[bool, Dict, List[Dict]]:
        return True, result, objects
    # Handle VSPC Active Alarms summary early.
    if is_vspc_active_alarms:
        objects, overall_status, overall_message = _parse_vspc_active_alarms_from_html(html_body)
        result = {
            "backup_software": "Veeam",
            "backup_type": "Service Provider Console",
            "job_name": "Active alarms summary",
            "overall_status": overall_status,
        }
        if overall_message:
            result["overall_message"] = overall_message
        return True, result, objects
    if m_status:
        status_word = m_status.group(1)
        rest = m_status.group(2)
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -159,6 +159,13 @@
 - Derived overall status from the worst status found across all companies (Failed > Warning > Success).
 - Registered "Service Provider Console" as a supported Veeam backup type for consistent reporting.
 --- 
 ## v20260112-07-veeam-vspc-active-alarms-parser
 - Improved detection for VSPC “Active Alarms” summary emails based on HTML content to prevent fallback to other Veeam parsers.
 - Implemented parsing of alarms per Company and per alarm row, creating objects named “<Company> | <Object>”.
 - Derived object status from “Current State” and attached alarm details where available (fallback to Alarm Name).
 ================================================================================================================================================
 ## v0.1.19
 This release delivers a broad set of improvements focused on reliability, transparency, and operational control across mail processing, administrative auditing, and Run Checks workflows. The changes aim to make message handling more robust, provide better insight for administrators, and give operators clearer and more flexible control when reviewing backup runs.
`@ -1 +1 @@`
	`v20260112-06-veeam-spc-alarm-summary-parser`	`v20260112-07-veeam-vspc-active-alarms-parser`