Auto-commit local changes before build (2026-01-12 12:41:47) #94

Merged
ivooskamp merged 1 commit from v20260112-07-veeam-vspc-active-alarms-parser into main 2026-01-13 11:42:15 +01:00
3 changed files with 142 additions and 149 deletions
Showing only changes of commit ae61c563b8 - Show all commits

View File

@ -1 +1 @@
v20260112-06-veeam-spc-alarm-summary-parser
v20260112-07-veeam-vspc-active-alarms-parser

View File

@ -22,144 +22,141 @@ VEEAM_BACKUP_TYPES = [
]
def _parse_vspc_summary_daily_alarm_report_from_html(html: str) -> Tuple[List[Dict], str, Optional[str]]:
"""Parse Veeam Service Provider Console (VSPC) *summary daily* alarm notifications.
def _parse_vspc_active_alarms_from_html(html: str) -> Tuple[List[Dict], str, Optional[str]]:
"""Parse Veeam Service Provider Console (VSPC) Active Alarms summary emails.
The VSPC daily summary mail can contain alarms for multiple client companies.
To make this data actionable per customer in Backupchecks, we aggregate alarms
per company and emit one object per company.
Expected content typically includes blocks like:
Company: <Name> (alarms: <N>)
<Alarm details...>
Since templates vary by VSPC version and localization, the parser uses a
tolerant text-first strategy:
1) Convert HTML -> text while preserving meaningful line breaks.
2) Split into company blocks.
3) For each company, extract alarm lines and derive a worst-case status.
The VSPC summary email can contain multiple companies. We keep this as a
single Backupchecks run, but we prefix object names with the company name
so alarms remain attributable per customer.
Returns: (objects, overall_status, overall_message)
"""
text = _html_to_text_preserve_lines(html)
if not text:
html = _normalize_html(html)
if not html:
return [], "Success", None
lines = [ln.strip() for ln in text.split("\n") if (ln or "").strip()]
if not lines:
html_lower = html.lower()
if "veeam service provider console" not in html_lower or "company" not in html_lower:
return [], "Success", None
# Identify company block headers.
company_re = re.compile(
r"(?i)^company\s*:\s*(?P<name>.+?)(?:\s*\(\s*alarms?\s*:\s*(?P<count>\d+)\s*\)\s*)?$"
)
# Extract each company block and its first alarm table.
# Matches headers of the form "Company: <name> (alarms: <N>)" directly in the HTML.
# The name group stops at "<" or a line break, so a header never spans markup
# or multiple lines; group 1 is the company name, group 2 the alarm count.
company_header_re = re.compile(
    r"(?is)company:\s*([^<\n]+?)\s*\(\s*alarms\s*:\s*(\d+)\s*\)"
)
blocks: List[Tuple[str, Optional[int], List[str]]] = []
cur_name: Optional[str] = None
cur_count: Optional[int] = None
cur_lines: List[str] = []
def _flush():
nonlocal cur_name, cur_count, cur_lines
if cur_name:
blocks.append((cur_name, cur_count, cur_lines))
cur_name, cur_count, cur_lines = None, None, []
for ln in lines:
m = company_re.match(ln)
if m:
_flush()
cur_name = (m.group("name") or "").strip()
try:
cur_count = int(m.group("count")) if m.group("count") else None
except Exception:
cur_count = None
continue
if cur_name:
cur_lines.append(ln)
_flush()
if not blocks:
# Build spans using HTML positions.
headers = [(m.start(), m.end(), (m.group(1) or "").strip(), m.group(2)) for m in company_header_re.finditer(html)]
if not headers:
return [], "Success", None
objects: List[Dict] = []
saw_failed = False
saw_warning = False
total_alarms = 0
total_companies = 0
# Heuristics for severity classification.
failed_kw = re.compile(r"(?i)\b(critical|error|failed|failure)\b")
warning_kw = re.compile(r"(?i)\b(warn|warning)\b")
for idx, (h_start, h_end, company_name, alarms_raw) in enumerate(headers):
seg_start = h_end
seg_end = headers[idx + 1][0] if idx + 1 < len(headers) else len(html)
segment_html = html[seg_start:seg_end]
for company_name, alarms_count, company_lines in blocks:
total_companies += 1
if alarms_count is not None:
total_alarms += alarms_count
# Find the first table that looks like the Active Alarms table.
m_table = re.search(r"(?is)<table[^>]*>.*?(Current\s*State).*?</table>", segment_html)
if not m_table:
continue
table_html = m_table.group(0)
company_failed = False
company_warning = False
# Parse rows and cells.
row_re = re.compile(r"(?is)<tr[^>]*>(.*?)</tr>")
cell_re = re.compile(r"(?is)<t[dh][^>]*>(.*?)</t[dh]>")
# Attempt to detect per-alarm object/message pairs.
current_object: Optional[str] = None
alarm_entries: List[str] = []
rows = row_re.findall(table_html)
if not rows:
continue
for ln in company_lines:
# Skip boilerplate separators.
if ln.strip("-–— ") == "":
# Determine column indexes from header row.
colmap = {}
header_cells = [_strip_html_tags(c).strip().lower() for c in cell_re.findall(rows[0])]
for i, c in enumerate(header_cells):
if c in {"current state", "currentstate"}:
colmap["current_state"] = i
elif c in {"object"}:
colmap["object"] = i
elif c in {"object type", "objecttype"}:
colmap["object_type"] = i
elif c in {"hostname"}:
colmap["hostname"] = i
elif c in {"time"}:
colmap["time"] = i
elif c in {"alarm name", "alarmname"}:
colmap["alarm_name"] = i
elif c in {"n. of repeats", "n.of repeats", "repeats"}:
colmap["repeats"] = i
# Basic validation: needs at least object + current state
if "object" not in colmap or "current_state" not in colmap:
continue
# Convert the entire company segment to text once for details matching.
seg_text = _html_to_text_preserve_lines(segment_html)
seg_lines = [ln.strip() for ln in (seg_text or "").splitlines() if ln.strip()]
for r in rows[1:]:
cells = cell_re.findall(r)
if not cells:
continue
plain = [_strip_html_tags(c).strip() for c in cells]
obj_name = plain[colmap["object"]].strip() if colmap["object"] < len(plain) else ""
if not obj_name:
continue
m_obj = re.match(r"(?i)^(?:object|host|repository|vm)\s*[:\-]\s*(.+)$", ln)
if m_obj:
current_object = (m_obj.group(1) or "").strip() or None
continue
current_state = plain[colmap["current_state"]].strip() if colmap["current_state"] < len(plain) else ""
obj_type = plain[colmap.get("object_type", -1)].strip() if colmap.get("object_type", -1) >= 0 and colmap.get("object_type", -1) < len(plain) else ""
hostname = plain[colmap.get("hostname", -1)].strip() if colmap.get("hostname", -1) >= 0 and colmap.get("hostname", -1) < len(plain) else ""
at_time = plain[colmap.get("time", -1)].strip() if colmap.get("time", -1) >= 0 and colmap.get("time", -1) < len(plain) else ""
alarm_name = plain[colmap.get("alarm_name", -1)].strip() if colmap.get("alarm_name", -1) >= 0 and colmap.get("alarm_name", -1) < len(plain) else ""
repeats = plain[colmap.get("repeats", -1)].strip() if colmap.get("repeats", -1) >= 0 and colmap.get("repeats", -1) < len(plain) else ""
# Some templates format as "<Object> - <Message>".
if " - " in ln and not ln.lower().startswith("http"):
left, right = ln.split(" - ", 1)
left = left.strip()
right = right.strip()
if left and right and (current_object is None):
current_object = left
alarm_entries.append(f"{left}: {right}")
current_object = None
state_lower = (current_state or "").lower()
status = "Success"
if state_lower in {"failed", "error", "critical"}:
status = "Failed"
saw_failed = True
elif state_lower in {"warning", "warn"}:
status = "Warning"
saw_warning = True
# Try to find a more descriptive detail line in the company text.
detail_line = None
# Prefer lines that mention the object or alarm name and are long enough to be a real description.
needles = [n for n in [obj_name, alarm_name] if n]
for ln in seg_lines:
if len(ln) < 25:
continue
if any(n.lower() in ln.lower() for n in needles):
detail_line = ln
break
if current_object:
alarm_entries.append(f"{current_object}: {ln}")
current_object = None
else:
alarm_entries.append(ln)
if not detail_line and alarm_name:
# fallback: use alarm name with context
parts = [alarm_name]
ctx = []
if hostname:
ctx.append(f"Host: {hostname}")
if at_time:
ctx.append(f"Time: {at_time}")
if repeats:
ctx.append(f"Repeats: {repeats}")
if ctx:
parts.append("(" + ", ".join(ctx) + ")")
detail_line = " ".join(parts).strip() or None
# Severity inference.
if failed_kw.search(ln):
company_failed = True
elif warning_kw.search(ln):
company_warning = True
# If we did not find explicit severity, fall back to alarm count > 0.
if not company_failed and not company_warning:
if (alarms_count or 0) > 0:
company_warning = True
status = "Success"
if company_failed:
status = "Failed"
saw_failed = True
elif company_warning:
status = "Warning"
saw_warning = True
objects.append(
{
"name": company_name,
"type": "Company",
"status": status,
"error_message": "\n".join([e for e in alarm_entries if e]).strip() or None,
}
)
objects.append(
{
"name": f"{company_name} | {obj_name}" if company_name else obj_name,
"type": obj_type or "Alarm",
"status": status,
"error_message": detail_line,
}
)
overall_status = "Success"
if saw_failed:
@ -168,12 +165,10 @@ def _parse_vspc_summary_daily_alarm_report_from_html(html: str) -> Tuple[List[Di
overall_status = "Warning"
overall_message = None
if total_companies and total_alarms:
overall_message = f"Companies with alarms: {total_companies}, Total alarms: {total_alarms}".strip()
return objects, overall_status, overall_message
def _parse_cloud_connect_report_from_html(html: str) -> Tuple[List[Dict], str]:
"""Parse Veeam Cloud Connect daily report (provider) HTML.
@ -1026,48 +1021,24 @@ def try_parse_veeam(msg: MailMessage) -> Tuple[bool, Dict, List[Dict]]:
and "infrastructure status" in html_lower
)
# Veeam Service Provider Console (VSPC) summary daily alarm notification.
# These mails can contain multiple client companies.
# Detection is intentionally tolerant: company blocks are typically formatted
# as "Company: <name> (alarms: N)".
text_body = _html_to_text_preserve_lines(html_body)
text_lower = (text_body or "").lower()
is_vspc_daily_alarm_summary = (
("company:" in text_lower and "alarms" in text_lower)
and (
"service provider" in text_lower
or "availability console" in text_lower
or "vac" in text_lower
or "veeam" in subject.lower()
)
)
# Special-case: Veeam Backup for Microsoft 365 mails can come without a
# subject marker. Detect via HTML and extract status from the banner.
is_m365 = "veeam backup for microsoft 365" in html_lower
# VSPC Active Alarms summary (no [Success]/[Warning] marker).
# Detection therefore relies on the lower-cased HTML body: it must name the
# product, mention "active alarms" (in the body or the subject), and contain
# at least one "company:" header together with the word "alarms".
is_vspc_active_alarms = (
    ("veeam service provider console" in html_lower)
    and ("active alarms" in html_lower or "active alarms summary" in subject.lower())
    and ("company:" in html_lower and "alarms" in html_lower)
)
# If we cannot detect a status marker and this is not an M365 report,
# we still try to parse when the subject strongly indicates a Veeam report.
if not m_status and not m_finished and not is_m365 and not is_cloud_connect_report and not is_vspc_daily_alarm_summary:
if not m_status and not m_finished and not is_m365 and not is_cloud_connect_report and not is_vspc_active_alarms:
lowered = subject.lower()
if not any(k in lowered for k in ["veeam", "cloud connect", "backup job", "backup copy job", "replica job", "configuration backup", "health check"]):
return False, {}, []
# Handle VSPC daily alarm summary early.
if is_vspc_daily_alarm_summary:
objects, overall_status, overall_message = _parse_vspc_summary_daily_alarm_report_from_html(html_body)
result = {
"backup_software": "Veeam",
"backup_type": "Service Provider Console",
"job_name": "Daily alarms",
"overall_status": overall_status,
}
if overall_message:
result["overall_message"] = overall_message
return True, result, objects
# Handle Cloud Connect daily report early: overall status is derived from row colours.
if is_cloud_connect_report:
objects, overall_status = _parse_cloud_connect_report_from_html(html_body)
@ -1089,6 +1060,21 @@ def try_parse_veeam(msg: MailMessage) -> Tuple[bool, Dict, List[Dict]]:
return True, result, objects
# Handle VSPC Active Alarms summary early.
if is_vspc_active_alarms:
objects, overall_status, overall_message = _parse_vspc_active_alarms_from_html(html_body)
# Build the run-level result for the VSPC Active Alarms summary mail.
result = {
    "backup_software": "Veeam",
    "backup_type": "Service Provider Console",
    "job_name": "Active alarms summary",
    "overall_status": overall_status,
}
# The overall message is optional; only attach it when the parser produced one.
if overall_message:
    result["overall_message"] = overall_message
return True, result, objects
if m_status:
status_word = m_status.group(1)
rest = m_status.group(2)

View File

@ -159,6 +159,13 @@
- Derived overall status from the worst status found across all companies (Failed > Warning > Success).
- Registered "Service Provider Console" as a supported Veeam backup type for consistent reporting.
---
## v20260112-07-veeam-vspc-active-alarms-parser
- Improved detection for VSPC “Active Alarms” summary emails based on HTML content to prevent fallback to other Veeam parsers.
- Implemented parsing of alarms per Company and per alarm row, creating objects named “<Company> | <Object>”.
- Derived object status from “Current State” and attached alarm details where available (fallback to Alarm Name).
================================================================================================================================================
## v0.1.19
This release delivers a broad set of improvements focused on reliability, transparency, and operational control across mail processing, administrative auditing, and Run Checks workflows. The changes aim to make message handling more robust, provide better insight for administrators, and give operators clearer and more flexible control when reviewing backup runs.