Auto-commit local changes before build (2026-01-12 12:41:47)

This commit is contained in:
Ivo Oskamp 2026-01-12 12:41:47 +01:00
parent b1522cef2f
commit ae61c563b8
3 changed files with 142 additions and 149 deletions

View File

@@ -1 +1 @@
v20260112-06-veeam-spc-alarm-summary-parser v20260112-07-veeam-vspc-active-alarms-parser

View File

@@ -22,144 +22,141 @@ VEEAM_BACKUP_TYPES = [
] ]
def _parse_vspc_summary_daily_alarm_report_from_html(html: str) -> Tuple[List[Dict], str, Optional[str]]: def _parse_vspc_active_alarms_from_html(html: str) -> Tuple[List[Dict], str, Optional[str]]:
"""Parse Veeam Service Provider Console (VSPC) *summary daily* alarm notifications. """Parse Veeam Service Provider Console (VSPC) Active Alarms summary emails.
The VSPC daily summary mail can contain alarms for multiple client companies. The VSPC summary email can contain multiple companies. We keep this as a
To make this data actionable per customer in Backupchecks, we aggregate alarms single Backupchecks run, but we prefix object names with the company name
per company and emit one object per company. so alarms remain attributable per customer.
Expected content typically includes blocks like:
Company: <Name> (alarms: <N>)
<Alarm details...>
Since templates vary by VSPC version and localization, the parser uses a
tolerant text-first strategy:
1) Convert HTML -> text while preserving meaningful line breaks.
2) Split into company blocks.
3) For each company, extract alarm lines and derive a worst-case status.
Returns: (objects, overall_status, overall_message) Returns: (objects, overall_status, overall_message)
""" """
text = _html_to_text_preserve_lines(html) html = _normalize_html(html)
if not text: if not html:
return [], "Success", None return [], "Success", None
lines = [ln.strip() for ln in text.split("\n") if (ln or "").strip()] html_lower = html.lower()
if not lines: if "veeam service provider console" not in html_lower or "company" not in html_lower:
return [], "Success", None return [], "Success", None
# Identify company block headers. # Extract each company block and its first alarm table.
company_re = re.compile( company_header_re = re.compile(r"(?is)company:\s*([^<\n]+?)\s*\(\s*alarms\s*:\s*(\d+)\s*\)")
r"(?i)^company\s*:\s*(?P<name>.+?)(?:\s*\(\s*alarms?\s*:\s*(?P<count>\d+)\s*\)\s*)?$"
)
blocks: List[Tuple[str, Optional[int], List[str]]] = [] # Build spans using HTML positions.
cur_name: Optional[str] = None headers = [(m.start(), m.end(), (m.group(1) or "").strip(), m.group(2)) for m in company_header_re.finditer(html)]
cur_count: Optional[int] = None if not headers:
cur_lines: List[str] = []
def _flush():
nonlocal cur_name, cur_count, cur_lines
if cur_name:
blocks.append((cur_name, cur_count, cur_lines))
cur_name, cur_count, cur_lines = None, None, []
for ln in lines:
m = company_re.match(ln)
if m:
_flush()
cur_name = (m.group("name") or "").strip()
try:
cur_count = int(m.group("count")) if m.group("count") else None
except Exception:
cur_count = None
continue
if cur_name:
cur_lines.append(ln)
_flush()
if not blocks:
return [], "Success", None return [], "Success", None
objects: List[Dict] = [] objects: List[Dict] = []
saw_failed = False saw_failed = False
saw_warning = False saw_warning = False
total_alarms = 0
total_companies = 0
# Heuristics for severity classification. for idx, (h_start, h_end, company_name, alarms_raw) in enumerate(headers):
failed_kw = re.compile(r"(?i)\b(critical|error|failed|failure)\b") seg_start = h_end
warning_kw = re.compile(r"(?i)\b(warn|warning)\b") seg_end = headers[idx + 1][0] if idx + 1 < len(headers) else len(html)
segment_html = html[seg_start:seg_end]
for company_name, alarms_count, company_lines in blocks: # Find the first table that looks like the Active Alarms table.
total_companies += 1 m_table = re.search(r"(?is)<table[^>]*>.*?(Current\s*State).*?</table>", segment_html)
if alarms_count is not None: if not m_table:
total_alarms += alarms_count continue
table_html = m_table.group(0)
company_failed = False # Parse rows and cells.
company_warning = False row_re = re.compile(r"(?is)<tr[^>]*>(.*?)</tr>")
cell_re = re.compile(r"(?is)<t[dh][^>]*>(.*?)</t[dh]>")
# Attempt to detect per-alarm object/message pairs. rows = row_re.findall(table_html)
current_object: Optional[str] = None if not rows:
alarm_entries: List[str] = [] continue
for ln in company_lines: # Determine column indexes from header row.
# Skip boilerplate separators. colmap = {}
if ln.strip("-–— ") == "": header_cells = [_strip_html_tags(c).strip().lower() for c in cell_re.findall(rows[0])]
for i, c in enumerate(header_cells):
if c in {"current state", "currentstate"}:
colmap["current_state"] = i
elif c in {"object"}:
colmap["object"] = i
elif c in {"object type", "objecttype"}:
colmap["object_type"] = i
elif c in {"hostname"}:
colmap["hostname"] = i
elif c in {"time"}:
colmap["time"] = i
elif c in {"alarm name", "alarmname"}:
colmap["alarm_name"] = i
elif c in {"n. of repeats", "n.of repeats", "repeats"}:
colmap["repeats"] = i
# Basic validation: needs at least object + current state
if "object" not in colmap or "current_state" not in colmap:
continue
# Convert the entire company segment to text once for details matching.
seg_text = _html_to_text_preserve_lines(segment_html)
seg_lines = [ln.strip() for ln in (seg_text or "").splitlines() if ln.strip()]
for r in rows[1:]:
cells = cell_re.findall(r)
if not cells:
continue
plain = [_strip_html_tags(c).strip() for c in cells]
obj_name = plain[colmap["object"]].strip() if colmap["object"] < len(plain) else ""
if not obj_name:
continue continue
m_obj = re.match(r"(?i)^(?:object|host|repository|vm)\s*[:\-]\s*(.+)$", ln) current_state = plain[colmap["current_state"]].strip() if colmap["current_state"] < len(plain) else ""
if m_obj: obj_type = plain[colmap.get("object_type", -1)].strip() if colmap.get("object_type", -1) >= 0 and colmap.get("object_type", -1) < len(plain) else ""
current_object = (m_obj.group(1) or "").strip() or None hostname = plain[colmap.get("hostname", -1)].strip() if colmap.get("hostname", -1) >= 0 and colmap.get("hostname", -1) < len(plain) else ""
continue at_time = plain[colmap.get("time", -1)].strip() if colmap.get("time", -1) >= 0 and colmap.get("time", -1) < len(plain) else ""
alarm_name = plain[colmap.get("alarm_name", -1)].strip() if colmap.get("alarm_name", -1) >= 0 and colmap.get("alarm_name", -1) < len(plain) else ""
repeats = plain[colmap.get("repeats", -1)].strip() if colmap.get("repeats", -1) >= 0 and colmap.get("repeats", -1) < len(plain) else ""
# Some templates format as "<Object> - <Message>". state_lower = (current_state or "").lower()
if " - " in ln and not ln.lower().startswith("http"): status = "Success"
left, right = ln.split(" - ", 1) if state_lower in {"failed", "error", "critical"}:
left = left.strip() status = "Failed"
right = right.strip() saw_failed = True
if left and right and (current_object is None): elif state_lower in {"warning", "warn"}:
current_object = left status = "Warning"
alarm_entries.append(f"{left}: {right}") saw_warning = True
current_object = None
# Try to find a more descriptive detail line in the company text.
detail_line = None
# Prefer lines that mention the object or alarm name and are long enough to be a real description.
needles = [n for n in [obj_name, alarm_name] if n]
for ln in seg_lines:
if len(ln) < 25:
continue continue
if any(n.lower() in ln.lower() for n in needles):
detail_line = ln
break
if current_object: if not detail_line and alarm_name:
alarm_entries.append(f"{current_object}: {ln}") # fallback: use alarm name with context
current_object = None parts = [alarm_name]
else: ctx = []
alarm_entries.append(ln) if hostname:
ctx.append(f"Host: {hostname}")
if at_time:
ctx.append(f"Time: {at_time}")
if repeats:
ctx.append(f"Repeats: {repeats}")
if ctx:
parts.append("(" + ", ".join(ctx) + ")")
detail_line = " ".join(parts).strip() or None
# Severity inference. objects.append(
if failed_kw.search(ln): {
company_failed = True "name": f"{company_name} | {obj_name}" if company_name else obj_name,
elif warning_kw.search(ln): "type": obj_type or "Alarm",
company_warning = True "status": status,
"error_message": detail_line,
# If we did not find explicit severity, fall back to alarm count > 0. }
if not company_failed and not company_warning: )
if (alarms_count or 0) > 0:
company_warning = True
status = "Success"
if company_failed:
status = "Failed"
saw_failed = True
elif company_warning:
status = "Warning"
saw_warning = True
objects.append(
{
"name": company_name,
"type": "Company",
"status": status,
"error_message": "\n".join([e for e in alarm_entries if e]).strip() or None,
}
)
overall_status = "Success" overall_status = "Success"
if saw_failed: if saw_failed:
@@ -168,12 +165,10 @@ def _parse_vspc_summary_daily_alarm_report_from_html(html: str) -> Tuple[List[Di
overall_status = "Warning" overall_status = "Warning"
overall_message = None overall_message = None
if total_companies and total_alarms:
overall_message = f"Companies with alarms: {total_companies}, Total alarms: {total_alarms}".strip()
return objects, overall_status, overall_message return objects, overall_status, overall_message
def _parse_cloud_connect_report_from_html(html: str) -> Tuple[List[Dict], str]: def _parse_cloud_connect_report_from_html(html: str) -> Tuple[List[Dict], str]:
"""Parse Veeam Cloud Connect daily report (provider) HTML. """Parse Veeam Cloud Connect daily report (provider) HTML.
@@ -1026,48 +1021,24 @@ def try_parse_veeam(msg: MailMessage) -> Tuple[bool, Dict, List[Dict]]:
and "infrastructure status" in html_lower and "infrastructure status" in html_lower
) )
# Veeam Service Provider Console (VSPC) summary daily alarm notification.
# These mails can contain multiple client companies.
# Detection is intentionally tolerant: company blocks are typically formatted
# as "Company: <name> (alarms: N)".
text_body = _html_to_text_preserve_lines(html_body)
text_lower = (text_body or "").lower()
is_vspc_daily_alarm_summary = (
("company:" in text_lower and "alarms" in text_lower)
and (
"service provider" in text_lower
or "availability console" in text_lower
or "vac" in text_lower
or "veeam" in subject.lower()
)
)
# Special-case: Veeam Backup for Microsoft 365 mails can come without a # Special-case: Veeam Backup for Microsoft 365 mails can come without a
# subject marker. Detect via HTML and extract status from the banner. # subject marker. Detect via HTML and extract status from the banner.
is_m365 = "veeam backup for microsoft 365" in html_lower is_m365 = "veeam backup for microsoft 365" in html_lower
# VSPC Active Alarms summary (no [Success]/[Warning] marker).
is_vspc_active_alarms = (
("veeam service provider console" in html_lower)
and ("active alarms" in html_lower or "active alarms summary" in subject.lower())
and ("company:" in html_lower and "alarms" in html_lower)
)
# If we cannot detect a status marker and this is not an M365 report, # If we cannot detect a status marker and this is not an M365 report,
# we still try to parse when the subject strongly indicates a Veeam report. # we still try to parse when the subject strongly indicates a Veeam report.
if not m_status and not m_finished and not is_m365 and not is_cloud_connect_report and not is_vspc_daily_alarm_summary: if not m_status and not m_finished and not is_m365 and not is_cloud_connect_report and not is_vspc_active_alarms:
lowered = subject.lower() lowered = subject.lower()
if not any(k in lowered for k in ["veeam", "cloud connect", "backup job", "backup copy job", "replica job", "configuration backup", "health check"]): if not any(k in lowered for k in ["veeam", "cloud connect", "backup job", "backup copy job", "replica job", "configuration backup", "health check"]):
return False, {}, [] return False, {}, []
# Handle VSPC daily alarm summary early.
if is_vspc_daily_alarm_summary:
objects, overall_status, overall_message = _parse_vspc_summary_daily_alarm_report_from_html(html_body)
result = {
"backup_software": "Veeam",
"backup_type": "Service Provider Console",
"job_name": "Daily alarms",
"overall_status": overall_status,
}
if overall_message:
result["overall_message"] = overall_message
return True, result, objects
# Handle Cloud Connect daily report early: overall status is derived from row colours. # Handle Cloud Connect daily report early: overall status is derived from row colours.
if is_cloud_connect_report: if is_cloud_connect_report:
objects, overall_status = _parse_cloud_connect_report_from_html(html_body) objects, overall_status = _parse_cloud_connect_report_from_html(html_body)
@@ -1089,6 +1060,21 @@ def try_parse_veeam(msg: MailMessage) -> Tuple[bool, Dict, List[Dict]]:
return True, result, objects return True, result, objects
# Handle VSPC Active Alarms summary early.
if is_vspc_active_alarms:
objects, overall_status, overall_message = _parse_vspc_active_alarms_from_html(html_body)
result = {
"backup_software": "Veeam",
"backup_type": "Service Provider Console",
"job_name": "Active alarms summary",
"overall_status": overall_status,
}
if overall_message:
result["overall_message"] = overall_message
return True, result, objects
if m_status: if m_status:
status_word = m_status.group(1) status_word = m_status.group(1)
rest = m_status.group(2) rest = m_status.group(2)

View File

@@ -159,6 +159,13 @@
- Derived overall status from the worst status found across all companies (Failed > Warning > Success). - Derived overall status from the worst status found across all companies (Failed > Warning > Success).
- Registered "Service Provider Console" as a supported Veeam backup type for consistent reporting. - Registered "Service Provider Console" as a supported Veeam backup type for consistent reporting.
---
## v20260112-07-veeam-vspc-active-alarms-parser
- Improved detection for VSPC “Active Alarms” summary emails based on HTML content to prevent fallback to other Veeam parsers.
- Implemented parsing of alarms per Company and per alarm row, creating objects named “<Company> | <Object>”.
- Derived object status from “Current State” and attached alarm details where available (fallback to Alarm Name).
================================================================================================================================================ ================================================================================================================================================
## v0.1.19 ## v0.1.19
This release delivers a broad set of improvements focused on reliability, transparency, and operational control across mail processing, administrative auditing, and Run Checks workflows. The changes aim to make message handling more robust, provide better insight for administrators, and give operators clearer and more flexible control when reviewing backup runs. This release delivers a broad set of improvements focused on reliability, transparency, and operational control across mail processing, administrative auditing, and Run Checks workflows. The changes aim to make message handling more robust, provide better insight for administrators, and give operators clearer and more flexible control when reviewing backup runs.