diff --git a/.last-branch b/.last-branch index 5e9dd4a..7273c0f 100644 --- a/.last-branch +++ b/.last-branch @@ -1 +1 @@ -v20260320-02-cove-historical-runs-and-detail +25 diff --git a/containers/backupchecks/src/backend/app/main/routes_shared.py b/containers/backupchecks/src/backend/app/main/routes_shared.py index 95c54ba..1e52727 100644 --- a/containers/backupchecks/src/backend/app/main/routes_shared.py +++ b/containers/backupchecks/src/backend/app/main/routes_shared.py @@ -659,7 +659,9 @@ def _infer_schedule_map_from_runs(job_id: int): - Synthetic missed rows never influence schedule inference. - To reduce noise, a weekday/time bucket must occur at least MIN_OCCURRENCES times. """ - MIN_OCCURRENCES = 3 + # Higher threshold reduces false positives from short-lived patterns + # (e.g. a time-of-day shift that briefly leaves two active slots). + MIN_OCCURRENCES = 5 schedule = {i: [] for i in range(7)} # 0=Mon .. 6=Sun # Certain job types are informational and should never participate in schedule @@ -691,6 +693,10 @@ def _infer_schedule_map_from_runs(job_id: int): try: # Only infer schedules from real runs that came from mail reports. # Synthetic "Missed" rows must never influence schedule inference. + # Limit to the last 90 days so that schedule changes (different day, + # time, or frequency) take effect quickly and do not leave stale slots + # generating false missed runs. 
+ cutoff_utc = datetime.utcnow() - timedelta(days=90) runs = ( JobRun.query .filter( @@ -698,6 +704,7 @@ def _infer_schedule_map_from_runs(job_id: int): JobRun.run_at.isnot(None), JobRun.missed.is_(False), JobRun.mail_message_id.isnot(None), + JobRun.run_at >= cutoff_utc, ) .order_by(JobRun.run_at.desc()) .limit(500) @@ -716,6 +723,7 @@ def _infer_schedule_map_from_runs(job_id: int): tz = None counts = {i: {} for i in range(7)} # weekday -> { "HH:MM": count } + run_dts = [] # Collected for cadence guard below for r in runs: if not r.run_at: continue @@ -730,12 +738,32 @@ def _infer_schedule_map_from_runs(job_id: int): except Exception: pass + run_dts.append(dt) wd = dt.weekday() minute_bucket = (dt.minute // 15) * 15 hh = dt.hour tstr = f"{hh:02d}:{minute_bucket:02d}" counts[wd][tstr] = int(counts[wd].get(tstr, 0)) + 1 + # Cadence guard: if the median gap between runs is >= 20 days the job has a + # monthly (or lower) cadence. Return an empty weekly schedule so that + # _infer_monthly_schedule_from_runs() handles it instead. + if len(run_dts) >= 2: + sorted_dts = sorted(run_dts) + gaps = [] + for i in range(1, len(sorted_dts)): + try: + delta_days = (sorted_dts[i] - sorted_dts[i - 1]).total_seconds() / 86400.0 + if delta_days > 0: + gaps.append(delta_days) + except Exception: + continue + if gaps: + gaps_sorted = sorted(gaps) + median_gap = gaps_sorted[len(gaps_sorted) // 2] + if median_gap >= 20.0: + return schedule # empty — defer to monthly inference + for wd in range(7): # Keep only buckets that occur frequently enough. keep = [t for t, c in counts[wd].items() if int(c) >= MIN_OCCURRENCES] @@ -762,6 +790,9 @@ def _infer_monthly_schedule_from_runs(job_id: int): try: # Same "real run" rule as weekly inference. + # 180 days gives ~6 occurrences for a monthly job (enough for + # MIN_OCCURRENCES=3) while still discarding stale schedule data. 
+ cutoff_utc = datetime.utcnow() - timedelta(days=180) runs = ( JobRun.query .filter( @@ -769,6 +800,7 @@ def _infer_monthly_schedule_from_runs(job_id: int): JobRun.run_at.isnot(None), JobRun.missed.is_(False), JobRun.mail_message_id.isnot(None), + JobRun.run_at >= cutoff_utc, ) .order_by(JobRun.run_at.asc()) .limit(500) diff --git a/containers/backupchecks/src/templates/main/job_detail.html b/containers/backupchecks/src/templates/main/job_detail.html index 4aa39fc..2053417 100644 --- a/containers/backupchecks/src/templates/main/job_detail.html +++ b/containers/backupchecks/src/templates/main/job_detail.html @@ -645,8 +645,8 @@ function renderObjects(objects) { function objectSeverityRank(o) { var st = String((o && o.status) || "").trim().toLowerCase(); var err = String((o && o.error_message) || "").trim(); - if (st === "error" || st === "failed" || st === "failure" || err) return 0; - if (st === "warning") return 1; + if (st === "error" || st === "failed" || st === "failure") return 0; + if (st === "warning" || err) return 1; return 2; } diff --git a/containers/backupchecks/src/templates/main/run_checks.html b/containers/backupchecks/src/templates/main/run_checks.html index 3876893..eaa1e62 100644 --- a/containers/backupchecks/src/templates/main/run_checks.html +++ b/containers/backupchecks/src/templates/main/run_checks.html @@ -208,17 +208,21 @@ overflow: auto; } - #runChecksModal #rcm_body_iframe { - flex: 1 1 auto; - min-height: 0; - height: auto; - } #runChecksModal .rcm-mail-panel { display: flex; flex-direction: column; flex: 1 1 auto; min-height: 0; } + #runChecksModal #rcm_mail_iframe_body { + flex: 1 1 auto; + min-height: 0; + overflow: hidden; + } + #runChecksModal #rcm_body_iframe { + height: 100%; + display: block; + } #runChecksModal .rcm-objects-scroll { max-height: 25vh; overflow: auto; @@ -493,8 +497,8 @@ function statusClass(status) { function objectSeverityRank(o) { var st = String((o && o.status) || '').trim().toLowerCase(); var err = String((o 
&& o.error_message) || '').trim(); - if (st === 'error' || st === 'failed' || st === 'failure' || err) return 0; - if (st === 'warning') return 1; + if (st === 'error' || st === 'failed' || st === 'failure') return 0; + if (st === 'warning' || err) return 1; return 2; } diff --git a/docs/changelog-claude.md b/docs/changelog-claude.md index e9e6249..b3391f9 100644 --- a/docs/changelog-claude.md +++ b/docs/changelog-claude.md @@ -2,6 +2,26 @@ This file documents all changes made to this project via Claude Code. +## [2026-03-20] (6) + +### Fixed +- Run Checks and Job Detail modals — objects list sorting: + - `objectSeverityRank`: Warning items with an `error_message` (e.g. "Processing mailbox: MT completed with warning: Cannot process") were incorrectly ranked as Critical (rank 0) due to `|| err` on the rank-0 check; they are now correctly ranked as Warning (rank 1); only status `error`/`failed`/`failure` triggers rank 0 + - Success objects that do have an `error_message` are still promoted to Warning rank (rank 1) to keep them visible +- Run Checks modal — mail iframe no longer collapses to near-zero height: + - `#rcm_mail_iframe_body` was missing flex rules so the `flex: 1 1 auto` on `#rcm_body_iframe` had no effect (the iframe is not a direct flex child of `.rcm-mail-panel`) + - Fixed: `#rcm_mail_iframe_body` now gets `flex: 1 1 auto; min-height: 0; overflow: hidden` so it fills the available panel space; `#rcm_body_iframe` gets `height: 100%; display: block` + +## [2026-03-20] (5) + +### Fixed +- Missed run detection: false positives caused by stale schedule data: + - **Time-of-day changes**: old time slot stayed active until 500 historical runs were "used up"; now weekly inference only looks at the last 90 days, so a changed run time no longer generates missed runs on the old slot after 90 days + - **Frequency changes** (e.g. 
daily → weekly): same 90-day window ensures old patterns stop influencing inference within 3 months + - **Monthly jobs falsely detected as weekly**: after ~21 months a monthly job at a fixed time accumulated 3+ hits per weekday, triggering daily missed runs; fixed by a cadence guard — if the median gap between runs is ≥ 20 days, weekly inference is skipped and monthly inference handles the job instead + - **Monthly inference**: limited to the last 180 days so schedule changes are forgotten within 6 months while still providing enough data (≥ 3 occurrences) for detection + - `MIN_OCCURRENCES` for weekly inference raised from 3 → 5 to reduce false positives from transitional patterns (two overlapping slots during a time shift) + ## [2026-03-20] (4) ### Added