Compare commits
No commits in common. "b12bac5e345a459ffb3ea7ed078be4802c22049d" and "b7ad9cca726b038f8d73a97825e60a0a4837cd54" have entirely different histories.
b12bac5e34...b7ad9cca72
@@ -1 +1 @@
-v20260106-19-missed-run-detection-threshold
+v20260106-18-runchecks-popup-objects-fallback
@@ -1,5 +1,5 @@
 from .routes_shared import * # noqa: F401,F403
-from .routes_shared import _format_datetime, _get_or_create_settings, _apply_overrides_to_run, _infer_schedule_map_from_runs, _infer_monthly_schedule_from_runs
+from .routes_shared import _format_datetime, _get_or_create_settings, _apply_overrides_to_run, _infer_schedule_map_from_runs
 
 # Grace window for today's Expected/Missed transition.
 # A job is only marked Missed after the latest expected time plus this grace.
@@ -85,23 +85,6 @@ def daily_jobs():
     for job in jobs:
         schedule_map = _infer_schedule_map_from_runs(job.id)
         expected_times = schedule_map.get(weekday_idx) or []
 
-        # If no weekly schedule is inferred (e.g. monthly jobs), try monthly inference.
-        if not expected_times:
-            monthly = _infer_monthly_schedule_from_runs(job.id)
-            if monthly:
-                dom = int(monthly.get("day_of_month") or 0)
-                mtimes = monthly.get("times") or []
-                # For months shorter than dom, treat the last day of month as the scheduled day.
-                try:
-                    import calendar as _calendar
-                    last_dom = _calendar.monthrange(target_date.year, target_date.month)[1]
-                except Exception:
-                    last_dom = target_date.day
-                scheduled_dom = dom if (dom and dom <= last_dom) else last_dom
-                if target_date.day == scheduled_dom:
-                    expected_times = list(mtimes)
-
         if not expected_times:
             continue
 
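Side note (not part of the diff): the removed daily_jobs() fallback above clamps the inferred day-of-month to short months via calendar.monthrange. A minimal standalone sketch of that clamping, using a hypothetical helper name:

    import calendar
    from datetime import date

    def clamp_day_of_month(target: date, dom: int) -> int:
        # Months shorter than dom fall back to their last day, as in the removed block.
        last_dom = calendar.monthrange(target.year, target.month)[1]
        return dom if 0 < dom <= last_dom else last_dom

    assert clamp_day_of_month(date(2026, 2, 10), 31) == 28  # February 2026 has 28 days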
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import calendar
-
 from datetime import date, datetime, time, timedelta, timezone
 
 from flask import jsonify, render_template, request
@@ -15,7 +13,6 @@ from .routes_shared import (
     _get_ui_timezone_name,
     _get_or_create_settings,
     _infer_schedule_map_from_runs,
-    _infer_monthly_schedule_from_runs,
     _to_amsterdam_date,
     main_bp,
     roles_required,
@@ -87,13 +84,7 @@ def _ensure_missed_runs_for_job(job: Job, start_from: date, end_inclusive: date)
     """
     tz = _get_ui_timezone()
     schedule_map = _infer_schedule_map_from_runs(job.id) or {}
-    has_weekly_times = any((schedule_map.get(i) or []) for i in range(7))
-
-    monthly = None
-    if not has_weekly_times:
-        monthly = _infer_monthly_schedule_from_runs(job.id)
-
-    if (not has_weekly_times) and (not monthly):
+    if not schedule_map:
         return 0
 
     today_local = _to_amsterdam_date(datetime.utcnow()) or datetime.utcnow().date()
@@ -129,8 +120,6 @@ def _ensure_missed_runs_for_job(job: Job, start_from: date, end_inclusive: date)
     inserted = 0
     d = start_from
     while d <= end_inclusive:
-        if not has_weekly_times:
-            break
         weekday = d.weekday()
         times = schedule_map.get(weekday) or []
         if not times:
@@ -188,82 +177,6 @@ def _ensure_missed_runs_for_job(job: Job, start_from: date, end_inclusive: date)
 
         d = d + timedelta(days=1)
 
-
-    # Monthly expected slots (fallback when no stable weekly schedule is detected)
-    if (not has_weekly_times) and monthly:
-        try:
-            dom = int(monthly.get("day_of_month") or 0)
-        except Exception:
-            dom = 0
-        times = monthly.get("times") or []
-
-        if dom > 0 and times:
-            # Iterate months in the window [start_from, end_inclusive]
-            cur = date(start_from.year, start_from.month, 1)
-            end_marker = date(end_inclusive.year, end_inclusive.month, 1)
-
-            while cur <= end_marker:
-                try:
-                    last_dom = calendar.monthrange(cur.year, cur.month)[1]
-                except Exception:
-                    last_dom = 28
-                scheduled_dom = dom if dom <= last_dom else last_dom
-                scheduled_date = date(cur.year, cur.month, scheduled_dom)
-
-                if scheduled_date >= start_from and scheduled_date <= end_inclusive:
-                    for hhmm in times:
-                        hm = _parse_hhmm(hhmm)
-                        if not hm:
-                            continue
-                        hh, mm = hm
-
-                        local_dt = datetime.combine(scheduled_date, time(hour=hh, minute=mm))
-                        if tz:
-                            local_dt = local_dt.replace(tzinfo=tz)
-
-                        # Only generate missed runs for past slots.
-                        if local_dt > now_local_dt:
-                            continue
-
-                        slot_utc_naive = _utc_naive_from_local(local_dt)
-
-                        window_start = slot_utc_naive - MISSED_GRACE_WINDOW
-                        window_end = slot_utc_naive + MISSED_GRACE_WINDOW
-
-                        exists = (
-                            db.session.query(JobRun.id)
-                            .filter(
-                                JobRun.job_id == job.id,
-                                JobRun.run_at.isnot(None),
-                                or_(
-                                    and_(JobRun.missed.is_(False), JobRun.mail_message_id.isnot(None)),
-                                    and_(JobRun.missed.is_(True), JobRun.mail_message_id.is_(None)),
-                                ),
-                                JobRun.run_at >= window_start,
-                                JobRun.run_at <= window_end,
-                            )
-                            .first()
-                        )
-                        if exists:
-                            continue
-
-                        miss = JobRun(
-                            job_id=job.id,
-                            run_at=slot_utc_naive,
-                            status="Missed",
-                            missed=True,
-                            remark=None,
-                            mail_message_id=None,
-                        )
-                        db.session.add(miss)
-                        inserted += 1
-
-                # Next month
-                if cur.month == 12:
-                    cur = date(cur.year + 1, 1, 1)
-                else:
-                    cur = date(cur.year, cur.month + 1, 1)
-
     if inserted:
         db.session.commit()
     return inserted
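Side note (not part of the diff): before inserting a synthetic "Missed" row, the removed monthly block above checks whether any existing run already falls inside the grace window around the expected slot. A rough standalone sketch of that check; the two-hour value below is only an assumption, the real MISSED_GRACE_WINDOW constant lives elsewhere in the module:

    from datetime import datetime, timedelta

    MISSED_GRACE_WINDOW = timedelta(hours=2)  # assumed value, for illustration only

    def slot_already_covered(slot_utc: datetime, existing_runs_utc: list) -> bool:
        # Any run (real mail-based run or existing synthetic miss) within +/- the
        # grace window of the expected slot means no new "Missed" row is needed.
        window_start = slot_utc - MISSED_GRACE_WINDOW
        window_end = slot_utc + MISSED_GRACE_WINDOW
        return any(window_start <= r <= window_end for r in existing_runs_utc)

    slot = datetime(2026, 1, 25, 7, 0)
    print(slot_already_covered(slot, [datetime(2026, 1, 25, 7, 40)]))  # True: slot covered
    print(slot_already_covered(slot, [datetime(2026, 1, 26, 7, 0)]))   # False: a miss would be inserted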
@@ -6,7 +6,6 @@ import json
 import re
 import html as _html
 import math
-import calendar
 import datetime as datetime_module
 from functools import wraps
 
@@ -613,13 +612,7 @@ def _infer_schedule_map_from_runs(job_id: int):
     """Infer weekly schedule blocks (15-min) from historical runs.
 
     Returns dict weekday->sorted list of 'HH:MM' strings in configured UI local time.
-
-    Notes:
-    - Only considers real runs that came from mail reports (mail_message_id is not NULL).
-    - Synthetic missed rows never influence schedule inference.
-    - To reduce noise, a weekday/time bucket must occur at least MIN_OCCURRENCES times.
     """
-    MIN_OCCURRENCES = 3
     schedule = {i: [] for i in range(7)} # 0=Mon .. 6=Sun
 
     # Certain job types are informational and should never participate in schedule
@@ -639,7 +632,6 @@ def _infer_schedule_map_from_runs(job_id: int):
             return schedule
     except Exception:
         pass
-
     try:
         # Only infer schedules from real runs that came from mail reports.
         # Synthetic "Missed" rows must never influence schedule inference.
@@ -661,13 +653,13 @@ def _infer_schedule_map_from_runs(job_id: int):
     if not runs:
         return schedule
 
-    # Convert run_at to UI local time and bucket into 15-minute blocks.
+    # Convert run_at to UI local time and bucket into 15-minute blocks
    try:
         tz = _get_ui_timezone()
     except Exception:
         tz = None
 
-    counts = {i: {} for i in range(7)} # weekday -> { "HH:MM": count }
+    seen = {i: set() for i in range(7)}
     for r in runs:
         if not r.run_at:
             continue
@@ -686,139 +678,14 @@ def _infer_schedule_map_from_runs(job_id: int):
         minute_bucket = (dt.minute // 15) * 15
         hh = dt.hour
         tstr = f"{hh:02d}:{minute_bucket:02d}"
-        counts[wd][tstr] = int(counts[wd].get(tstr, 0)) + 1
+        seen[wd].add(tstr)
 
     for wd in range(7):
-        # Keep only buckets that occur frequently enough.
-        keep = [t for t, c in counts[wd].items() if int(c) >= MIN_OCCURRENCES]
-        schedule[wd] = sorted(keep)
+        schedule[wd] = sorted(seen[wd])
 
     return schedule
 
 
-def _infer_monthly_schedule_from_runs(job_id: int):
-    """Infer a monthly schedule from historical runs.
-
-    Returns:
-        dict with keys:
-        - day_of_month (int)
-        - times (list[str] of 'HH:MM' 15-min buckets)
-        or None if not enough evidence.
-
-    Rules:
-    - Uses only real mail-based runs (mail_message_id is not NULL) and excludes synthetic missed rows.
-    - Requires at least MIN_OCCURRENCES occurrences for the inferred day-of-month.
-    - Uses a simple cadence heuristic: typical gaps between runs must be >= 20 days to qualify as monthly.
-    """
-    MIN_OCCURRENCES = 3
-
-    try:
-        # Same "real run" rule as weekly inference.
-        runs = (
-            JobRun.query
-            .filter(
-                JobRun.job_id == job_id,
-                JobRun.run_at.isnot(None),
-                JobRun.missed.is_(False),
-                JobRun.mail_message_id.isnot(None),
-            )
-            .order_by(JobRun.run_at.asc())
-            .limit(500)
-            .all()
-        )
-    except Exception:
-        runs = []
-
-    if len(runs) < MIN_OCCURRENCES:
-        return None
-
-    try:
-        tz = _get_ui_timezone()
-    except Exception:
-        tz = None
-
-    # Convert and keep local datetimes.
-    local_dts = []
-    for r in runs:
-        if not r.run_at:
-            continue
-        dt = r.run_at
-        if tz is not None:
-            try:
-                if dt.tzinfo is None:
-                    dt = dt.replace(tzinfo=datetime_module.timezone.utc).astimezone(tz)
-                else:
-                    dt = dt.astimezone(tz)
-            except Exception:
-                pass
-        local_dts.append(dt)
-
-    if len(local_dts) < MIN_OCCURRENCES:
-        return None
-
-    # Cadence heuristic: monthly jobs shouldn't look weekly.
-    local_dts_sorted = sorted(local_dts)
-    gaps = []
-    for i in range(1, len(local_dts_sorted)):
-        try:
-            delta_days = (local_dts_sorted[i] - local_dts_sorted[i - 1]).total_seconds() / 86400.0
-            if delta_days > 0:
-                gaps.append(delta_days)
-        except Exception:
-            continue
-
-    if gaps:
-        gaps_sorted = sorted(gaps)
-        median_gap = gaps_sorted[len(gaps_sorted) // 2]
-        # If it looks like a weekly/daily cadence, do not classify as monthly.
-        if median_gap < 20.0:
-            return None
-
-    # Count day-of-month occurrences and time buckets on that day.
-    dom_counts = {}
-    time_counts_by_dom = {} # dom -> { "HH:MM": count }
-    for dt in local_dts:
-        dom = int(dt.day)
-        dom_counts[dom] = int(dom_counts.get(dom, 0)) + 1
-
-        minute_bucket = (dt.minute // 15) * 15
-        tstr = f"{int(dt.hour):02d}:{int(minute_bucket):02d}"
-        if dom not in time_counts_by_dom:
-            time_counts_by_dom[dom] = {}
-        time_counts_by_dom[dom][tstr] = int(time_counts_by_dom[dom].get(tstr, 0)) + 1
-
-    # Pick the most common day-of-month with enough occurrences.
-    best_dom = None
-    best_dom_count = 0
-    for dom, c in dom_counts.items():
-        if int(c) >= MIN_OCCURRENCES and int(c) > best_dom_count:
-            best_dom = int(dom)
-            best_dom_count = int(c)
-
-    if best_dom is None:
-        return None
-
-    # Times on that day must also be stable. Keep frequent buckets; otherwise fall back to the top bucket.
-    time_counts = time_counts_by_dom.get(best_dom) or {}
-    keep_times = [t for t, c in time_counts.items() if int(c) >= MIN_OCCURRENCES]
-    if not keep_times:
-        # Fallback: choose the single most common time bucket for that day.
-        best_t = None
-        best_c = 0
-        for t, c in time_counts.items():
-            if int(c) > best_c:
-                best_t = t
-                best_c = int(c)
-        if best_t:
-            keep_times = [best_t]
-
-    keep_times = sorted(set(keep_times))
-    if not keep_times:
-        return None
-
-    return {"day_of_month": int(best_dom), "times": keep_times}
-
-
 def _schedule_map_to_desc(schedule_map):
     weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
     any_times = any(schedule_map.get(i) for i in range(7))
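Side note (not part of the diff): the two sides of the hunk above differ only in how weekly time buckets are kept. The removed (-) v20260106-19 lines count each weekday/'HH:MM' bucket and keep only buckets seen at least MIN_OCCURRENCES times; the restored (+) v20260106-18 lines keep every bucket ever seen. A standalone sketch with made-up run timestamps:

    from datetime import datetime

    MIN_OCCURRENCES = 3
    runs = [
        datetime(2026, 1, 5, 8, 3),     # Monday, 08:00 bucket
        datetime(2026, 1, 12, 8, 7),    # Monday, 08:00 bucket
        datetime(2026, 1, 19, 8, 11),   # Monday, 08:00 bucket
        datetime(2026, 1, 26, 17, 42),  # Monday, 17:30 bucket (one-off outlier)
    ]

    counts = {i: {} for i in range(7)}
    for dt in runs:
        tstr = f"{dt.hour:02d}:{(dt.minute // 15) * 15:02d}"
        counts[dt.weekday()][tstr] = counts[dt.weekday()].get(tstr, 0) + 1

    with_threshold = {wd: sorted(t for t, c in counts[wd].items() if c >= MIN_OCCURRENCES) for wd in range(7)}
    without_threshold = {wd: sorted(counts[wd]) for wd in range(7)}

    print(with_threshold[0])     # ['08:00']           -- the outlier bucket is filtered out
    print(without_threshold[0])  # ['08:00', '17:30']  -- the outlier bucket survives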
@@ -139,15 +139,6 @@ Removed an incorrectly indented redirect statement so the module loads correctly
 - Added legacy fallback to load objects via JobRun.objects for older data during/after upgrades.
 - Added mail-based fallback to load objects via MailObject when no run-linked objects exist yet.
 - Updated imports in routes_run_checks to include JobObject and MailObject used by the fallback logic.
 
----
-
-## v20260106-19-missed-run-detection-threshold
-- Improved weekly schedule inference by requiring a time bucket to occur at least 3 times before it is considered “expected” (reduces outlier noise and false missed runs).
-- Added monthly schedule inference based on real mail-based runs (>= 3 occurrences) with a cadence check to avoid classifying weekly jobs as monthly.
-- Updated missed run generation (Run Checks) to use monthly inference when no stable weekly schedule exists, so monthly jobs are marked missed on the correct expected day instead of a week later.
-- Updated Daily Jobs to show expected entries for inferred monthly jobs on the scheduled day-of-month (with last-day-of-month fallback for short months).
-
 ================================================================================================================================================
 ## v0.1.16
-
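Side note (not part of the diff): the removed v20260106-19 changelog entry above mentions a cadence check that keeps weekly jobs from being classified as monthly. A rough standalone sketch of that heuristic (the median gap between real runs must be at least 20 days), using made-up timestamps:

    from datetime import datetime

    def looks_monthly(run_times, min_gap_days=20.0):
        # Median gap between consecutive runs decides weekly/daily vs monthly cadence.
        ts = sorted(run_times)
        gaps = [(b - a).total_seconds() / 86400.0 for a, b in zip(ts, ts[1:])]
        gaps = [g for g in gaps if g > 0]
        if not gaps:
            return False
        median_gap = sorted(gaps)[len(gaps) // 2]
        return median_gap >= min_gap_days

    weekly = [datetime(2026, 1, d, 8, 0) for d in (5, 12, 19, 26)]
    monthly = [datetime(2026, m, 25, 8, 0) for m in (1, 2, 3)]
    print(looks_monthly(weekly))   # False: ~7-day gaps
    print(looks_monthly(monthly))  # True: ~28-31-day gaps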