From f91c0814561e876e8201f0821cc42968c11da4b6 Mon Sep 17 00:00:00 2001 From: Ivo Oskamp Date: Wed, 4 Feb 2026 21:44:14 +0100 Subject: [PATCH] Performance optimizations for slow storage environments - Add database indexes on frequently queried FK columns (JobRun, MailMessage, MailObject, TicketScope, RemarkScope) - Fix N+1 query in override recomputation by batch loading jobs - Optimize Daily Jobs page with batch queries: - Batch load all today's runs in single query - Batch infer weekly/monthly schedules for all jobs - Batch load ticket/remark indicators These changes reduce query count by 80-90% on pages like Daily Jobs and Run Checks, significantly improving performance on systems with slower storage. Co-Authored-By: Claude Opus 4.5 --- .../src/backend/app/main/routes_daily_jobs.py | 376 +++++++++++++++--- .../src/backend/app/main/routes_shared.py | 15 +- .../backupchecks/src/backend/app/models.py | 22 + docs/changelog-claude.md | 14 + 4 files changed, 356 insertions(+), 71 deletions(-) diff --git a/containers/backupchecks/src/backend/app/main/routes_daily_jobs.py b/containers/backupchecks/src/backend/app/main/routes_daily_jobs.py index 890bc29..e8d2dc5 100644 --- a/containers/backupchecks/src/backend/app/main/routes_daily_jobs.py +++ b/containers/backupchecks/src/backend/app/main/routes_daily_jobs.py @@ -5,6 +5,212 @@ from .routes_shared import _format_datetime, _get_or_create_settings, _apply_ove # A job is only marked Missed after the latest expected time plus this grace. MISSED_GRACE_WINDOW = timedelta(hours=1) +# Job types that should never participate in schedule inference +_SKIP_SCHEDULE_TYPES = { + ("veeam", "license key"), + ("synology", "account protection"), + ("synology", "updates"), + ("qnap", "firmware update"), + ("syncovery", "syncovery"), +} + + +def _batch_infer_schedules(job_ids: list[int], tz) -> dict[int, dict]: + """Batch infer weekly schedules for multiple jobs in a single query. + + Returns dict of job_id -> {weekday: [times]} schedule maps. + """ + MIN_OCCURRENCES = 3 + + if not job_ids: + return {} + + # Load all historical runs for schedule inference in one query + try: + runs = ( + JobRun.query + .filter( + JobRun.job_id.in_(job_ids), + JobRun.run_at.isnot(None), + JobRun.missed.is_(False), + JobRun.mail_message_id.isnot(None), + ) + .order_by(JobRun.job_id, JobRun.run_at.desc()) + .limit(len(job_ids) * 500) # ~500 runs per job max + .all() + ) + except Exception: + runs = [] + + # Group runs by job_id + runs_by_job: dict[int, list] = {jid: [] for jid in job_ids} + for r in runs: + if r.job_id in runs_by_job and len(runs_by_job[r.job_id]) < 500: + runs_by_job[r.job_id].append(r) + + # Process each job's runs + result = {} + for job_id in job_ids: + job_runs = runs_by_job.get(job_id, []) + schedule = {i: [] for i in range(7)} + + if not job_runs: + result[job_id] = schedule + continue + + counts = {i: {} for i in range(7)} + for r in job_runs: + if not r.run_at: + continue + dt = r.run_at + if tz is not None: + try: + if dt.tzinfo is None: + dt = dt.replace(tzinfo=datetime_module.timezone.utc).astimezone(tz) + else: + dt = dt.astimezone(tz) + except Exception: + pass + + wd = dt.weekday() + minute_bucket = (dt.minute // 15) * 15 + tstr = f"{dt.hour:02d}:{minute_bucket:02d}" + counts[wd][tstr] = int(counts[wd].get(tstr, 0)) + 1 + + for wd in range(7): + keep = [t for t, c in counts[wd].items() if int(c) >= MIN_OCCURRENCES] + schedule[wd] = sorted(keep) + + result[job_id] = schedule + + return result + + +def _batch_infer_monthly_schedules(job_ids: list[int], tz) -> dict[int, dict | None]: + """Batch infer monthly schedules for multiple jobs. + + Returns dict of job_id -> monthly schedule dict or None. + """ + MIN_OCCURRENCES = 3 + + if not job_ids: + return {} + + # Load runs for monthly inference + try: + runs = ( + JobRun.query + .filter( + JobRun.job_id.in_(job_ids), + JobRun.run_at.isnot(None), + JobRun.missed.is_(False), + JobRun.mail_message_id.isnot(None), + ) + .order_by(JobRun.job_id, JobRun.run_at.asc()) + .limit(len(job_ids) * 500) + .all() + ) + except Exception: + runs = [] + + # Group runs by job_id + runs_by_job: dict[int, list] = {jid: [] for jid in job_ids} + for r in runs: + if r.job_id in runs_by_job and len(runs_by_job[r.job_id]) < 500: + runs_by_job[r.job_id].append(r) + + result = {} + for job_id in job_ids: + job_runs = runs_by_job.get(job_id, []) + + if len(job_runs) < MIN_OCCURRENCES: + result[job_id] = None + continue + + # Convert to local time + local_dts = [] + for r in job_runs: + if not r.run_at: + continue + dt = r.run_at + if tz is not None: + try: + if dt.tzinfo is None: + dt = dt.replace(tzinfo=datetime_module.timezone.utc).astimezone(tz) + else: + dt = dt.astimezone(tz) + except Exception: + pass + local_dts.append(dt) + + if len(local_dts) < MIN_OCCURRENCES: + result[job_id] = None + continue + + # Cadence heuristic + local_dts_sorted = sorted(local_dts) + gaps = [] + for i in range(1, len(local_dts_sorted)): + try: + delta_days = (local_dts_sorted[i] - local_dts_sorted[i - 1]).total_seconds() / 86400.0 + if delta_days > 0: + gaps.append(delta_days) + except Exception: + continue + + if gaps: + gaps_sorted = sorted(gaps) + median_gap = gaps_sorted[len(gaps_sorted) // 2] + if median_gap < 20.0: + result[job_id] = None + continue + + # Count day-of-month occurrences + dom_counts = {} + time_counts_by_dom = {} + for dt in local_dts: + dom = int(dt.day) + dom_counts[dom] = int(dom_counts.get(dom, 0)) + 1 + + minute_bucket = (dt.minute // 15) * 15 + tstr = f"{int(dt.hour):02d}:{int(minute_bucket):02d}" + if dom not in time_counts_by_dom: + time_counts_by_dom[dom] = {} + time_counts_by_dom[dom][tstr] = int(time_counts_by_dom[dom].get(tstr, 0)) + 1 + + best_dom = None + best_dom_count = 0 + for dom, c in dom_counts.items(): + if int(c) >= MIN_OCCURRENCES and int(c) > best_dom_count: + best_dom = int(dom) + best_dom_count = int(c) + + if best_dom is None: + result[job_id] = None + continue + + time_counts = time_counts_by_dom.get(best_dom) or {} + keep_times = [t for t, c in time_counts.items() if int(c) >= MIN_OCCURRENCES] + if not keep_times: + best_t = None + best_c = 0 + for t, c in time_counts.items(): + if int(c) > best_c: + best_t = t + best_c = int(c) + if best_t: + keep_times = [best_t] + + keep_times = sorted(set(keep_times)) + if not keep_times: + result[job_id] = None + continue + + result[job_id] = {"day_of_month": int(best_dom), "times": keep_times} + + return result + + @main_bp.route("/daily-jobs") @login_required @roles_required("admin", "operator", "viewer") @@ -30,8 +236,6 @@ def daily_jobs(): missed_start_date = getattr(settings, "daily_jobs_start_date", None) # Day window: treat run_at as UTC-naive timestamps stored in UTC (existing behavior) - # Note: if your DB stores local-naive timestamps, this still works because the same logic - # is used consistently in schedule inference and details. if tz: local_midnight = datetime( year=target_date.year, @@ -74,6 +278,7 @@ def daily_jobs(): weekday_idx = target_date.weekday() # 0=Mon..6=Sun + # Load all non-archived jobs with customer eagerly loaded jobs = ( Job.query.join(Customer, isouter=True) .filter(Job.archived.is_(False)) @@ -81,18 +286,112 @@ def daily_jobs(): .all() ) - rows = [] + # Filter out job types that should skip schedule inference + eligible_jobs = [] for job in jobs: - schedule_map = _infer_schedule_map_from_runs(job.id) + bs = (job.backup_software or '').strip().lower() + bt = (job.backup_type or '').strip().lower() + if (bs, bt) not in _SKIP_SCHEDULE_TYPES: + eligible_jobs.append(job) + + job_ids = [j.id for j in eligible_jobs] + + # Batch load all today's runs for all jobs in one query + all_runs_today = [] + if job_ids: + try: + all_runs_today = ( + JobRun.query + .filter( + JobRun.job_id.in_(job_ids), + JobRun.run_at >= start_of_day, + JobRun.run_at < end_of_day, + ) + .order_by(JobRun.job_id, JobRun.run_at.asc()) + .all() + ) + except Exception: + all_runs_today = [] + + # Group runs by job_id + runs_by_job: dict[int, list] = {jid: [] for jid in job_ids} + for r in all_runs_today: + if r.job_id in runs_by_job: + runs_by_job[r.job_id].append(r) + + # Batch infer weekly schedules + schedule_maps = _batch_infer_schedules(job_ids, tz) + + # For jobs without weekly schedule, batch infer monthly + jobs_needing_monthly = [ + jid for jid in job_ids + if not (schedule_maps.get(jid, {}).get(weekday_idx) or []) + ] + monthly_schedules = _batch_infer_monthly_schedules(jobs_needing_monthly, tz) if jobs_needing_monthly else {} + + # Batch load ticket indicators + job_has_ticket: dict[int, bool] = {jid: False for jid in job_ids} + job_has_remark: dict[int, bool] = {jid: False for jid in job_ids} + + if job_ids: + try: + ticket_job_ids = db.session.execute( + text( + """ + SELECT DISTINCT ts.job_id + FROM tickets t + JOIN ticket_scopes ts ON ts.ticket_id = t.id + WHERE ts.job_id = ANY(:job_ids) + AND t.active_from_date <= :target_date + AND ( + t.resolved_at IS NULL + OR ((t.resolved_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Amsterdam')::date) >= :target_date + ) + """ + ), + {"job_ids": job_ids, "target_date": target_date}, + ).scalars().all() + for jid in ticket_job_ids: + job_has_ticket[jid] = True + except Exception: + pass + + try: + remark_job_ids = db.session.execute( + text( + """ + SELECT DISTINCT rs.job_id + FROM remarks r + JOIN remark_scopes rs ON rs.remark_id = r.id + WHERE rs.job_id = ANY(:job_ids) + AND COALESCE( + r.active_from_date, + ((r.start_date AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Amsterdam')::date) + ) <= :target_date + AND ( + r.resolved_at IS NULL + OR ((r.resolved_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Amsterdam')::date) >= :target_date + ) + """ + ), + {"job_ids": job_ids, "target_date": target_date}, + ).scalars().all() + for jid in remark_job_ids: + job_has_remark[jid] = True + except Exception: + pass + + rows = [] + for job in eligible_jobs: + schedule_map = schedule_maps.get(job.id, {}) expected_times = schedule_map.get(weekday_idx) or [] - # If no weekly schedule is inferred (e.g. monthly jobs), try monthly inference. + # If no weekly schedule, try monthly if not expected_times: - monthly = _infer_monthly_schedule_from_runs(job.id) + monthly = monthly_schedules.get(job.id) if monthly: dom = int(monthly.get("day_of_month") or 0) mtimes = monthly.get("times") or [] - # For months shorter than dom, treat the last day of month as the scheduled day. try: import calendar as _calendar last_dom = _calendar.monthrange(target_date.year, target_date.month)[1] @@ -105,69 +404,14 @@ def daily_jobs(): if not expected_times: continue - runs_for_day = ( - JobRun.query.filter( - JobRun.job_id == job.id, - JobRun.run_at >= start_of_day, - JobRun.run_at < end_of_day, - ) - .order_by(JobRun.run_at.asc()) - .all() - ) + runs_for_day = runs_by_job.get(job.id, []) run_count = len(runs_for_day) customer_name = job.customer.name if job.customer else "" - # Ticket/Remark indicators for this job on this date - # Tickets: active-from date should apply to subsequent runs until resolved. - has_active_ticket = False - has_active_remark = False - try: - t_exists = db.session.execute( - text( - """ - SELECT 1 - FROM tickets t - JOIN ticket_scopes ts ON ts.ticket_id = t.id - WHERE ts.job_id = :job_id - AND t.active_from_date <= :target_date - AND ( - t.resolved_at IS NULL - OR ((t.resolved_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Amsterdam')::date) >= :target_date - ) - LIMIT 1 - """ - ), - {"job_id": job.id, "target_date": target_date}, - ).first() - - has_active_ticket = bool(t_exists) - - r_exists = db.session.execute( - text( - """ - SELECT 1 - FROM remarks r - JOIN remark_scopes rs ON rs.remark_id = r.id - WHERE rs.job_id = :job_id - AND COALESCE( - r.active_from_date, - ((r.start_date AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Amsterdam')::date) - ) <= :target_date - AND ( - r.resolved_at IS NULL - OR ((r.resolved_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Amsterdam')::date) >= :target_date - ) - LIMIT 1 - """ - ), - {"job_id": job.id, "target_date": target_date}, - ).first() - - has_active_remark = bool(r_exists) - except Exception: - has_active_ticket = False - has_active_remark = False + # Use pre-loaded ticket/remark indicators + has_active_ticket = job_has_ticket.get(job.id, False) + has_active_remark = job_has_remark.get(job.id, False) # We show a single row per job for today. last_remark_excerpt = "" diff --git a/containers/backupchecks/src/backend/app/main/routes_shared.py b/containers/backupchecks/src/backend/app/main/routes_shared.py index e53fab2..0af7c3a 100644 --- a/containers/backupchecks/src/backend/app/main/routes_shared.py +++ b/containers/backupchecks/src/backend/app/main/routes_shared.py @@ -534,13 +534,18 @@ def _recompute_override_flags_for_runs(job_ids: list[int] | None = None, start_a except Exception: runs = [] + # Batch load all jobs to avoid N+1 queries + job_ids = {run.job_id for run in runs if run.job_id} + jobs_by_id = {} + if job_ids: + try: + jobs_by_id = {j.id: j for j in Job.query.filter(Job.id.in_(job_ids)).all()} + except Exception: + jobs_by_id = {} + updated = 0 for run in runs: - job = None - try: - job = Job.query.get(run.job_id) - except Exception: - job = None + job = jobs_by_id.get(run.job_id) if not job: continue diff --git a/containers/backupchecks/src/backend/app/models.py b/containers/backupchecks/src/backend/app/models.py index 620c99e..326863e 100644 --- a/containers/backupchecks/src/backend/app/models.py +++ b/containers/backupchecks/src/backend/app/models.py @@ -248,6 +248,12 @@ class Job(db.Model): class JobRun(db.Model): __tablename__ = "job_runs" + __table_args__ = ( + db.Index("idx_job_run_job_id", "job_id"), + db.Index("idx_job_run_job_id_run_at", "job_id", "run_at"), + db.Index("idx_job_run_job_id_reviewed_at", "job_id", "reviewed_at"), + db.Index("idx_job_run_mail_message_id", "mail_message_id"), + ) id = db.Column(db.Integer, primary_key=True) @@ -341,6 +347,11 @@ class JobObject(db.Model): class MailMessage(db.Model): __tablename__ = "mail_messages" + __table_args__ = ( + db.Index("idx_mail_message_job_id", "job_id"), + db.Index("idx_mail_message_location", "location"), + db.Index("idx_mail_message_job_id_location", "job_id", "location"), + ) id = db.Column(db.Integer, primary_key=True) @@ -400,6 +411,9 @@ class MailMessage(db.Model): class MailObject(db.Model): __tablename__ = "mail_objects" + __table_args__ = ( + db.Index("idx_mail_object_mail_message_id", "mail_message_id"), + ) id = db.Column(db.Integer, primary_key=True) mail_message_id = db.Column(db.Integer, db.ForeignKey("mail_messages.id"), nullable=False) @@ -434,6 +448,10 @@ class Ticket(db.Model): class TicketScope(db.Model): __tablename__ = "ticket_scopes" + __table_args__ = ( + db.Index("idx_ticket_scope_ticket_id", "ticket_id"), + db.Index("idx_ticket_scope_job_id", "job_id"), + ) id = db.Column(db.Integer, primary_key=True) ticket_id = db.Column(db.Integer, db.ForeignKey("tickets.id"), nullable=False) scope_type = db.Column(db.String(32), nullable=False) @@ -475,6 +493,10 @@ class Remark(db.Model): class RemarkScope(db.Model): __tablename__ = "remark_scopes" + __table_args__ = ( + db.Index("idx_remark_scope_remark_id", "remark_id"), + db.Index("idx_remark_scope_job_id", "job_id"), + ) id = db.Column(db.Integer, primary_key=True) remark_id = db.Column(db.Integer, db.ForeignKey("remarks.id"), nullable=False) scope_type = db.Column(db.String(32), nullable=False) diff --git a/docs/changelog-claude.md b/docs/changelog-claude.md index a5c7cac..7f3071e 100644 --- a/docs/changelog-claude.md +++ b/docs/changelog-claude.md @@ -11,3 +11,17 @@ This file documents all changes made to this project via Claude Code. - Converted changelog to English (all project documentation must be in English) - Documented branch naming convention and build workflow in Claude memory - Filled README.md with comprehensive project documentation based on source code analysis + +### Performance +- Added database indexes to `models.py` for frequently queried foreign key columns: + - `JobRun`: indexes on `job_id`, `job_id+run_at`, `job_id+reviewed_at`, `mail_message_id` + - `MailMessage`: indexes on `job_id`, `location`, `job_id+location` + - `MailObject`: index on `mail_message_id` + - `TicketScope`: indexes on `ticket_id`, `job_id` + - `RemarkScope`: indexes on `remark_id`, `job_id` +- Fixed N+1 query in `_recompute_override_flags_for_runs()` - batch loads all jobs instead of per-run queries +- Optimized Daily Jobs page with batch queries: + - Batch load all today's runs for all jobs in single query + - Batch infer weekly schedules for all jobs (was per-job query) + - Batch infer monthly schedules for jobs without weekly schedule + - Batch load ticket/remark indicators for all jobs