From 0cdeabc0e64ff822ab9cd40838329999e05e7e6c Mon Sep 17 00:00:00 2001
From: Ivo Oskamp <ivooskamp@oskamp.info>
Date: Tue, 26 May 2026 15:12:35 +0200
Subject: [PATCH] Dev build 2026-05-26 15:12

---
 containers/clearview/alembic.ini              |   45 +
 containers/clearview/requirements.txt         |    1 +
 containers/clearview/site/app.js              |   67 +-
 containers/clearview/site/index.html          |   27 +
 containers/clearview/site/styles.css          |   56 +-
 .../src/clearview_app/api_helpers.py          |  321 +++++
 .../clearview/src/clearview_app/api_jobs.py   |  645 ++++++++++
 .../src/clearview_app/api_onboarding.py       |   76 ++
 .../src/clearview_app/api_tenants.py          |   86 ++
 .../clearview/src/clearview_app/cert.py       |    6 +-
 .../clearview/src/clearview_app/db_migrate.py |   53 +
 .../clearview/src/clearview_app/main.py       | 1122 +----------------
 .../src/clearview_app/migrations/env.py       |   58 +
 .../clearview_app/migrations/script.py.mako   |   26 +
 .../migrations/versions/0001_baseline.py      |   31 +
 .../migrations/versions/0002_timestamptz.py   |   63 +
 .../clearview/src/clearview_app/models.py     |   35 +-
 .../src/clearview_app/scanners/sharepoint.py  |   63 +-
 .../clearview/src/clearview_app/schemas.py    |    7 +-
 .../clearview/src/clearview_app/version.py    |    2 +-
 .../clearview/src/clearview_app/worker.py     |   24 +-
 docs/changelog-develop.md                     |   36 +
 22 files changed, 1639 insertions(+), 1211 deletions(-)
 create mode 100644 containers/clearview/alembic.ini
 create mode 100644 containers/clearview/src/clearview_app/api_helpers.py
 create mode 100644 containers/clearview/src/clearview_app/api_jobs.py
 create mode 100644 containers/clearview/src/clearview_app/api_onboarding.py
 create mode 100644 containers/clearview/src/clearview_app/api_tenants.py
 create mode 100644 containers/clearview/src/clearview_app/db_migrate.py
 create mode 100644 containers/clearview/src/clearview_app/migrations/env.py
 create mode 100644 containers/clearview/src/clearview_app/migrations/script.py.mako
 create mode 100644 containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py
 create mode 100644 containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py

diff --git a/containers/clearview/alembic.ini b/containers/clearview/alembic.ini
new file mode 100644
index 0000000..4c8bc67
--- /dev/null
+++ b/containers/clearview/alembic.ini
@@ -0,0 +1,45 @@
+# Alembic config for manual CLI use during development, e.g.:
+#   cd containers/clearview && DATABASE_URL=postgresql://... PYTHONPATH=src alembic revision -m "msg"
+#
+# The application itself does NOT read this file: clearview_app.db_migrate builds
+# an Alembic Config programmatically and env.py takes the database URL from
+# DATABASE_URL via clearview_app.config. sqlalchemy.url is therefore left blank.
+
+[alembic]
+script_location = src/clearview_app/migrations
+prepend_sys_path = src
+sqlalchemy.url =
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/containers/clearview/requirements.txt b/containers/clearview/requirements.txt
index fdc85c8..7da64eb 100644
--- a/containers/clearview/requirements.txt
+++ b/containers/clearview/requirements.txt
@@ -1,6 +1,7 @@
 fastapi==0.115.0
 uvicorn[standard]==0.30.6
 sqlalchemy==2.0.36
+alembic==1.14.0
 psycopg[binary]==3.2.3
 python-multipart==0.0.12
 requests==2.32.3
diff --git a/containers/clearview/site/app.js b/containers/clearview/site/app.js
index e5ce1ac..8920b97 100644
--- a/containers/clearview/site/app.js
+++ b/containers/clearview/site/app.js
@@ -104,6 +104,8 @@
     statTenants: document.getElementById('statTenants'),
     statJobs: document.getElementById('statJobs'),
     statRunning: document.getElementById('statRunning'),
+    statErrors: document.getElementById('statErrors'),
+    dashRecentJobs: document.getElementById('dashRecentJobs'),
   };
 
   // -------------------------------------------------------------------------
@@ -610,6 +612,36 @@
   // Jobs list
   // -------------------------------------------------------------------------
 
+  function renderDashRecent(jobs) {
+    if (!els.dashRecentJobs) return;
+    if (!jobs.length) {
+      els.dashRecentJobs.innerHTML = '<tr><td colspan="6">No jobs yet.</td></tr>';
+      return;
+    }
+    els.dashRecentJobs.innerHTML = jobs.slice(0, 5).map(function (job) {
+      var jobIdSafe = escHtml(job.id);
+      var tenantLabel = job.tenant_name
+        ? escHtml(job.tenant_name)
+        : '<span style="color:var(--cv-text-secondary)">manual</span>';
+      var progress = job.total_targets > 0 ? (job.processed_targets + '/' + job.total_targets) : '0/0';
+      return '<tr style="cursor:pointer" data-dash-job="' + jobIdSafe + '">' +
+        '<td><code>' + jobIdSafe + '</code></td>' +
+        '<td>' + escHtml(job.scan_type || 'sharepoint') + '</td>' +
+        '<td>' + tenantLabel + '</td>' +
+        '<td>' + statusBadge(job.status) + '</td>' +
+        '<td>' + progress + '</td>' +
+        '<td>' + formatDate(job.updated_at) + '</td>' +
+        '</tr>';
+    }).join('');
+    els.dashRecentJobs.querySelectorAll('[data-dash-job]').forEach(function (row) {
+      row.addEventListener('click', function () {
+        state.selectedJobId = row.getAttribute('data-dash-job');
+        navigateTo('jobs');
+        refreshSelectedJob().catch(function () {});
+      });
+    });
+  }
+
   async function refreshJobs() {
     const filterTenant = els.jobTenantFilter.value;
     const filterType = els.jobTypeFilter ? els.jobTypeFilter.value : '';
@@ -626,6 +658,12 @@
     els.statRunning.textContent = String(jobs.filter(function (j) {
       return j.status === 'running' || j.status === 'queued';
     }).length);
+    if (els.statErrors) {
+      els.statErrors.textContent = String(jobs.filter(function (j) {
+        return j.status === 'completed_with_errors' || (j.failed_targets || 0) > 0;
+      }).length);
+    }
+    renderDashRecent(jobs);
 
     if (!jobs.length) {
       els.jobsTableBody.innerHTML = '<tr><td colspan="9">No jobs yet.</td></tr>';
@@ -654,22 +692,23 @@
       } else {
         typeLabel = '<span class="risk ok">SharePoint</span>';
       }
+      const jobIdSafe = escHtml(job.id);
       return (
         '<tr>' +
-          '<td><code>' + job.id + '</code></td>' +
+          '<td><code>' + jobIdSafe + '</code></td>' +
           '<td>' + typeLabel + '</td>' +
           '<td>' + tenantLabel + '</td>' +
-          '<td>' + job.source_type + '</td>' +
+          '<td>' + escHtml(job.source_type) + '</td>' +
           '<td>' + statusBadge(job.status) + '</td>' +
           '<td>' + progress + '</td>' +
           '<td>' + (job.items_scanned > 0 ? job.items_scanned : '-') + '</td>' +
           '<td>' + formatDate(job.updated_at) + '</td>' +
           '<td>' +
             '<div style="display:flex;gap:0.4rem">' +
-              '<button class="btn btn-outline btn-small" data-job-inspect="' + job.id + '">Inspect</button>' +
+              '<button class="btn btn-outline btn-small" data-job-inspect="' + jobIdSafe + '">Inspect</button>' +
               (job.status === 'queued' || job.status === 'running'
-                ? '<button class="btn btn-outline btn-small" data-job-cancel="' + job.id + '">Cancel</button>'
-                : '<button class="btn btn-outline btn-small" data-job-delete="' + job.id + '">Delete</button>') +
+                ? '<button class="btn btn-outline btn-small" data-job-cancel="' + jobIdSafe + '">Cancel</button>'
+                : '<button class="btn btn-outline btn-small" data-job-delete="' + jobIdSafe + '">Delete</button>') +
             '</div>' +
           '</td>' +
         '</tr>'
@@ -1527,14 +1566,16 @@
     return hash;
   }
 
-  function applyRoute(route) {
+  function applyRoute(route, moveFocus) {
     if (!ROUTE_TITLES[route]) {
       route = 'dashboard';
     }
     state.currentRoute = route;
+    var activePage = null;
     document.querySelectorAll('.route-page').forEach(function (page) {
       if (page.getAttribute('data-route-page') === route) {
         page.removeAttribute('hidden');
+        activePage = page;
       } else {
         page.setAttribute('hidden', '');
       }
@@ -1549,6 +1590,16 @@
     if (els.contentTitle) {
       els.contentTitle.textContent = ROUTE_TITLES[route];
     }
+    document.title = 'Clearview | ' + ROUTE_TITLES[route];
+    // On user navigation, move focus to the new page's first heading so
+    // screen-reader and keyboard users land in the freshly shown content.
+    if (moveFocus && activePage) {
+      var heading = activePage.querySelector('h1, h2');
+      if (heading) {
+        heading.setAttribute('tabindex', '-1');
+        heading.focus();
+      }
+    }
   }
 
   function navigateTo(route) {
@@ -1560,12 +1611,12 @@
     if (window.location.hash !== hash) {
       window.location.hash = hash;
     } else {
-      applyRoute(route);
+      applyRoute(route, true);
     }
   }
 
   window.addEventListener('hashchange', function () {
-    applyRoute(parseRoute());
+    applyRoute(parseRoute(), true);
   });
 
   applyRoute(parseRoute());
diff --git a/containers/clearview/site/index.html b/containers/clearview/site/index.html
index 2f63b0c..d9e9187 100644
--- a/containers/clearview/site/index.html
+++ b/containers/clearview/site/index.html
@@ -73,6 +73,33 @@
             <span class="kpi" id="statRunning">0</span>
             <span class="label">Active Jobs</span>
           </article>
+          <article>
+            <span class="kpi" id="statErrors">0</span>
+            <span class="label">With errors</span>
+          </article>
+        </div>
+      </div>
+
+      <div class="panel">
+        <div class="panel-header split">
+          <h2>Recent jobs</h2>
+        </div>
+        <div class="table-wrap">
+          <table>
+            <thead>
+              <tr>
+                <th>Job ID</th>
+                <th>Type</th>
+                <th>Tenant</th>
+                <th>Status</th>
+                <th>Targets</th>
+                <th>Updated</th>
+              </tr>
+            </thead>
+            <tbody id="dashRecentJobs">
+              <tr><td colspan="6">No jobs yet.</td></tr>
+            </tbody>
+          </table>
         </div>
       </div>
     </section>
diff --git a/containers/clearview/site/styles.css b/containers/clearview/site/styles.css
index 456d659..44f6172 100644
--- a/containers/clearview/site/styles.css
+++ b/containers/clearview/site/styles.css
@@ -55,38 +55,12 @@ body {
   background: radial-gradient(circle at center, rgba(3, 105, 161, 0.2), rgba(3, 105, 161, 0));
 }
 
-.topbar {
-  width: min(1100px, calc(100% - 2rem));
-  margin: 1.1rem auto 0;
-  padding: 0.95rem 1.1rem;
-  border: 1px solid var(--cv-border);
-  border-radius: 18px;
-  background: rgba(255, 255, 255, 0.75);
-  backdrop-filter: blur(8px);
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  box-shadow: 0 10px 24px rgba(20, 20, 19, 0.08);
-}
-
 .brand-logo {
   height: 42px;
   width: auto;
   display: block;
 }
 
-.topbar-actions {
-  display: flex;
-  gap: 0.6rem;
-}
-
-.layout {
-  width: min(1100px, calc(100% - 2rem));
-  margin: 1rem auto 2.5rem;
-  display: grid;
-  gap: 1rem;
-}
-
 .hero,
 .panel {
   border-radius: 22px;
@@ -131,7 +105,7 @@ h2 {
 .hero-stats {
   margin-top: 1.3rem;
   display: grid;
-  grid-template-columns: repeat(3, minmax(0, 1fr));
+  grid-template-columns: repeat(4, minmax(0, 1fr));
   gap: 0.75rem;
 }
 
@@ -291,8 +265,9 @@ textarea {
 input:focus,
 select:focus,
 textarea:focus,
-button:focus {
-  outline: 2px solid rgba(14, 165, 233, 0.38);
+button:focus,
+a:focus-visible {
+  outline: 2px solid var(--cv-accent);
   outline-offset: 2px;
 }
 
@@ -533,8 +508,8 @@ strong {
 }
 
 .risk.warn {
-  background: rgba(14, 165, 233, 0.15);
-  color: var(--cv-accent-dark);
+  background: rgba(234, 179, 8, 0.18);
+  color: #854d0e;
 }
 
 .risk.high {
@@ -584,12 +559,6 @@ strong {
 }
 
 @media (max-width: 930px) {
-  .topbar {
-    flex-direction: column;
-    align-items: flex-start;
-    gap: 0.8rem;
-  }
-
   .hero-stats {
     grid-template-columns: 1fr;
   }
@@ -616,11 +585,6 @@ strong {
 }
 
 @media (max-width: 640px) {
-  .layout,
-  .topbar {
-    width: calc(100% - 1rem);
-  }
-
   .hero,
   .panel {
     border-radius: 16px;
@@ -633,14 +597,6 @@ strong {
   .hero h1 {
     max-width: none;
   }
-
-  .topbar-actions {
-    width: 100%;
-  }
-
-  .topbar-actions .btn {
-    flex: 1;
-  }
 }
 
 /* ===========================================================================
diff --git a/containers/clearview/src/clearview_app/api_helpers.py b/containers/clearview/src/clearview_app/api_helpers.py
new file mode 100644
index 0000000..721426a
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_helpers.py
@@ -0,0 +1,321 @@
+"""Shared helpers for the API route modules.
+
+Extracted verbatim from the original monolithic ``main.py`` so the route
+modules (``api_tenants``, ``api_jobs``) can share credential resolution, job
+creation, response mapping, and export helpers without circular imports.
+"""
+from __future__ import annotations
+
+import re
+import uuid
+from datetime import datetime, timezone
+
+from fastapi import HTTPException
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+
+from .db import SessionLocal
+from .default_sites import is_default_site, normalize_site_url
+from .models import ScanJob, ScanTarget, TenantProfile
+from .scanners import AuthConfig
+from .schemas import ScanJobCreateResponse, ScanJobSummary, TenantProfileItem
+
+
+def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
+    """
+    Extract (group_name, link_type) from principal values such as:
+    - SharingLinks.<guid>.<LinkType>.<guid>
+    - c:0o.c|federateddirectoryclaimprovider|SharingLinks.<guid>.<LinkType>.<guid>
+    """
+    if not principal:
+        return None
+
+    text = principal.strip()
+    segments = [s.strip() for s in text.split("|") if s.strip()]
+
+    candidate = ""
+    for segment in reversed(segments):
+        if segment.lower().startswith("sharinglinks."):
+            candidate = segment
+            break
+    if not candidate and text.lower().startswith("sharinglinks."):
+        candidate = text
+    if not candidate:
+        return None
+
+    parts = candidate.split(".")
+    if len(parts) < 3:
+        return None
+    return candidate, parts[2]
+
+
+_SCAN_TYPE_LABELS = {
+    "sharepoint": "Deviations",
+    "sharepoint_root": "Root",
+    "mailbox": "Mailbox",
+    "entra_groups": "EntraGroups",
+}
+
+
+def _build_export_filename(job: ScanJob, job_id: str) -> str:
+    tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
+    safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
+    scan_type = job.scan_type or "sharepoint"
+    type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
+    short_id = job_id.replace("-", "")[-12:]
+    return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
+
+
+def _enumerate_all_entra_groups(
+    tenant_id: str,
+    client_id: str,
+    client_secret: str | None,
+    profile_id: str | None,
+) -> list[str]:
+    cert_private_key: str | None = None
+    cert_thumbprint: str | None = None
+    cert_public_pem: str | None = None
+    if profile_id:
+        with SessionLocal() as db:
+            profile = db.get(TenantProfile, profile_id)
+            if profile:
+                cert_private_key = profile.cert_private_key
+                cert_thumbprint = profile.cert_thumbprint
+                cert_public_pem = profile.cert_public_pem
+
+    auth = AuthConfig(
+        tenant_id=tenant_id,
+        client_id=client_id,
+        client_secret=client_secret or "",
+        cert_private_key=cert_private_key,
+        cert_thumbprint=cert_thumbprint,
+        cert_public_pem=cert_public_pem,
+    )
+
+    from .scanners import entra as _entra
+
+    try:
+        return _entra.list_all_groups(auth)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc
+
+
+def _enumerate_all_mailboxes(
+    organization: str | None,
+    tenant_id: str,
+    client_id: str,
+    client_secret: str | None,
+    profile_id: str | None,
+) -> list[str]:
+    if not organization or "." not in organization:
+        raise HTTPException(
+            status_code=400,
+            detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
+        )
+
+    cert_private_key: str | None = None
+    cert_thumbprint: str | None = None
+    cert_public_pem: str | None = None
+    if profile_id:
+        with SessionLocal() as db:
+            profile = db.get(TenantProfile, profile_id)
+            if profile:
+                cert_private_key = profile.cert_private_key
+                cert_thumbprint = profile.cert_thumbprint
+                cert_public_pem = profile.cert_public_pem
+
+    auth = AuthConfig(
+        tenant_id=tenant_id,
+        client_id=client_id,
+        client_secret=client_secret or "",
+        cert_private_key=cert_private_key,
+        cert_thumbprint=cert_thumbprint,
+        cert_public_pem=cert_public_pem,
+    )
+
+    from .scanners import mailbox as _mailbox
+
+    try:
+        return _mailbox.list_mailboxes(organization=organization.strip().lower(), auth=auth)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
+
+
+def _resolve_credentials(
+    db,
+    tenant_profile_id: str | None,
+    tenant_id: str | None,
+    client_id: str | None,
+    client_secret: str | None,
+) -> tuple[str, str, str | None, str | None]:
+    if tenant_profile_id:
+        profile = db.get(TenantProfile, tenant_profile_id)
+        if not profile:
+            raise HTTPException(status_code=404, detail="Tenant profile not found")
+        if not profile.client_secret and not profile.cert_thumbprint:
+            raise HTTPException(
+                status_code=400,
+                detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
+            )
+        return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
+    if tenant_id and client_id and client_secret:
+        return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
+    raise HTTPException(
+        status_code=400,
+        detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
+    )
+
+
+def _create_job_from_targets(
+    raw_targets: list[str],
+    scan_type: str,
+    skip_default_sites: bool,
+    source_type: str,
+    tenant_id: str,
+    client_id: str,
+    client_secret: str,
+    tenant_profile_id: str | None = None,
+) -> ScanJobCreateResponse:
+    accepted: list[str] = []
+    skipped_default_urls: list[str] = []
+    invalid: list[str] = []
+
+    seen: set[str] = set()
+
+    for raw in raw_targets:
+        if scan_type == "mailbox":
+            normalized = (raw or "").strip().lower()
+            if not normalized or "@" not in normalized:
+                invalid.append(raw)
+                continue
+        elif scan_type == "entra_groups":
+            normalized = (raw or "").strip()
+            if not normalized:
+                invalid.append(raw)
+                continue
+        else:
+            normalized = normalize_site_url(raw) or ""
+            if not normalized:
+                invalid.append(raw)
+                continue
+
+        if normalized in seen:
+            continue
+        seen.add(normalized)
+
+        if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
+            skipped_default_urls.append(normalized)
+            continue
+
+        accepted.append(normalized)
+
+    with SessionLocal() as db:
+        now = datetime.now(timezone.utc)
+        job = ScanJob(
+            id=str(uuid.uuid4()),
+            source_type=source_type,
+            scan_type=scan_type,
+            status="queued" if accepted else "completed",
+            skip_default_sites=skip_default_sites,
+            tenant_profile_id=tenant_profile_id,
+            auth_tenant_id=tenant_id,
+            auth_client_id=client_id,
+            auth_client_secret=client_secret,
+            total_targets=len(accepted),
+            skipped_targets=len(skipped_default_urls),
+            warning_message=None,
+            error_message=None,
+            created_at=now,
+            updated_at=now,
+            finished_at=now if not accepted else None,
+        )
+
+        if not accepted:
+            if scan_type == "mailbox":
+                job.warning_message = "No scannable mailboxes after validation"
+            else:
+                job.warning_message = "No scannable sites after validation and default-site filtering"
+
+        db.add(job)
+        db.flush()
+
+        for index, target in enumerate(accepted, start=1):
+            db.add(
+                ScanTarget(
+                    job_id=job.id,
+                    site_url=target,
+                    source_row=index,
+                    status="queued",
+                    attempts=0,
+                    created_at=now,
+                    updated_at=now,
+                )
+            )
+
+        db.commit()
+
+        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
+        job = db.execute(stmt).unique().scalar_one()
+
+        return ScanJobCreateResponse(
+            job=_to_job_summary(job),
+            accepted_urls=accepted,
+            skipped_default_urls=skipped_default_urls,
+            invalid_urls=invalid,
+        )
+
+
+def _to_job_summary(job: ScanJob) -> ScanJobSummary:
+    return ScanJobSummary(
+        id=job.id,
+        status=job.status,
+        source_type=job.source_type,
+        scan_type=job.scan_type or "sharepoint",
+        skip_default_sites=job.skip_default_sites,
+        tenant_profile_id=job.tenant_profile_id,
+        tenant_name=job.tenant_profile.name if job.tenant_profile else None,
+        total_targets=job.total_targets,
+        processed_targets=job.processed_targets,
+        successful_targets=job.successful_targets,
+        failed_targets=job.failed_targets,
+        skipped_targets=job.skipped_targets,
+        items_scanned=job.items_scanned,
+        scan_activity=job.scan_activity if job.status == "running" else None,
+        warning_message=job.warning_message,
+        error_message=job.error_message,
+        created_at=job.created_at,
+        updated_at=job.updated_at,
+        started_at=job.started_at,
+        finished_at=job.finished_at,
+    )
+
+
+def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
+    return TenantProfileItem(
+        id=profile.id,
+        name=profile.name,
+        tenant_id=profile.tenant_id,
+        primary_domain=profile.primary_domain,
+        client_id=profile.client_id,
+        has_certificate=bool(profile.cert_thumbprint),
+        cert_thumbprint=profile.cert_thumbprint,
+        cert_expires_at=profile.cert_expires_at,
+        created_at=profile.created_at,
+        updated_at=profile.updated_at,
+    )
+
+
+def _sharing_link_risk_label(principal: str) -> str:
+    if not principal.startswith("SharingLinks."):
+        return ""
+    parts = principal.split(".", 3)
+    link_type = parts[2] if len(parts) >= 3 else ""
+    if link_type.startswith("Anonymous"):
+        return "Critical"
+    if link_type == "Flexible":
+        return "High"
+    if link_type.startswith("Organization"):
+        return "Low"
+    if link_type.startswith("Direct"):
+        return "Low"
+    return "Unknown"
diff --git a/containers/clearview/src/clearview_app/api_jobs.py b/containers/clearview/src/clearview_app/api_jobs.py
new file mode 100644
index 0000000..c9cb6a4
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_jobs.py
@@ -0,0 +1,645 @@
+"""Scan-job routes: create, list, inspect, cancel, delete, resolve, export."""
+from __future__ import annotations
+
+import io
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+from fastapi.responses import Response, StreamingResponse
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+
+from .api_helpers import (
+    _build_export_filename,
+    _create_job_from_targets,
+    _enumerate_all_entra_groups,
+    _enumerate_all_mailboxes,
+    _extract_sharing_link_group_and_type,
+    _resolve_credentials,
+    _sharing_link_risk_label,
+    _to_job_summary,
+)
+from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
+from .db import SessionLocal
+from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
+from .scanners import AuthConfig, probe
+from .schemas import (
+    CreateScanJobRequest,
+    PermissionDeviationItem,
+    ProbeResultResponse,
+    ResolveGroupsResponse,
+    ResolveSharingLinksRequest,
+    ResolveSharingLinksResponse,
+    ScanJobCreateResponse,
+    ScanJobDetail,
+    ScanJobSummary,
+    ScanTargetItem,
+    SharingLinkTypesResponse,
+)
+
+router = APIRouter()
+
+
+@router.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
+def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
+    with SessionLocal() as db:
+        tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
+            db=db,
+            tenant_profile_id=payload.tenant_profile_id,
+            tenant_id=payload.tenant_id,
+            client_id=payload.client_id,
+            client_secret=payload.client_secret,
+        )
+    source_type = "manual"
+    if payload.scan_type == "entra_groups":
+        if payload.scan_all_groups:
+            raw_targets = _enumerate_all_entra_groups(
+                tenant_id=tenant_id,
+                client_id=client_id,
+                client_secret=client_secret,
+                profile_id=profile_id,
+            )
+            source_type = "tenant_all"
+        else:
+            raw_targets = [str(g) for g in payload.group_ids]
+    elif payload.scan_type == "mailbox":
+        if payload.scan_all_mailboxes:
+            organization = payload.organization
+            if (not organization) and profile_id:
+                with SessionLocal() as db:
+                    profile = db.get(TenantProfile, profile_id)
+                    if profile and profile.primary_domain:
+                        organization = profile.primary_domain
+            raw_targets = _enumerate_all_mailboxes(
+                organization=organization,
+                tenant_id=tenant_id,
+                client_id=client_id,
+                client_secret=client_secret,
+                profile_id=profile_id,
+            )
+            source_type = "tenant_all"
+        else:
+            raw_targets = [str(m) for m in payload.mailboxes]
+    else:
+        raw_targets = [str(item) for item in payload.site_urls]
+    return _create_job_from_targets(
+        raw_targets=raw_targets,
+        scan_type=payload.scan_type,
+        skip_default_sites=payload.skip_default_sites,
+        source_type=source_type,
+        tenant_id=tenant_id,
+        client_id=client_id,
+        client_secret=client_secret,
+        tenant_profile_id=profile_id,
+    )
+
+
+@router.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
+def create_scan_job_from_csv(
+    skip_default_sites: bool = True,
+    scan_type: str = Form("sharepoint"),
+    tenant_profile_id: str | None = Form(None),
+    tenant_id: str | None = Form(None),
+    client_id: str | None = Form(None),
+    client_secret: str | None = Form(None),
+    file: UploadFile = File(...),
+) -> ScanJobCreateResponse:
+    with SessionLocal() as db:
+        resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = _resolve_credentials(
+            db=db,
+            tenant_profile_id=tenant_profile_id,
+            tenant_id=tenant_id,
+            client_id=client_id,
+            client_secret=client_secret,
+        )
+    content = file.file.read()
+    if scan_type == "mailbox":
+        parsed = parse_mailboxes_csv(content)
+        targets = parsed.mailboxes
+    elif scan_type == "entra_groups":
+        parsed = parse_entra_groups_csv(content)
+        targets = parsed.urls
+    else:
+        parsed = parse_sites_csv(content)
+        targets = parsed.urls
+    response = _create_job_from_targets(
+        raw_targets=targets,
+        scan_type=scan_type,
+        skip_default_sites=skip_default_sites,
+        source_type="csv",
+        tenant_id=resolved_tenant_id,
+        client_id=resolved_client_id,
+        client_secret=resolved_client_secret,
+        tenant_profile_id=profile_id,
+    )
+
+    if parsed.invalid_rows:
+        csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
+        with SessionLocal() as db:
+            job = db.get(ScanJob, response.job.id)
+            if job:
+                if job.warning_message:
+                    job.warning_message = f"{job.warning_message} | {csv_warning}"
+                else:
+                    job.warning_message = csv_warning
+                job.updated_at = datetime.now(timezone.utc)
+                db.commit()
+                db.refresh(job)
+                response.job.warning_message = job.warning_message
+
+    return response
+
+
+@router.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
+def cancel_scan_job(job_id: str) -> ScanJobSummary:
+    with SessionLocal() as db:
+        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
+        job = db.execute(stmt).unique().scalar_one_or_none()
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status not in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is not queued or running")
+        now = datetime.now(timezone.utc)
+        job.status = "cancelled"
+        job.updated_at = now
+        job.finished_at = now
+        job.scan_activity = None
+        db.commit()
+        db.refresh(job)
+        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
+        job = db.execute(stmt).unique().scalar_one()
+        return _to_job_summary(job)
+
+
+@router.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
+def delete_scan_job(job_id: str) -> Response:
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")
+        db.delete(job)
+        db.commit()
+        return Response(status_code=204)
+
+
+@router.get("/api/scan-jobs", response_model=list[ScanJobSummary])
+def list_scan_jobs(
+    limit: int = 20,
+    tenant_profile_id: str | None = None,
+    scan_type: str | None = None,
+) -> list[ScanJobSummary]:
+    with SessionLocal() as db:
+        stmt = (
+            select(ScanJob)
+            .options(joinedload(ScanJob.tenant_profile))
+            .order_by(ScanJob.created_at.desc())
+            .limit(max(1, min(limit, 100)))
+        )
+        if tenant_profile_id:
+            stmt = stmt.where(ScanJob.tenant_profile_id == tenant_profile_id)
+        if scan_type:
+            stmt = stmt.where(ScanJob.scan_type == scan_type)
+        jobs = list(db.execute(stmt).unique().scalars())
+        return [_to_job_summary(job) for job in jobs]
+
+
+@router.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
+def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        principals = list(
+            db.execute(
+                select(PermissionDeviation.principal).where(PermissionDeviation.job_id == job_id)
+            ).scalars()
+        )
+
+    type_counts: dict[str, int] = {}
+    for principal in principals:
+        parsed = _extract_sharing_link_group_and_type(str(principal or ""))
+        if not parsed:
+            continue
+        _group_name, link_type = parsed
+        type_counts[link_type] = type_counts.get(link_type, 0) + 1
+
+    return SharingLinkTypesResponse(type_counts=type_counts)
+
+
+@router.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
+def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
+    from .scanner import resolve_sharing_link_members
+
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is still running")
+
+        cert_private_key: str | None = None
+        cert_thumbprint: str | None = None
+        cert_public_pem: str | None = None
+        if job.tenant_profile_id:
+            profile = db.get(TenantProfile, job.tenant_profile_id)
+            if profile:
+                cert_private_key = profile.cert_private_key
+                cert_thumbprint = profile.cert_thumbprint
+                cert_public_pem = profile.cert_public_pem
+
+        auth = AuthConfig(
+            tenant_id=job.auth_tenant_id or "",
+            client_id=job.auth_client_id or "",
+            client_secret=job.auth_client_secret or "",
+            cert_private_key=cert_private_key,
+            cert_thumbprint=cert_thumbprint,
+            cert_public_pem=cert_public_pem,
+        )
+
+        all_deviations = list(
+            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
+        )
+
+    # Group by (site_url, principal) so each unique group is resolved once
+    groups: dict[tuple[str, str], list[int]] = {}
+    for dev in all_deviations:
+        parsed = _extract_sharing_link_group_and_type(dev.principal)
+        if not parsed:
+            continue
+        group_name, link_type = parsed
+        if link_type not in payload.link_types:
+            continue
+        key = (dev.site_url, group_name)
+        groups.setdefault(key, []).append(dev.id)
+
+    updated_deviations = 0
+    for (site_url, group_name), dev_ids in groups.items():
+        members = resolve_sharing_link_members(site_url, group_name, auth)
+        resolved_members = ", ".join(members) if members else ""
+        with SessionLocal() as db:
+            for dev_id in dev_ids:
+                dev = db.get(PermissionDeviation, dev_id)
+                if dev:
+                    dev.resolved_members = resolved_members
+            db.commit()
+        updated_deviations += len(dev_ids)
+
+    return ResolveSharingLinksResponse(
+        resolved_groups=len(groups),
+        updated_deviations=updated_deviations,
+    )
+
+
+@router.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
+def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
+    """
+    Expand group principals on this job's deviations and write each group's
+    member list to permission_deviations.resolved_members. Handles both
+    classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups
+    assigned directly at root (via Microsoft Graph). Skips email-shape users
+    and SharingLinks groups (those have their own resolver).
+    """
+    from .scanners.sharepoint import (
+        is_aad_group_principal,
+        is_sharepoint_group_principal,
+        resolve_aad_group_members,
+        resolve_sharing_link_members,
+    )
+
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is still running")
+        if (job.scan_type or "sharepoint") == "mailbox":
+            raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")
+
+        cert_private_key: str | None = None
+        cert_thumbprint: str | None = None
+        cert_public_pem: str | None = None
+        if job.tenant_profile_id:
+            profile = db.get(TenantProfile, job.tenant_profile_id)
+            if profile:
+                cert_private_key = profile.cert_private_key
+                cert_thumbprint = profile.cert_thumbprint
+                cert_public_pem = profile.cert_public_pem
+
+        auth = AuthConfig(
+            tenant_id=job.auth_tenant_id or "",
+            client_id=job.auth_client_id or "",
+            client_secret=job.auth_client_secret or "",
+            cert_private_key=cert_private_key,
+            cert_thumbprint=cert_thumbprint,
+            cert_public_pem=cert_public_pem,
+        )
+
+        all_deviations = list(
+            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
+        )
+
+    # Group deviations by (site_url, principal) so each unique group is resolved once
+    groups: dict[tuple[str, str], list[int]] = {}
+    for dev in all_deviations:
+        if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
+            continue
+        key = (dev.site_url, dev.principal)
+        groups.setdefault(key, []).append(dev.id)
+
+    resolved = 0
+    skipped = 0
+    updated = 0
+    for (site_url, group_name), dev_ids in groups.items():
+        try:
+            if is_aad_group_principal(group_name):
+                members = resolve_aad_group_members(group_name, auth)
+            else:
+                members = resolve_sharing_link_members(site_url, group_name, auth)
+        except Exception:  # noqa: BLE001
+            members = []
+
+        if not members:
+            skipped += 1
+            continue
+
+        resolved_text = ", ".join(members)
+        with SessionLocal() as db:
+            for dev_id in dev_ids:
+                dev = db.get(PermissionDeviation, dev_id)
+                if dev:
+                    dev.resolved_members = resolved_text
+            db.commit()
+        resolved += 1
+        updated += len(dev_ids)
+
+    return ResolveGroupsResponse(
+        resolved_groups=resolved,
+        skipped_groups=skipped,
+        updated_deviations=updated,
+    )
+
+
+@router.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
+def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        target = db.get(ScanTarget, target_id)
+        if not target or target.job_id != job_id:
+            raise HTTPException(status_code=404, detail="Target not found")
+        if job.status in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is still running")
+
+        cert_private_key: str | None = None
+        cert_thumbprint: str | None = None
+        cert_public_pem: str | None = None
+        if job.tenant_profile_id:
+            profile = db.get(TenantProfile, job.tenant_profile_id)
+            if profile:
+                cert_private_key = profile.cert_private_key
+                cert_thumbprint = profile.cert_thumbprint
+                cert_public_pem = profile.cert_public_pem
+
+        auth = AuthConfig(
+            tenant_id=job.auth_tenant_id or "",
+            client_id=job.auth_client_id or "",
+            client_secret=job.auth_client_secret or "",
+            cert_private_key=cert_private_key,
+            cert_thumbprint=cert_thumbprint,
+            cert_public_pem=cert_public_pem,
+        )
+        site_url = target.site_url
+        job_scan_type = job.scan_type or "sharepoint"
+
+    result = probe(job_scan_type, site_url, auth)
+
+    with SessionLocal() as db:
+        target = db.get(ScanTarget, target_id)
+        if not target:
+            raise HTTPException(status_code=404, detail="Target not found")
+        now = datetime.now(timezone.utc)
+        target.last_probe_at = now
+        target.last_probe_ok = result.ok
+        target.last_probe_message = result.message
+        target.updated_at = now
+        db.commit()
+        db.refresh(target)
+        return ProbeResultResponse(
+            target_id=target.id,
+            ok=result.ok,
+            message=result.message,
+            last_probe_at=target.last_probe_at,
+        )
+
+
+@router.get("/api/scan-jobs/{job_id}/export")
+def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
+    import openpyxl
+    from openpyxl.styles import Font, PatternFill
+
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
+        if site_url:
+            targets_q = targets_q.where(ScanTarget.site_url == site_url)
+        targets = list(db.execute(targets_q).scalars())
+
+        deviations_q = (
+            select(PermissionDeviation)
+            .where(PermissionDeviation.job_id == job.id)
+            .order_by(PermissionDeviation.id.desc())
+        )
+        if site_url:
+            deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
+        deviations = list(db.execute(deviations_q).scalars())
+
+    wb = openpyxl.Workbook()
+    header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
+    header_font_white = Font(bold=True, color="FFFFFF")
+
+    _risk_styles: dict[str, tuple] = {
+        "Critical": (
+            PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
+            Font(bold=True, color="7B0000"),
+        ),
+        "High": (
+            PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
+            Font(bold=True, color="7C2D00"),
+        ),
+        "Low": (
+            PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
+            Font(bold=True, color="0C4A6E"),
+        ),
+        "Unknown": (
+            PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
+            Font(bold=True, color="555555"),
+        ),
+    }
+
+    def _style_header(ws, headers):
+        ws.append(headers)
+        for cell in ws[1]:
+            cell.font = header_font_white
+            cell.fill = header_fill
+
+    scan_type = job.scan_type or "sharepoint"
+
+    target_label = {
+        "sharepoint": "Site URL",
+        "sharepoint_root": "Site URL",
+        "mailbox": "Mailbox",
+        "entra_groups": "Group",
+    }.get(scan_type, "Target")
+
+    # Targets sheet
+    ws_targets = wb.active
+    ws_targets.title = "Targets"
+    _style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
+    for t in targets:
+        ws_targets.append([
+            t.site_url,
+            t.status,
+            t.attempts,
+            t.error_message or "",
+            t.started_at.isoformat() if t.started_at else "",
+            t.finished_at.isoformat() if t.finished_at else "",
+        ])
+    for col in ws_targets.columns:
+        ws_targets.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
+
+    # Results sheet — name and columns depend on scan type
+    if scan_type == "mailbox":
+        ws_dev = wb.create_sheet("Mailbox Permissions")
+        _style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
+        deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
+        for d in deviations:
+            ws_dev.append([
+                d.site_url,
+                d.object_url,
+                d.permission_type or d.object_type,
+                d.principal,
+                d.role_name,
+            ])
+    elif scan_type == "entra_groups":
+        ws_dev = wb.create_sheet("Group Memberships")
+        _style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
+        deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
+        for d in deviations:
+            ws_dev.append([
+                d.object_url,
+                d.permission_type or "",
+                d.principal,
+                d.role_name,
+            ])
+    elif scan_type == "sharepoint_root":
+        ws_dev = wb.create_sheet("Root Permissions")
+        _style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
+        deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
+        for d in deviations:
+            ws_dev.append([
+                d.site_url,
+                d.principal,
+                d.resolved_members or "",
+                d.role_name,
+            ])
+    else:
+        ws_dev = wb.create_sheet("Deviations")
+        _style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
+        deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
+        for d in deviations:
+            base = (d.site_url or "").rstrip("/")
+            obj_rel = d.object_url[len(base):] if base and d.object_url.startswith(base) else d.object_url
+            link_risk = _sharing_link_risk_label(d.principal)
+            ws_dev.append([
+                d.site_url,
+                obj_rel,
+                d.object_type,
+                d.principal,
+                link_risk,
+                d.resolved_members or "",
+                d.role_name,
+                d.delta_type,
+            ])
+            if link_risk in _risk_styles:
+                risk_fill, risk_font = _risk_styles[link_risk]
+                risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
+                risk_cell.fill = risk_fill
+                risk_cell.font = risk_font
+    for col in ws_dev.columns:
+        ws_dev.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
+
+    buf = io.BytesIO()
+    wb.save(buf)
+    buf.seek(0)
+
+    filename = _build_export_filename(job, job_id)
+    return StreamingResponse(
+        buf,
+        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
+    )
+
+
+@router.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
+def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
+        if site_url:
+            targets_q = targets_q.where(ScanTarget.site_url == site_url)
+        targets = list(db.execute(targets_q).scalars())
+
+        deviations_q = (
+            select(PermissionDeviation)
+            .where(PermissionDeviation.job_id == job.id)
+            .order_by(PermissionDeviation.site_url.asc(), PermissionDeviation.object_url.asc(), PermissionDeviation.id.asc())
+        )
+        if site_url:
+            deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
+        else:
+            deviations_q = deviations_q.limit(1000)
+        deviations = list(db.execute(deviations_q).scalars())
+
+        return ScanJobDetail(
+            **_to_job_summary(job).model_dump(),
+            targets=[
+                ScanTargetItem(
+                    id=t.id,
+                    site_url=t.site_url,
+                    status=t.status,
+                    attempts=t.attempts,
+                    error_message=t.error_message,
+                    started_at=t.started_at,
+                    finished_at=t.finished_at,
+                    last_probe_at=t.last_probe_at,
+                    last_probe_ok=t.last_probe_ok,
+                    last_probe_message=t.last_probe_message,
+                )
+                for t in targets
+            ],
+            deviations=[
+                PermissionDeviationItem(
+                    id=d.id,
+                    site_url=d.site_url,
+                    object_url=d.object_url,
+                    object_type=d.object_type,
+                    principal=d.principal,
+                    role_name=d.role_name,
+                    delta_type=d.delta_type,
+                    permission_type=d.permission_type,
+                    resolved_members=d.resolved_members,
+                    created_at=d.created_at,
+                )
+                for d in deviations
+            ],
+        )
diff --git a/containers/clearview/src/clearview_app/api_onboarding.py b/containers/clearview/src/clearview_app/api_onboarding.py
new file mode 100644
index 0000000..979107c
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_onboarding.py
@@ -0,0 +1,76 @@
+"""Microsoft onboarding routes (admin-consent connect + scan-app creation)."""
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import RedirectResponse
+
+from .onboarding import (
+    OnboardingError,
+    consume_callback_state,
+    create_connect_url,
+    create_scan_app_for_tenant,
+)
+from .schemas import (
+    ConnectMicrosoftResponse,
+    CreateScanAppRequest,
+    CreateScanAppResponse,
+)
+
+router = APIRouter()
+
+
+@router.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
+def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
+    try:
+        result = create_scan_app_for_tenant(
+            tenant_id=payload.tenant_id,
+            display_name=payload.display_name,
+        )
+    except OnboardingError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {exc}") from exc
+
+    return CreateScanAppResponse(
+        tenant_id=result.tenant_id,
+        client_id=result.client_id,
+        client_secret=result.client_secret,
+        app_object_id=result.app_object_id,
+        service_principal_id=result.service_principal_id,
+        display_name=result.display_name,
+    )
+
+
+@router.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
+def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
+    try:
+        return ConnectMicrosoftResponse(connect_url=create_connect_url())
+    except OnboardingError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+
+
+@router.get("/api/onboarding/microsoft/callback")
+def onboarding_microsoft_callback(
+    tenant: str | None = None,
+    state: str | None = None,
+    error: str | None = None,
+    error_description: str | None = None,
+) -> RedirectResponse:
+    if error:
+        message = (error_description or error).replace(" ", "+")
+        return RedirectResponse(url=f"/?onboarding_status=error&onboarding_message={message}")
+
+    if not state or not consume_callback_state(state):
+        return RedirectResponse(url="/?onboarding_status=error&onboarding_message=invalid_or_expired_state")
+
+    if not tenant:
+        return RedirectResponse(url="/?onboarding_status=error&onboarding_message=missing_tenant")
+
+    return RedirectResponse(url=f"/?onboarding_status=connected&tenant_id={tenant}")
+
+
+@router.get("/api/onboarding/status")
+def onboarding_status() -> dict[str, bool]:
+    from . import config
+    automated = bool(config.ONBOARDING_CLIENT_ID and config.ONBOARDING_CLIENT_SECRET and config.ONBOARDING_REDIRECT_URI)
+    return {"automated_available": automated}
diff --git a/containers/clearview/src/clearview_app/api_tenants.py b/containers/clearview/src/clearview_app/api_tenants.py
new file mode 100644
index 0000000..f8cb473
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_tenants.py
@@ -0,0 +1,86 @@
+"""Tenant profile + certificate routes."""
+from __future__ import annotations
+
+import uuid
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import Response
+from sqlalchemy import select, text
+
+from .api_helpers import _to_tenant_item
+from .cert import generate_tenant_certificate
+from .db import SessionLocal
+from .models import TenantProfile
+from .schemas import (
+    CreateTenantProfileRequest,
+    TenantCertificateResponse,
+    TenantProfileItem,
+)
+
+router = APIRouter()
+
+
+@router.get("/api/tenants", response_model=list[TenantProfileItem])
+def list_tenants() -> list[TenantProfileItem]:
+    with SessionLocal() as db:
+        profiles = list(
+            db.execute(select(TenantProfile).order_by(TenantProfile.created_at.asc())).scalars()
+        )
+        return [_to_tenant_item(p) for p in profiles]
+
+
+@router.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
+def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
+    with SessionLocal() as db:
+        now = datetime.now(timezone.utc)
+        profile = TenantProfile(
+            id=str(uuid.uuid4()),
+            name=payload.name.strip(),
+            tenant_id=payload.tenant_id.strip(),
+            primary_domain=payload.primary_domain.strip().lower() if payload.primary_domain else None,
+            client_id=payload.client_id.strip(),
+            client_secret=payload.client_secret.strip() if payload.client_secret else None,
+            created_at=now,
+            updated_at=now,
+        )
+        db.add(profile)
+        db.commit()
+        db.refresh(profile)
+        return _to_tenant_item(profile)
+
+
+@router.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
+def generate_certificate(profile_id: str) -> TenantCertificateResponse:
+    with SessionLocal() as db:
+        profile = db.get(TenantProfile, profile_id)
+        if not profile:
+            raise HTTPException(status_code=404, detail="Tenant profile not found")
+        result = generate_tenant_certificate()
+        profile.cert_private_key = result.private_key_pem
+        profile.cert_public_pem = result.public_cert_pem
+        profile.cert_thumbprint = result.thumbprint
+        profile.cert_expires_at = result.expires_at
+        profile.updated_at = datetime.now(timezone.utc)
+        db.commit()
+        return TenantCertificateResponse(
+            thumbprint=result.thumbprint,
+            expires_at=result.expires_at,
+            public_cert_pem=result.public_cert_pem,
+        )
+
+
+@router.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
+def delete_tenant(profile_id: str) -> Response:
+    with SessionLocal() as db:
+        profile = db.get(TenantProfile, profile_id)
+        if not profile:
+            raise HTTPException(status_code=404, detail="Tenant profile not found")
+        # Detach jobs from this profile before deleting
+        db.execute(
+            text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid"),
+            {"pid": profile_id},
+        )
+        db.delete(profile)
+        db.commit()
+        return Response(status_code=204)
diff --git a/containers/clearview/src/clearview_app/cert.py b/containers/clearview/src/clearview_app/cert.py
index accbb54..85f035f 100644
--- a/containers/clearview/src/clearview_app/cert.py
+++ b/containers/clearview/src/clearview_app/cert.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import hashlib
 from dataclasses import dataclass
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 
 from cryptography import x509
 from cryptography.hazmat.primitives import hashes, serialization
@@ -30,7 +30,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
     subject = x509.Name([
         x509.NameAttribute(NameOID.COMMON_NAME, "Clearview Scan App"),
     ])
-    expires_at = datetime.utcnow() + timedelta(days=365 * valid_years)
+    expires_at = datetime.now(timezone.utc) + timedelta(days=365 * valid_years)
 
     cert = (
         x509.CertificateBuilder()
@@ -38,7 +38,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
         .issuer_name(subject)
         .public_key(private_key.public_key())
         .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.utcnow())
+        .not_valid_before(datetime.now(timezone.utc))
         .not_valid_after(expires_at)
         .sign(private_key, hashes.SHA256())
     )
diff --git a/containers/clearview/src/clearview_app/db_migrate.py b/containers/clearview/src/clearview_app/db_migrate.py
new file mode 100644
index 0000000..02bfefd
--- /dev/null
+++ b/containers/clearview/src/clearview_app/db_migrate.py
@@ -0,0 +1,53 @@
+"""Database migration bootstrap.
+
+Replaces the previous ``Base.metadata.create_all`` + ``_ensure_schema_columns``
+startup path with Alembic. The bootstrap is idempotent and handles three cases:
+
+* **Fresh database** (no tables): run ``upgrade head`` to create the schema and
+  record the Alembic version.
+* **Existing pre-Alembic database** (tables present, no ``alembic_version``):
+  ``stamp head`` — adopt the baseline without re-creating existing tables.
+* **Already under Alembic**: run ``upgrade head`` to apply any new revisions.
+"""
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from alembic import command
+from alembic.config import Config
+from sqlalchemy import inspect
+
+from .db import engine
+
+log = logging.getLogger(__name__)
+
+_MIGRATIONS_DIR = Path(__file__).resolve().parent / "migrations"
+# A table that exists in every pre-Alembic Clearview database; its presence
+# (without alembic_version) marks a database that predates Alembic adoption.
+_SENTINEL_TABLE = "scan_jobs"
+
+
+def _alembic_config() -> Config:
+    cfg = Config()
+    cfg.set_main_option("script_location", str(_MIGRATIONS_DIR))
+    return cfg
+
+
+_BASELINE_REVISION = "0001_baseline"
+
+
+def run_migrations() -> None:
+    """Bring the database schema up to date (see module docstring)."""
+    cfg = _alembic_config()
+    tables = set(inspect(engine).get_table_names())
+
+    if "alembic_version" not in tables and _SENTINEL_TABLE in tables:
+        # Pre-Alembic DB: it already matches the baseline, so adopt that
+        # revision without re-creating tables, then let upgrade apply any
+        # later migrations (e.g. the timestamptz conversion in 0002).
+        log.info("Existing pre-Alembic schema detected; stamping baseline %s.", _BASELINE_REVISION)
+        command.stamp(cfg, _BASELINE_REVISION)
+
+    log.info("Applying Alembic migrations (upgrade head).")
+    command.upgrade(cfg, "head")
diff --git a/containers/clearview/src/clearview_app/main.py b/containers/clearview/src/clearview_app/main.py
index 7a8ff30..bb1da71 100644
--- a/containers/clearview/src/clearview_app/main.py
+++ b/containers/clearview/src/clearview_app/main.py
@@ -1,43 +1,21 @@
+"""Clearview API composition root.
+
+Routes live in the ``api_tenants``, ``api_jobs``, and ``api_onboarding`` modules
+(shared helpers in ``api_helpers``). This module only wires the FastAPI app,
+the scan worker lifecycle, health/version endpoints, and static file serving.
+"""
 from __future__ import annotations
 
-import io
-import re
-import uuid
-from datetime import datetime
 from pathlib import Path
 
-from fastapi import FastAPI, File, Form, HTTPException, UploadFile
-from fastapi.responses import FileResponse, RedirectResponse, Response, StreamingResponse
+from fastapi import FastAPI
+from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
-from sqlalchemy import select, text
-from sqlalchemy.orm import joinedload
 
-from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
-from .db import SessionLocal, engine
-from .default_sites import is_default_site, normalize_site_url
-from .models import Base, PermissionDeviation, ScanJob, ScanTarget, TenantProfile
-from .onboarding import OnboardingError, consume_callback_state, create_connect_url, create_scan_app_for_tenant
-from .cert import generate_tenant_certificate
-from .schemas import (
-    ConnectMicrosoftResponse,
-    CreateScanAppRequest,
-    CreateScanAppResponse,
-    CreateScanJobRequest,
-    CreateTenantProfileRequest,
-    PermissionDeviationItem,
-    ProbeResultResponse,
-    ResolveGroupsResponse,
-    ResolveSharingLinksRequest,
-    ResolveSharingLinksResponse,
-    SharingLinkTypesResponse,
-    ScanJobCreateResponse,
-    ScanJobDetail,
-    ScanJobSummary,
-    ScanTargetItem,
-    TenantCertificateResponse,
-    TenantProfileItem,
-)
-from .scanners import AuthConfig, probe
+from .api_jobs import router as jobs_router
+from .api_onboarding import router as onboarding_router
+from .api_tenants import router as tenants_router
+from .db_migrate import run_migrations
 from .version import display_version
 from .worker import ScanWorker
 
@@ -47,38 +25,9 @@ worker = ScanWorker()
 SITE_DIR = Path(__file__).resolve().parents[2] / "site"
 
 
-def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
-    """
-    Extract (group_name, link_type) from principal values such as:
-    - SharingLinks.<guid>.<LinkType>.<guid>
-    - c:0o.c|federateddirectoryclaimprovider|SharingLinks.<guid>.<LinkType>.<guid>
-    """
-    if not principal:
-        return None
-
-    text = principal.strip()
-    segments = [s.strip() for s in text.split("|") if s.strip()]
-
-    candidate = ""
-    for segment in reversed(segments):
-        if segment.lower().startswith("sharinglinks."):
-            candidate = segment
-            break
-    if not candidate and text.lower().startswith("sharinglinks."):
-        candidate = text
-    if not candidate:
-        return None
-
-    parts = candidate.split(".")
-    if len(parts) < 3:
-        return None
-    return candidate, parts[2]
-
-
 @app.on_event("startup")
 def on_startup() -> None:
-    Base.metadata.create_all(bind=engine)
-    _ensure_schema_columns()
+    run_migrations()
     worker.start()
 
 
@@ -98,747 +47,13 @@ def version() -> dict[str, str]:
     return {"version": display_version()}
 
 
-# ---------------------------------------------------------------------------
-# Tenant profiles
-# ---------------------------------------------------------------------------
-
-@app.get("/api/tenants", response_model=list[TenantProfileItem])
-def list_tenants() -> list[TenantProfileItem]:
-    with SessionLocal() as db:
-        profiles = list(
-            db.execute(select(TenantProfile).order_by(TenantProfile.created_at.asc())).scalars()
-        )
-        return [_to_tenant_item(p) for p in profiles]
-
-
-@app.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
-def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
-    with SessionLocal() as db:
-        now = datetime.utcnow()
-        profile = TenantProfile(
-            id=str(uuid.uuid4()),
-            name=payload.name.strip(),
-            tenant_id=payload.tenant_id.strip(),
-            primary_domain=payload.primary_domain.strip().lower() if payload.primary_domain else None,
-            client_id=payload.client_id.strip(),
-            client_secret=payload.client_secret.strip() if payload.client_secret else None,
-            created_at=now,
-            updated_at=now,
-        )
-        db.add(profile)
-        db.commit()
-        db.refresh(profile)
-        return _to_tenant_item(profile)
-
-
-@app.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
-def generate_certificate(profile_id: str) -> TenantCertificateResponse:
-    with SessionLocal() as db:
-        profile = db.get(TenantProfile, profile_id)
-        if not profile:
-            raise HTTPException(status_code=404, detail="Tenant profile not found")
-        result = generate_tenant_certificate()
-        profile.cert_private_key = result.private_key_pem
-        profile.cert_public_pem = result.public_cert_pem
-        profile.cert_thumbprint = result.thumbprint
-        profile.cert_expires_at = result.expires_at
-        profile.updated_at = datetime.utcnow()
-        db.commit()
-        return TenantCertificateResponse(
-            thumbprint=result.thumbprint,
-            expires_at=result.expires_at,
-            public_cert_pem=result.public_cert_pem,
-        )
-
-
-@app.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
-def delete_tenant(profile_id: str) -> Response:
-    with SessionLocal() as db:
-        profile = db.get(TenantProfile, profile_id)
-        if not profile:
-            raise HTTPException(status_code=404, detail="Tenant profile not found")
-        # Detach jobs from this profile before deleting
-        db.execute(
-            text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid"),
-            {"pid": profile_id},
-        )
-        db.delete(profile)
-        db.commit()
-        return Response(status_code=204)
+app.include_router(tenants_router)
+app.include_router(jobs_router)
+app.include_router(onboarding_router)
 
 
 # ---------------------------------------------------------------------------
-# Scan jobs
-# ---------------------------------------------------------------------------
-
-@app.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
-def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
-    with SessionLocal() as db:
-        tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
-            db=db,
-            tenant_profile_id=payload.tenant_profile_id,
-            tenant_id=payload.tenant_id,
-            client_id=payload.client_id,
-            client_secret=payload.client_secret,
-        )
-    source_type = "manual"
-    if payload.scan_type == "entra_groups":
-        if payload.scan_all_groups:
-            raw_targets = _enumerate_all_entra_groups(
-                tenant_id=tenant_id,
-                client_id=client_id,
-                client_secret=client_secret,
-                profile_id=profile_id,
-            )
-            source_type = "tenant_all"
-        else:
-            raw_targets = [str(g) for g in payload.group_ids]
-    elif payload.scan_type == "mailbox":
-        if payload.scan_all_mailboxes:
-            organization = payload.organization
-            if (not organization) and profile_id:
-                with SessionLocal() as db:
-                    profile = db.get(TenantProfile, profile_id)
-                    if profile and profile.primary_domain:
-                        organization = profile.primary_domain
-            raw_targets = _enumerate_all_mailboxes(
-                organization=organization,
-                tenant_id=tenant_id,
-                client_id=client_id,
-                client_secret=client_secret,
-                profile_id=profile_id,
-            )
-            source_type = "tenant_all"
-        else:
-            raw_targets = [str(m) for m in payload.mailboxes]
-    else:
-        raw_targets = [str(item) for item in payload.site_urls]
-    return _create_job_from_targets(
-        raw_targets=raw_targets,
-        scan_type=payload.scan_type,
-        skip_default_sites=payload.skip_default_sites,
-        source_type=source_type,
-        tenant_id=tenant_id,
-        client_id=client_id,
-        client_secret=client_secret,
-        tenant_profile_id=profile_id,
-    )
-
-
-@app.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
-def create_scan_job_from_csv(
-    skip_default_sites: bool = True,
-    scan_type: str = Form("sharepoint"),
-    tenant_profile_id: str | None = Form(None),
-    tenant_id: str | None = Form(None),
-    client_id: str | None = Form(None),
-    client_secret: str | None = Form(None),
-    file: UploadFile = File(...),
-) -> ScanJobCreateResponse:
-    with SessionLocal() as db:
-        resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = _resolve_credentials(
-            db=db,
-            tenant_profile_id=tenant_profile_id,
-            tenant_id=tenant_id,
-            client_id=client_id,
-            client_secret=client_secret,
-        )
-    content = file.file.read()
-    if scan_type == "mailbox":
-        parsed = parse_mailboxes_csv(content)
-        targets = parsed.mailboxes
-    elif scan_type == "entra_groups":
-        parsed = parse_entra_groups_csv(content)
-        targets = parsed.urls
-    else:
-        parsed = parse_sites_csv(content)
-        targets = parsed.urls
-    response = _create_job_from_targets(
-        raw_targets=targets,
-        scan_type=scan_type,
-        skip_default_sites=skip_default_sites,
-        source_type="csv",
-        tenant_id=resolved_tenant_id,
-        client_id=resolved_client_id,
-        client_secret=resolved_client_secret,
-        tenant_profile_id=profile_id,
-    )
-
-    if parsed.invalid_rows:
-        csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
-        with SessionLocal() as db:
-            job = db.get(ScanJob, response.job.id)
-            if job:
-                if job.warning_message:
-                    job.warning_message = f"{job.warning_message} | {csv_warning}"
-                else:
-                    job.warning_message = csv_warning
-                job.updated_at = datetime.utcnow()
-                db.commit()
-                db.refresh(job)
-                response.job.warning_message = job.warning_message
-
-    return response
-
-
-@app.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
-def cancel_scan_job(job_id: str) -> ScanJobSummary:
-    with SessionLocal() as db:
-        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
-        job = db.execute(stmt).unique().scalar_one_or_none()
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-        if job.status not in ("queued", "running"):
-            raise HTTPException(status_code=409, detail="Job is not queued or running")
-        now = datetime.utcnow()
-        job.status = "cancelled"
-        job.updated_at = now
-        job.finished_at = now
-        job.scan_activity = None
-        db.commit()
-        db.refresh(job)
-        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
-        job = db.execute(stmt).unique().scalar_one()
-        return _to_job_summary(job)
-
-
-@app.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
-def delete_scan_job(job_id: str) -> Response:
-    with SessionLocal() as db:
-        job = db.get(ScanJob, job_id)
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-        if job.status in ("queued", "running"):
-            raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")
-        db.delete(job)
-        db.commit()
-        return Response(status_code=204)
-
-
-@app.get("/api/scan-jobs", response_model=list[ScanJobSummary])
-def list_scan_jobs(
-    limit: int = 20,
-    tenant_profile_id: str | None = None,
-    scan_type: str | None = None,
-) -> list[ScanJobSummary]:
-    with SessionLocal() as db:
-        stmt = (
-            select(ScanJob)
-            .options(joinedload(ScanJob.tenant_profile))
-            .order_by(ScanJob.created_at.desc())
-            .limit(max(1, min(limit, 100)))
-        )
-        if tenant_profile_id:
-            stmt = stmt.where(ScanJob.tenant_profile_id == tenant_profile_id)
-        if scan_type:
-            stmt = stmt.where(ScanJob.scan_type == scan_type)
-        jobs = list(db.execute(stmt).unique().scalars())
-        return [_to_job_summary(job) for job in jobs]
-
-
-@app.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
-def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
-    with SessionLocal() as db:
-        job = db.get(ScanJob, job_id)
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-
-        principals = list(
-            db.execute(
-                select(PermissionDeviation.principal).where(PermissionDeviation.job_id == job_id)
-            ).scalars()
-        )
-
-    type_counts: dict[str, int] = {}
-    for principal in principals:
-        parsed = _extract_sharing_link_group_and_type(str(principal or ""))
-        if not parsed:
-            continue
-        _group_name, link_type = parsed
-        type_counts[link_type] = type_counts.get(link_type, 0) + 1
-
-    return SharingLinkTypesResponse(type_counts=type_counts)
-
-
-@app.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
-def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
-    from .scanner import resolve_sharing_link_members
-
-    with SessionLocal() as db:
-        job = db.get(ScanJob, job_id)
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-        if job.status in ("queued", "running"):
-            raise HTTPException(status_code=409, detail="Job is still running")
-
-        cert_private_key: str | None = None
-        cert_thumbprint: str | None = None
-        cert_public_pem: str | None = None
-        if job.tenant_profile_id:
-            profile = db.get(TenantProfile, job.tenant_profile_id)
-            if profile:
-                cert_private_key = profile.cert_private_key
-                cert_thumbprint = profile.cert_thumbprint
-                cert_public_pem = profile.cert_public_pem
-
-        auth = AuthConfig(
-            tenant_id=job.auth_tenant_id or "",
-            client_id=job.auth_client_id or "",
-            client_secret=job.auth_client_secret or "",
-            cert_private_key=cert_private_key,
-            cert_thumbprint=cert_thumbprint,
-            cert_public_pem=cert_public_pem,
-        )
-
-        all_deviations = list(
-            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
-        )
-
-    # Group by (site_url, principal) so each unique group is resolved once
-    groups: dict[tuple[str, str], list[int]] = {}
-    for dev in all_deviations:
-        parsed = _extract_sharing_link_group_and_type(dev.principal)
-        if not parsed:
-            continue
-        group_name, link_type = parsed
-        if link_type not in payload.link_types:
-            continue
-        key = (dev.site_url, group_name)
-        groups.setdefault(key, []).append(dev.id)
-
-    updated_deviations = 0
-    for (site_url, group_name), dev_ids in groups.items():
-        members = resolve_sharing_link_members(site_url, group_name, auth)
-        resolved_members = ", ".join(members) if members else ""
-        with SessionLocal() as db:
-            for dev_id in dev_ids:
-                dev = db.get(PermissionDeviation, dev_id)
-                if dev:
-                    dev.resolved_members = resolved_members
-            db.commit()
-        updated_deviations += len(dev_ids)
-
-    return ResolveSharingLinksResponse(
-        resolved_groups=len(groups),
-        updated_deviations=updated_deviations,
-    )
-
-
-@app.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
-def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
-    """
-    Expand group principals on this job's deviations and write each group's
-    member list to permission_deviations.resolved_members. Handles both
-    classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups
-    assigned directly at root (via Microsoft Graph). Skips email-shape users
-    and SharingLinks groups (those have their own resolver).
-    """
-    from .scanners.sharepoint import (
-        is_aad_group_principal,
-        is_sharepoint_group_principal,
-        resolve_aad_group_members,
-        resolve_sharing_link_members,
-    )
-
-    with SessionLocal() as db:
-        job = db.get(ScanJob, job_id)
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-        if job.status in ("queued", "running"):
-            raise HTTPException(status_code=409, detail="Job is still running")
-        if (job.scan_type or "sharepoint") == "mailbox":
-            raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")
-
-        cert_private_key: str | None = None
-        cert_thumbprint: str | None = None
-        cert_public_pem: str | None = None
-        if job.tenant_profile_id:
-            profile = db.get(TenantProfile, job.tenant_profile_id)
-            if profile:
-                cert_private_key = profile.cert_private_key
-                cert_thumbprint = profile.cert_thumbprint
-                cert_public_pem = profile.cert_public_pem
-
-        auth = AuthConfig(
-            tenant_id=job.auth_tenant_id or "",
-            client_id=job.auth_client_id or "",
-            client_secret=job.auth_client_secret or "",
-            cert_private_key=cert_private_key,
-            cert_thumbprint=cert_thumbprint,
-            cert_public_pem=cert_public_pem,
-        )
-
-        all_deviations = list(
-            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
-        )
-
-    # Group deviations by (site_url, principal) so each unique group is resolved once
-    groups: dict[tuple[str, str], list[int]] = {}
-    for dev in all_deviations:
-        if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
-            continue
-        key = (dev.site_url, dev.principal)
-        groups.setdefault(key, []).append(dev.id)
-
-    resolved = 0
-    skipped = 0
-    updated = 0
-    for (site_url, group_name), dev_ids in groups.items():
-        try:
-            if is_aad_group_principal(group_name):
-                members = resolve_aad_group_members(group_name, auth)
-            else:
-                members = resolve_sharing_link_members(site_url, group_name, auth)
-        except Exception:  # noqa: BLE001
-            members = []
-
-        if not members:
-            skipped += 1
-            continue
-
-        resolved_text = ", ".join(members)
-        with SessionLocal() as db:
-            for dev_id in dev_ids:
-                dev = db.get(PermissionDeviation, dev_id)
-                if dev:
-                    dev.resolved_members = resolved_text
-            db.commit()
-        resolved += 1
-        updated += len(dev_ids)
-
-    return ResolveGroupsResponse(
-        resolved_groups=resolved,
-        skipped_groups=skipped,
-        updated_deviations=updated,
-    )
-
-
-@app.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
-def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
-    with SessionLocal() as db:
-        job = db.get(ScanJob, job_id)
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-        target = db.get(ScanTarget, target_id)
-        if not target or target.job_id != job_id:
-            raise HTTPException(status_code=404, detail="Target not found")
-        if job.status in ("queued", "running"):
-            raise HTTPException(status_code=409, detail="Job is still running")
-
-        cert_private_key: str | None = None
-        cert_thumbprint: str | None = None
-        cert_public_pem: str | None = None
-        if job.tenant_profile_id:
-            profile = db.get(TenantProfile, job.tenant_profile_id)
-            if profile:
-                cert_private_key = profile.cert_private_key
-                cert_thumbprint = profile.cert_thumbprint
-                cert_public_pem = profile.cert_public_pem
-
-        auth = AuthConfig(
-            tenant_id=job.auth_tenant_id or "",
-            client_id=job.auth_client_id or "",
-            client_secret=job.auth_client_secret or "",
-            cert_private_key=cert_private_key,
-            cert_thumbprint=cert_thumbprint,
-            cert_public_pem=cert_public_pem,
-        )
-        site_url = target.site_url
-        job_scan_type = job.scan_type or "sharepoint"
-
-    result = probe(job_scan_type, site_url, auth)
-
-    with SessionLocal() as db:
-        target = db.get(ScanTarget, target_id)
-        if not target:
-            raise HTTPException(status_code=404, detail="Target not found")
-        now = datetime.utcnow()
-        target.last_probe_at = now
-        target.last_probe_ok = result.ok
-        target.last_probe_message = result.message
-        target.updated_at = now
-        db.commit()
-        db.refresh(target)
-        return ProbeResultResponse(
-            target_id=target.id,
-            ok=result.ok,
-            message=result.message,
-            last_probe_at=target.last_probe_at,
-        )
-
-
-@app.get("/api/scan-jobs/{job_id}/export")
-def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
-    import openpyxl
-    from openpyxl.styles import Font, PatternFill
-
-    with SessionLocal() as db:
-        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-
-        targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
-        if site_url:
-            targets_q = targets_q.where(ScanTarget.site_url == site_url)
-        targets = list(db.execute(targets_q).scalars())
-
-        deviations_q = (
-            select(PermissionDeviation)
-            .where(PermissionDeviation.job_id == job.id)
-            .order_by(PermissionDeviation.id.desc())
-        )
-        if site_url:
-            deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
-        deviations = list(db.execute(deviations_q).scalars())
-
-    wb = openpyxl.Workbook()
-    header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
-    header_font_white = Font(bold=True, color="FFFFFF")
-
-    _risk_styles: dict[str, tuple] = {
-        "Critical": (
-            PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
-            Font(bold=True, color="7B0000"),
-        ),
-        "High": (
-            PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
-            Font(bold=True, color="7C2D00"),
-        ),
-        "Low": (
-            PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
-            Font(bold=True, color="0C4A6E"),
-        ),
-        "Unknown": (
-            PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
-            Font(bold=True, color="555555"),
-        ),
-    }
-
-    def _style_header(ws, headers):
-        ws.append(headers)
-        for cell in ws[1]:
-            cell.font = header_font_white
-            cell.fill = header_fill
-
-    scan_type = job.scan_type or "sharepoint"
-
-    target_label = {
-        "sharepoint": "Site URL",
-        "sharepoint_root": "Site URL",
-        "mailbox": "Mailbox",
-        "entra_groups": "Group",
-    }.get(scan_type, "Target")
-
-    # Targets sheet
-    ws_targets = wb.active
-    ws_targets.title = "Targets"
-    _style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
-    for t in targets:
-        ws_targets.append([
-            t.site_url,
-            t.status,
-            t.attempts,
-            t.error_message or "",
-            t.started_at.isoformat() if t.started_at else "",
-            t.finished_at.isoformat() if t.finished_at else "",
-        ])
-    for col in ws_targets.columns:
-        ws_targets.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
-
-    # Results sheet — name and columns depend on scan type
-    if scan_type == "mailbox":
-        ws_dev = wb.create_sheet("Mailbox Permissions")
-        _style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
-        deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
-        for d in deviations:
-            ws_dev.append([
-                d.site_url,
-                d.object_url,
-                d.permission_type or d.object_type,
-                d.principal,
-                d.role_name,
-            ])
-    elif scan_type == "entra_groups":
-        ws_dev = wb.create_sheet("Group Memberships")
-        _style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
-        deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
-        for d in deviations:
-            ws_dev.append([
-                d.object_url,
-                d.permission_type or "",
-                d.principal,
-                d.role_name,
-            ])
-    elif scan_type == "sharepoint_root":
-        ws_dev = wb.create_sheet("Root Permissions")
-        _style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
-        deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
-        for d in deviations:
-            ws_dev.append([
-                d.site_url,
-                d.principal,
-                d.resolved_members or "",
-                d.role_name,
-            ])
-    else:
-        ws_dev = wb.create_sheet("Deviations")
-        _style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
-        deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
-        for d in deviations:
-            base = (d.site_url or "").rstrip("/")
-            obj_rel = d.object_url[len(base):] if base and d.object_url.startswith(base) else d.object_url
-            link_risk = _sharing_link_risk_label(d.principal)
-            ws_dev.append([
-                d.site_url,
-                obj_rel,
-                d.object_type,
-                d.principal,
-                link_risk,
-                d.resolved_members or "",
-                d.role_name,
-                d.delta_type,
-            ])
-            if link_risk in _risk_styles:
-                risk_fill, risk_font = _risk_styles[link_risk]
-                risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
-                risk_cell.fill = risk_fill
-                risk_cell.font = risk_font
-    for col in ws_dev.columns:
-        ws_dev.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
-
-    buf = io.BytesIO()
-    wb.save(buf)
-    buf.seek(0)
-
-    filename = _build_export_filename(job, job_id)
-    return StreamingResponse(
-        buf,
-        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
-    )
-
-
-@app.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
-def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
-    with SessionLocal() as db:
-        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
-        if not job:
-            raise HTTPException(status_code=404, detail="Job not found")
-
-        targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
-        if site_url:
-            targets_q = targets_q.where(ScanTarget.site_url == site_url)
-        targets = list(db.execute(targets_q).scalars())
-
-        deviations_q = (
-            select(PermissionDeviation)
-            .where(PermissionDeviation.job_id == job.id)
-            .order_by(PermissionDeviation.site_url.asc(), PermissionDeviation.object_url.asc(), PermissionDeviation.id.asc())
-        )
-        if site_url:
-            deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
-        else:
-            deviations_q = deviations_q.limit(1000)
-        deviations = list(db.execute(deviations_q).scalars())
-
-        return ScanJobDetail(
-            **_to_job_summary(job).model_dump(),
-            targets=[
-                ScanTargetItem(
-                    id=t.id,
-                    site_url=t.site_url,
-                    status=t.status,
-                    attempts=t.attempts,
-                    error_message=t.error_message,
-                    started_at=t.started_at,
-                    finished_at=t.finished_at,
-                    last_probe_at=t.last_probe_at,
-                    last_probe_ok=t.last_probe_ok,
-                    last_probe_message=t.last_probe_message,
-                )
-                for t in targets
-            ],
-            deviations=[
-                PermissionDeviationItem(
-                    id=d.id,
-                    site_url=d.site_url,
-                    object_url=d.object_url,
-                    object_type=d.object_type,
-                    principal=d.principal,
-                    role_name=d.role_name,
-                    delta_type=d.delta_type,
-                    permission_type=d.permission_type,
-                    resolved_members=d.resolved_members,
-                    created_at=d.created_at,
-                )
-                for d in deviations
-            ],
-        )
-
-
-# ---------------------------------------------------------------------------
-# Onboarding
-# ---------------------------------------------------------------------------
-
-@app.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
-def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
-    try:
-        result = create_scan_app_for_tenant(
-            tenant_id=payload.tenant_id,
-            display_name=payload.display_name,
-        )
-    except OnboardingError as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-    except Exception as exc:  # noqa: BLE001
-        raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {exc}") from exc
-
-    return CreateScanAppResponse(
-        tenant_id=result.tenant_id,
-        client_id=result.client_id,
-        client_secret=result.client_secret,
-        app_object_id=result.app_object_id,
-        service_principal_id=result.service_principal_id,
-        display_name=result.display_name,
-    )
-
-
-@app.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
-def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
-    try:
-        return ConnectMicrosoftResponse(connect_url=create_connect_url())
-    except OnboardingError as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@app.get("/api/onboarding/microsoft/callback")
-def onboarding_microsoft_callback(
-    tenant: str | None = None,
-    state: str | None = None,
-    error: str | None = None,
-    error_description: str | None = None,
-) -> RedirectResponse:
-    if error:
-        message = (error_description or error).replace(" ", "+")
-        return RedirectResponse(url=f"/?onboarding_status=error&onboarding_message={message}")
-
-    if not state or not consume_callback_state(state):
-        return RedirectResponse(url="/?onboarding_status=error&onboarding_message=invalid_or_expired_state")
-
-    if not tenant:
-        return RedirectResponse(url="/?onboarding_status=error&onboarding_message=missing_tenant")
-
-    return RedirectResponse(url=f"/?onboarding_status=connected&tenant_id={tenant}")
-
-
-@app.get("/api/onboarding/status")
-def onboarding_status() -> dict[str, bool]:
-    from . import config
-    automated = bool(config.ONBOARDING_CLIENT_ID and config.ONBOARDING_CLIENT_SECRET and config.ONBOARDING_REDIRECT_URI)
-    return {"automated_available": automated}
-
-
-# ---------------------------------------------------------------------------
-# Static files
+# Static files (mounted last so explicit API routes take precedence)
 # ---------------------------------------------------------------------------
 
 @app.get("/")
@@ -847,306 +62,3 @@ def index() -> FileResponse:
 
 
 app.mount("/", StaticFiles(directory=SITE_DIR, html=True), name="site")
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-_SCAN_TYPE_LABELS = {
-    "sharepoint": "Deviations",
-    "sharepoint_root": "Root",
-    "mailbox": "Mailbox",
-    "entra_groups": "EntraGroups",
-}
-
-
-def _build_export_filename(job: ScanJob, job_id: str) -> str:
-    tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
-    safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
-    scan_type = job.scan_type or "sharepoint"
-    type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
-    short_id = job_id.replace("-", "")[-12:]
-    return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
-
-
-def _enumerate_all_entra_groups(
-    tenant_id: str,
-    client_id: str,
-    client_secret: str | None,
-    profile_id: str | None,
-) -> list[str]:
-    cert_private_key: str | None = None
-    cert_thumbprint: str | None = None
-    cert_public_pem: str | None = None
-    if profile_id:
-        with SessionLocal() as db:
-            profile = db.get(TenantProfile, profile_id)
-            if profile:
-                cert_private_key = profile.cert_private_key
-                cert_thumbprint = profile.cert_thumbprint
-                cert_public_pem = profile.cert_public_pem
-
-    auth = AuthConfig(
-        tenant_id=tenant_id,
-        client_id=client_id,
-        client_secret=client_secret or "",
-        cert_private_key=cert_private_key,
-        cert_thumbprint=cert_thumbprint,
-        cert_public_pem=cert_public_pem,
-    )
-
-    from .scanners import entra as _entra
-
-    try:
-        return _entra.list_all_groups(auth)
-    except Exception as exc:  # noqa: BLE001
-        raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc
-
-
-def _enumerate_all_mailboxes(
-    organization: str | None,
-    tenant_id: str,
-    client_id: str,
-    client_secret: str | None,
-    profile_id: str | None,
-) -> list[str]:
-    if not organization or "." not in organization:
-        raise HTTPException(
-            status_code=400,
-            detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
-        )
-
-    cert_private_key: str | None = None
-    cert_thumbprint: str | None = None
-    cert_public_pem: str | None = None
-    if profile_id:
-        with SessionLocal() as db:
-            profile = db.get(TenantProfile, profile_id)
-            if profile:
-                cert_private_key = profile.cert_private_key
-                cert_thumbprint = profile.cert_thumbprint
-                cert_public_pem = profile.cert_public_pem
-
-    auth = AuthConfig(
-        tenant_id=tenant_id,
-        client_id=client_id,
-        client_secret=client_secret or "",
-        cert_private_key=cert_private_key,
-        cert_thumbprint=cert_thumbprint,
-        cert_public_pem=cert_public_pem,
-    )
-
-    from .scanners import mailbox as _mailbox
-
-    try:
-        return _mailbox.list_mailboxes(organization=organization.strip().lower(), auth=auth)
-    except Exception as exc:  # noqa: BLE001
-        raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
-
-
-def _resolve_credentials(
-    db,
-    tenant_profile_id: str | None,
-    tenant_id: str | None,
-    client_id: str | None,
-    client_secret: str | None,
-) -> tuple[str, str, str | None, str | None]:
-    if tenant_profile_id:
-        profile = db.get(TenantProfile, tenant_profile_id)
-        if not profile:
-            raise HTTPException(status_code=404, detail="Tenant profile not found")
-        if not profile.client_secret and not profile.cert_thumbprint:
-            raise HTTPException(
-                status_code=400,
-                detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
-            )
-        return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
-    if tenant_id and client_id and client_secret:
-        return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
-    raise HTTPException(
-        status_code=400,
-        detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
-    )
-
-
-def _create_job_from_targets(
-    raw_targets: list[str],
-    scan_type: str,
-    skip_default_sites: bool,
-    source_type: str,
-    tenant_id: str,
-    client_id: str,
-    client_secret: str,
-    tenant_profile_id: str | None = None,
-) -> ScanJobCreateResponse:
-    accepted: list[str] = []
-    skipped_default_urls: list[str] = []
-    invalid: list[str] = []
-
-    seen: set[str] = set()
-
-    for raw in raw_targets:
-        if scan_type == "mailbox":
-            normalized = (raw or "").strip().lower()
-            if not normalized or "@" not in normalized:
-                invalid.append(raw)
-                continue
-        elif scan_type == "entra_groups":
-            normalized = (raw or "").strip()
-            if not normalized:
-                invalid.append(raw)
-                continue
-        else:
-            normalized = normalize_site_url(raw) or ""
-            if not normalized:
-                invalid.append(raw)
-                continue
-
-        if normalized in seen:
-            continue
-        seen.add(normalized)
-
-        if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
-            skipped_default_urls.append(normalized)
-            continue
-
-        accepted.append(normalized)
-
-    with SessionLocal() as db:
-        now = datetime.utcnow()
-        job = ScanJob(
-            id=str(uuid.uuid4()),
-            source_type=source_type,
-            scan_type=scan_type,
-            status="queued" if accepted else "completed",
-            skip_default_sites=skip_default_sites,
-            tenant_profile_id=tenant_profile_id,
-            auth_tenant_id=tenant_id,
-            auth_client_id=client_id,
-            auth_client_secret=client_secret,
-            total_targets=len(accepted),
-            skipped_targets=len(skipped_default_urls),
-            warning_message=None,
-            error_message=None,
-            created_at=now,
-            updated_at=now,
-            finished_at=now if not accepted else None,
-        )
-
-        if not accepted:
-            if scan_type == "mailbox":
-                job.warning_message = "No scannable mailboxes after validation"
-            else:
-                job.warning_message = "No scannable sites after validation and default-site filtering"
-
-        db.add(job)
-        db.flush()
-
-        for index, target in enumerate(accepted, start=1):
-            db.add(
-                ScanTarget(
-                    job_id=job.id,
-                    site_url=target,
-                    source_row=index,
-                    status="queued",
-                    attempts=0,
-                    created_at=now,
-                    updated_at=now,
-                )
-            )
-
-        db.commit()
-
-        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
-        job = db.execute(stmt).unique().scalar_one()
-
-        return ScanJobCreateResponse(
-            job=_to_job_summary(job),
-            accepted_urls=accepted,
-            skipped_default_urls=skipped_default_urls,
-            invalid_urls=invalid,
-        )
-
-
-def _to_job_summary(job: ScanJob) -> ScanJobSummary:
-    return ScanJobSummary(
-        id=job.id,
-        status=job.status,
-        source_type=job.source_type,
-        scan_type=job.scan_type or "sharepoint",
-        skip_default_sites=job.skip_default_sites,
-        tenant_profile_id=job.tenant_profile_id,
-        tenant_name=job.tenant_profile.name if job.tenant_profile else None,
-        total_targets=job.total_targets,
-        processed_targets=job.processed_targets,
-        successful_targets=job.successful_targets,
-        failed_targets=job.failed_targets,
-        skipped_targets=job.skipped_targets,
-        items_scanned=job.items_scanned,
-        scan_activity=job.scan_activity if job.status == "running" else None,
-        warning_message=job.warning_message,
-        error_message=job.error_message,
-        created_at=job.created_at,
-        updated_at=job.updated_at,
-        started_at=job.started_at,
-        finished_at=job.finished_at,
-    )
-
-
-def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
-    return TenantProfileItem(
-        id=profile.id,
-        name=profile.name,
-        tenant_id=profile.tenant_id,
-        primary_domain=profile.primary_domain,
-        client_id=profile.client_id,
-        has_certificate=bool(profile.cert_thumbprint),
-        cert_thumbprint=profile.cert_thumbprint,
-        cert_expires_at=profile.cert_expires_at,
-        created_at=profile.created_at,
-        updated_at=profile.updated_at,
-    )
-
-
-def _sharing_link_risk_label(principal: str) -> str:
-    if not principal.startswith("SharingLinks."):
-        return ""
-    parts = principal.split(".", 3)
-    link_type = parts[2] if len(parts) >= 3 else ""
-    if link_type.startswith("Anonymous"):
-        return "Critical"
-    if link_type == "Flexible":
-        return "High"
-    if link_type.startswith("Organization"):
-        return "Low"
-    if link_type.startswith("Direct"):
-        return "Low"
-    return "Unknown"
-
-
-def _ensure_schema_columns() -> None:
-    stmts = [
-        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_tenant_id VARCHAR(128)",
-        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_client_id VARCHAR(128)",
-        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_client_secret TEXT",
-        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS tenant_profile_id VARCHAR(36)",
-        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS items_scanned INTEGER NOT NULL DEFAULT 0",
-        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS scan_activity TEXT",
-        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS scan_type VARCHAR(32) NOT NULL DEFAULT 'sharepoint'",
-        "ALTER TABLE permission_deviations ADD COLUMN IF NOT EXISTS permission_type VARCHAR(32)",
-        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS primary_domain VARCHAR(256)",
-        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS client_secret TEXT",
-        "ALTER TABLE tenant_profiles ALTER COLUMN client_secret DROP NOT NULL",
-        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_private_key TEXT",
-        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_public_pem TEXT",
-        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_thumbprint VARCHAR(64)",
-        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_expires_at TIMESTAMP",
-        "ALTER TABLE permission_deviations ADD COLUMN IF NOT EXISTS resolved_members TEXT",
-        "ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_at TIMESTAMP",
-        "ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_ok BOOLEAN",
-        "ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_message TEXT",
-    ]
-    with engine.begin() as conn:
-        for stmt in stmts:
-            conn.execute(text(stmt))
diff --git a/containers/clearview/src/clearview_app/migrations/env.py b/containers/clearview/src/clearview_app/migrations/env.py
new file mode 100644
index 0000000..c07a2c4
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/env.py
@@ -0,0 +1,58 @@
+"""Alembic environment for Clearview.
+
+Reuses the application's SQLAlchemy engine (already configured with the
+normalized DATABASE_URL and pool_pre_ping) so migrations run against exactly
+the same database the app uses. Logging config from alembic.ini is applied
+only when Alembic is invoked through the CLI; programmatic invocation from
+``clearview_app.db_migrate`` passes a Config without a file.
+"""
+from __future__ import annotations
+
+from logging.config import fileConfig
+
+from alembic import context
+
+from clearview_app.config import DATABASE_URL
+from clearview_app.db import _normalize_database_url, engine as app_engine
+from clearview_app.models import Base
+
+config = context.config
+
+if config.config_file_name is not None:
+    try:
+        fileConfig(config.config_file_name)
+    except Exception:  # noqa: BLE001 - logging config is best-effort
+        pass
+
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    """Emit SQL to stdout without a live DB connection."""
+    context.configure(
+        url=_normalize_database_url(DATABASE_URL),
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+        compare_type=True,
+    )
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations against the live database via the app engine."""
+    with app_engine.connect() as connection:
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            compare_type=True,
+        )
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/containers/clearview/src/clearview_app/migrations/script.py.mako b/containers/clearview/src/clearview_app/migrations/script.py.mako
new file mode 100644
index 0000000..da29ede
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from __future__ import annotations
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
diff --git a/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py b/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py
new file mode 100644
index 0000000..fe959d5
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py
@@ -0,0 +1,31 @@
+"""baseline schema
+
+Captures the full Clearview schema as defined by the SQLAlchemy models at the
+time Alembic was adopted. Creating it via ``Base.metadata.create_all`` keeps the
+baseline guaranteed-identical to the models (the same DDL the app emitted before
+Alembic). Existing databases are ``stamp``-ed to this revision rather than
+re-running ``upgrade`` (see ``clearview_app.db_migrate``).
+
+Revision ID: 0001_baseline
+Revises:
+Create Date: 2026-05-26
+"""
+from __future__ import annotations
+
+from alembic import op
+
+from clearview_app.models import Base
+
+# revision identifiers, used by Alembic.
+revision = "0001_baseline"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    Base.metadata.create_all(bind=op.get_bind())
+
+
+def downgrade() -> None:
+    Base.metadata.drop_all(bind=op.get_bind())
diff --git a/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py b/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py
new file mode 100644
index 0000000..c32a973
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py
@@ -0,0 +1,63 @@
+"""convert timestamp columns to timestamptz
+
+The app now uses timezone-aware UTC datetimes (DateTime(timezone=True)).
+Existing databases store naive ``timestamp without time zone`` values that were
+written as UTC, so we reinterpret them as UTC while converting. The conversion
+is guarded per column on the current type, so it is a no-op on databases whose
+columns are already ``timestamptz`` (e.g. a fresh DB created from the updated
+baseline models).
+
+Revision ID: 0002_timestamptz
+Revises: 0001_baseline
+Create Date: 2026-05-26
+"""
+from __future__ import annotations
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "0002_timestamptz"
+down_revision = "0001_baseline"
+branch_labels = None
+depends_on = None
+
+# Table -> datetime columns (names come from our own models, never user input).
+_COLUMNS: dict[str, tuple[str, ...]] = {
+    "tenant_profiles": ("cert_expires_at", "created_at", "updated_at"),
+    "scan_jobs": ("created_at", "updated_at", "started_at", "finished_at", "heartbeat_at"),
+    "scan_targets": ("last_probe_at", "created_at", "updated_at", "started_at", "finished_at"),
+    "permission_deviations": ("created_at",),
+}
+
+
+def _column_type(bind, table: str, column: str) -> str | None:
+    return bind.execute(
+        sa.text(
+            "SELECT data_type FROM information_schema.columns "
+            "WHERE table_name = :t AND column_name = :c"
+        ),
+        {"t": table, "c": column},
+    ).scalar()
+
+
+def upgrade() -> None:
+    bind = op.get_bind()
+    for table, columns in _COLUMNS.items():
+        for column in columns:
+            if _column_type(bind, table, column) == "timestamp without time zone":
+                op.execute(
+                    f'ALTER TABLE {table} ALTER COLUMN {column} '
+                    f"TYPE timestamptz USING {column} AT TIME ZONE 'UTC'"
+                )
+
+
+def downgrade() -> None:
+    bind = op.get_bind()
+    for table, columns in _COLUMNS.items():
+        for column in columns:
+            if _column_type(bind, table, column) == "timestamp with time zone":
+                op.execute(
+                    f'ALTER TABLE {table} ALTER COLUMN {column} '
+                    f"TYPE timestamp USING {column} AT TIME ZONE 'UTC'"
+                )
diff --git a/containers/clearview/src/clearview_app/models.py b/containers/clearview/src/clearview_app/models.py
index ceccf26..e67483e 100644
--- a/containers/clearview/src/clearview_app/models.py
+++ b/containers/clearview/src/clearview_app/models.py
@@ -1,11 +1,16 @@
 from __future__ import annotations
 
-from datetime import datetime
+from datetime import datetime, timezone
 
 from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
 
 
+def _utcnow() -> datetime:
+    """Timezone-aware UTC now, used as the default for timestamp columns."""
+    return datetime.now(timezone.utc)
+
+
 class Base(DeclarativeBase):
     pass
 
@@ -22,9 +27,9 @@ class TenantProfile(Base):
     cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True)
     cert_public_pem: Mapped[str | None] = mapped_column(Text, nullable=True)
     cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True)
-    cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
 
     jobs: Mapped[list["ScanJob"]] = relationship(back_populates="tenant_profile")
 
@@ -56,11 +61,11 @@ class ScanJob(Base):
     warning_message: Mapped[str | None] = mapped_column(Text, nullable=True)
     error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
 
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
 
     tenant_profile: Mapped["TenantProfile | None"] = relationship(back_populates="jobs")
     targets: Mapped[list["ScanTarget"]] = relationship(back_populates="job", cascade="all,delete-orphan")
@@ -79,14 +84,14 @@ class ScanTarget(Base):
     attempts: Mapped[int] = mapped_column(Integer, default=0)
     error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
 
-    last_probe_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    last_probe_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
     last_probe_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
     last_probe_message: Mapped[str | None] = mapped_column(Text, nullable=True)
 
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
 
     job: Mapped[ScanJob] = relationship(back_populates="targets")
     deviations: Mapped[list["PermissionDeviation"]] = relationship(back_populates="target", cascade="all,delete-orphan")
@@ -108,7 +113,7 @@ class PermissionDeviation(Base):
     permission_type: Mapped[str | None] = mapped_column(String(32), nullable=True)
     resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True)
 
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
 
     job: Mapped[ScanJob] = relationship(back_populates="deviations")
     target: Mapped[ScanTarget] = relationship(back_populates="deviations")
diff --git a/containers/clearview/src/clearview_app/scanners/sharepoint.py b/containers/clearview/src/clearview_app/scanners/sharepoint.py
index 097aefa..db4db24 100644
--- a/containers/clearview/src/clearview_app/scanners/sharepoint.py
+++ b/containers/clearview/src/clearview_app/scanners/sharepoint.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import re
+import threading
 import time
 from dataclasses import dataclass
 from urllib.parse import urlparse
@@ -32,7 +33,13 @@ class PermissionEntry:
     role_name: str
 
 
-_TOKEN_CACHE: dict[str, str] = {}
+# Cache maps cache_key -> (access_token, expires_at_epoch). Guarded by
+# _TOKEN_LOCK because the worker acquires tokens from multiple threads.
+_TOKEN_CACHE: dict[str, tuple[str, float]] = {}
+_TOKEN_LOCK = threading.Lock()
+# Reuse one MSAL app per (tenant, client, auth_method) so MSAL's own token
+# cache works and refreshes app tokens automatically.
+_MSAL_APPS: dict[str, "msal.ConfidentialClientApplication"] = {}
 
 
 def scan_site_for_deviations(
@@ -612,18 +619,20 @@ def _probe_hint(error: str, stage: str) -> str:
     return error[:220]
 
 
-def _get_token_for_host(host: str, auth: AuthConfig) -> str:
-    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
-    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
-    cached = _TOKEN_CACHE.get(cache_key)
-    if cached:
-        return cached
+def _get_msal_app(auth: AuthConfig, auth_method: str) -> "msal.ConfidentialClientApplication":
+    """Return a cached ConfidentialClientApplication for these credentials.
+
+    Reusing the app object lets MSAL's built-in token cache serve and refresh
+    app-only tokens instead of re-authenticating on every call.
+    """
+    app_key = f"{auth.tenant_id}|{auth.client_id}|{auth_method}"
+    app = _MSAL_APPS.get(app_key)
+    if app is not None:
+        return app
 
-    scope = f"https://{host}/.default"
     authority = f"https://login.microsoftonline.com/{auth.tenant_id}"
-
     if auth_method == "cert":
-        client_credential = {
+        client_credential: dict[str, str | None] | str | None = {
             "thumbprint": auth.cert_thumbprint,
             "private_key": auth.cert_private_key,
         }
@@ -635,16 +644,34 @@ def _get_token_for_host(host: str, auth: AuthConfig) -> str:
         authority=authority,
         client_credential=client_credential,
     )
-    result = app.acquire_token_for_client(scopes=[scope])
+    _MSAL_APPS[app_key] = app
+    return app
 
-    if "access_token" not in result:
-        error = result.get("error", "unknown")
-        description = result.get("error_description", "")
-        raise RuntimeError(f"Token request failed ({error}): {description[:300]}")
 
-    token = str(result["access_token"])
-    _TOKEN_CACHE[cache_key] = token
-    return token
+def _get_token_for_host(host: str, auth: AuthConfig) -> str:
+    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
+    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
+
+    with _TOKEN_LOCK:
+        cached = _TOKEN_CACHE.get(cache_key)
+        if cached is not None and time.time() < cached[1]:
+            return cached[0]
+
+        scope = f"https://{host}/.default"
+        app = _get_msal_app(auth, auth_method)
+        result = app.acquire_token_for_client(scopes=[scope])
+
+        if "access_token" not in result:
+            error = result.get("error", "unknown")
+            description = result.get("error_description", "")
+            raise RuntimeError(f"Token request failed ({error}): {description[:300]}")
+
+        token = str(result["access_token"])
+        # expires_in is seconds-from-now; refresh 60s early to avoid edge expiry.
+        expires_in = int(result.get("expires_in", 3600))
+        expires_at = time.time() + max(expires_in - 60, 0)
+        _TOKEN_CACHE[cache_key] = (token, expires_at)
+        return token
 
 
 def _iter_paged(url: str, headers: dict[str, str]):
diff --git a/containers/clearview/src/clearview_app/schemas.py b/containers/clearview/src/clearview_app/schemas.py
index 19cfe25..910ba14 100644
--- a/containers/clearview/src/clearview_app/schemas.py
+++ b/containers/clearview/src/clearview_app/schemas.py
@@ -1,9 +1,14 @@
 from __future__ import annotations
 
 from datetime import datetime
+from typing import Literal
 
 from pydantic import BaseModel, Field, HttpUrl
 
+# Valid scan types, mirrored by the frontend scan-type dropdowns. Used to
+# validate incoming job requests (FastAPI returns 422 on anything else).
+ScanType = Literal["sharepoint", "sharepoint_root", "mailbox", "entra_groups"]
+
 
 class CreateTenantProfileRequest(BaseModel):
     name: str
@@ -33,7 +38,7 @@ class TenantCertificateResponse(BaseModel):
 
 
 class CreateScanJobRequest(BaseModel):
-    scan_type: str = "sharepoint"
+    scan_type: ScanType = "sharepoint"
     site_urls: list[HttpUrl] = Field(default_factory=list)
     mailboxes: list[str] = Field(default_factory=list)
     scan_all_mailboxes: bool = False
diff --git a/containers/clearview/src/clearview_app/version.py b/containers/clearview/src/clearview_app/version.py
index b31ae7a..c0cd924 100644
--- a/containers/clearview/src/clearview_app/version.py
+++ b/containers/clearview/src/clearview_app/version.py
@@ -7,7 +7,7 @@ history, so operators can see exactly which image build is running.
 from __future__ import annotations
 
 VERSION = "v0.1.0"
-BUILD = 1
+BUILD = 2
 
 
 def display_version() -> str:
diff --git a/containers/clearview/src/clearview_app/worker.py b/containers/clearview/src/clearview_app/worker.py
index a78ae63..3d3f6ca 100644
--- a/containers/clearview/src/clearview_app/worker.py
+++ b/containers/clearview/src/clearview_app/worker.py
@@ -4,7 +4,7 @@ import logging
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
-from datetime import datetime
+from datetime import datetime, timezone
 
 from sqlalchemy import select
 
@@ -47,17 +47,21 @@ class ScanWorker:
 
     def _process_next_job(self) -> bool:
         with SessionLocal() as db:
+            # Atomic claim: lock the chosen queued row and skip rows already
+            # locked by another worker, so multiple workers/replicas never grab
+            # the same job. The status flip is committed in this transaction.
             job = db.execute(
                 select(ScanJob)
                 .where(ScanJob.status == "queued")
                 .order_by(ScanJob.created_at.asc())
                 .limit(1)
+                .with_for_update(skip_locked=True)
             ).scalar_one_or_none()
 
             if job is None:
                 return False
 
-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
             job.status = "running"
             job.started_at = now
             job.heartbeat_at = now
@@ -96,7 +100,7 @@ class ScanWorker:
             job = db.get(ScanJob, job_id)
             if not job:
                 return
-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
             job.heartbeat_at = now
             job.updated_at = now
             job.finished_at = now
@@ -113,7 +117,7 @@ class ScanWorker:
             if not job or not target:
                 return
 
-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
             target.status = "running"
             target.started_at = now
             target.updated_at = now
@@ -128,7 +132,7 @@ class ScanWorker:
                 target = db.get(ScanTarget, target_id)
                 if not job or not target:
                     return
-                now = datetime.utcnow()
+                now = datetime.now(timezone.utc)
                 target.status = "failed"
                 target.attempts = 1
                 target.error_message = f"Preflight: {probe.message}"
@@ -173,7 +177,7 @@ class ScanWorker:
                             )
                         )
 
-                    now = datetime.utcnow()
+                    now = datetime.now(timezone.utc)
                     target.status = "completed"
                     target.attempts = attempt
                     target.error_message = None
@@ -203,7 +207,7 @@ class ScanWorker:
             if not job or not target:
                 return
 
-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
             target.status = "failed"
             target.attempts = max_attempts
             target.error_message = last_error
@@ -252,7 +256,7 @@ class ScanWorker:
         with SessionLocal() as db:
             target = db.get(ScanTarget, target_id)
             if target:
-                now = datetime.utcnow()
+                now = datetime.now(timezone.utc)
                 target.last_probe_at = now
                 target.last_probe_ok = result.ok
                 target.last_probe_message = result.message
@@ -298,8 +302,8 @@ class ScanWorker:
                         job.scan_activity = activity
                         if items > 0:
                             job.items_scanned += items
-                        job.heartbeat_at = datetime.utcnow()
-                        job.updated_at = datetime.utcnow()
+                        job.heartbeat_at = datetime.now(timezone.utc)
+                        job.updated_at = datetime.now(timezone.utc)
                         db.commit()
             except Exception:  # noqa: BLE001
                 pass
diff --git a/docs/changelog-develop.md b/docs/changelog-develop.md
index 1097da5..8d76c14 100644
--- a/docs/changelog-develop.md
+++ b/docs/changelog-develop.md
@@ -2,6 +2,42 @@
 
 This file documents changes on the develop branch of this project.
 
+## 2026-05-26 — UI/UX: dead CSS removal, a11y, distinct risk colours, richer dashboard
+
+### Added
+- **Dashboard enrichment** — a fourth KPI card **With errors** (`#statErrors`, counts jobs that are `completed_with_errors` or have `failed_targets > 0`) and a **Recent jobs** panel (`#dashRecentJobs`, last 5 jobs, each row clickable to jump to its details). Populated from the existing `/api/scan-jobs` list in `refreshJobs()` via a new `renderDashRecent()`; all interpolated fields run through `escHtml()`.
+
+### Changed
+- **Removed dead CSS** — the pre-sidebar `.topbar`, `.topbar-actions`, and `.layout` rules (and their now-orphaned references inside the 930px/640px media queries) were deleted; the layout has used `.app-shell`/`.sidebar`/`.content` since the sidebar refactor.
+- **Accessibility** — focus outline strengthened from `rgba(14,165,233,0.38)` to a solid `var(--cv-accent)` (meets WCAG non-text 3:1) and now also covers `a:focus-visible`. On route changes (`applyRoute`), focus now moves to the new page's first heading (`h1/h2`, `tabindex=-1`) and `document.title` updates, so screen-reader/keyboard users land in the freshly shown content.
+- **Distinct risk colours** — the `risk.warn` badge changed from accent-blue (indistinguishable from `info`/`low`) to amber (`#854d0e` on `rgba(234,179,8,.18)`), giving a real low→high colour gradient.
+- **Consistent XSS escaping** — `job.id` and `job.source_type` in the Scan Jobs table are now passed through `escHtml()` (previously interpolated raw), matching the rest of the table.
+
+## 2026-05-26 — Split monolithic main.py into route modules
+
+### Changed
+- **`main.py` reduced from 1152 to 64 lines** — now a composition root that only wires the FastAPI app, scan-worker lifecycle, `/healthz`, `/api/version`, the `/` index + static mount, and `include_router` for the new route modules. All endpoint logic moved out verbatim (behaviour-preserving).
+- **New route modules** (flat modules at package level so existing single-dot relative imports stay unchanged — lower risk than a `routers/` subpackage): `api_tenants.py` (tenant profiles + certificate), `api_jobs.py` (all scan-job routes incl. CSV import, cancel/delete, resolve-sharing-links, resolve-groups, test-connection, Excel export, detail), `api_onboarding.py` (Microsoft connect/callback/scan-app). Shared helpers (`_resolve_credentials`, `_create_job_from_targets`, `_enumerate_all_*`, `_to_job_summary`, `_to_tenant_item`, `_build_export_filename`, `_sharing_link_risk_label`, `_extract_sharing_link_group_and_type`) extracted to `api_helpers.py`.
+- **Verified behaviour-preserving** — captured the OpenAPI route set before/after; both expose the identical 22 endpoints (`diff` empty). Built the image, booted against a fresh DB: `/healthz`, `/api/version`, `/api/tenants`, `/api/scan-jobs` all respond, invalid `scan_type` still returns 422, no startup errors.
+
+## 2026-05-26 — Correctness P1: token cache, atomic job claim, timezone-aware datetimes, scan_type validation
+
+### Changed
+- **Token cache now has TTL + thread lock + MSAL app reuse** (`scanners/sharepoint.py`) — `_TOKEN_CACHE` previously stored access tokens as plain strings forever, so long scans started failing with 401s once the ~1h token expired. It now stores `(token, expires_at)` and refreshes 60s before expiry, guarded by a new `_TOKEN_LOCK` (the worker fetches tokens from multiple threads). New `_get_msal_app()` caches one `ConfidentialClientApplication` per `(tenant, client, auth_method)` so MSAL's own token cache is reused instead of building a fresh app on every call.
+- **Atomic job claim** (`worker.py`) — the queued-job selection now uses `.with_for_update(skip_locked=True)` (`SELECT … FOR UPDATE SKIP LOCKED`), so multiple worker threads/replicas can never claim the same job. Behaviour is unchanged for the current single worker but is now replica-safe.
+- **Timezone-aware datetimes everywhere** — replaced all 24 `datetime.utcnow()` (naive, deprecated) with `datetime.now(timezone.utc)` across `models.py`, `worker.py`, `main.py`, and `cert.py`. SQLAlchemy datetime columns are now `DateTime(timezone=True)`; model defaults use a new `_utcnow()` helper. New Alembic migration `0002_timestamptz` converts existing `timestamp without time zone` columns to `timestamptz` (reinterpreting stored values as UTC), guarded per-column so it is a no-op on databases already timestamptz. **Behaviour note:** API datetimes now carry a UTC offset, so the frontend renders them correctly in local time (previously stored UTC was shown as if local).
+- **`scan_type` request validation** (`schemas.py`) — `CreateScanJobRequest.scan_type` is now `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` instead of free `str`; invalid values return HTTP 422. The response model keeps `str` so legacy rows never trigger a serialization error. Verified: `scan_type=bogus` → 422, valid type passes schema validation.
+
+## 2026-05-26 — Alembic migrations replace startup `create_all` + raw ALTERs
+
+### Added
+- **Alembic introduced (`alembic==1.14.0`)** — schema is now version-controlled instead of being patched at every startup. New `clearview_app/migrations/` package (`env.py` reuses the app's SQLAlchemy engine and `Base.metadata`; `versions/0001_baseline.py` baseline) and dev-only `containers/clearview/alembic.ini` for manual CLI use. The app builds the Alembic `Config` programmatically, so `alembic.ini` is not shipped in the image.
+- **Baseline migration `0001_baseline`** — creates the full current schema via `Base.metadata.create_all`, guaranteed identical to the models (the same DDL the app emitted before). Future schema changes become explicit Alembic revisions.
+- **Startup bootstrap `clearview_app/db_migrate.run_migrations()`** — idempotent, three cases: fresh DB → `upgrade head`; existing pre-Alembic DB (tables present, no `alembic_version`) → `stamp head` (adopt baseline without re-creating); already under Alembic → `upgrade head`. Verified end-to-end against throwaway databases (fresh upgrade, existing-DB stamp, re-run no-op) and a local image boot test (`/healthz` OK, schema + `alembic_version=0001_baseline`).
+
+### Changed
+- **`main.py` startup** — `on_startup()` now calls `run_migrations()` instead of `Base.metadata.create_all(bind=engine)` + `_ensure_schema_columns()`. The 18-statement raw `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` block (`_ensure_schema_columns`) is removed; unused `Base`/`engine` imports dropped. The existing dev/prod database is adopted automatically (stamped to baseline) on first start of the new build — no manual migration step required.
+
 ## 2026-05-26 — Build/version number in the UI (Dropkeep-style)
 
 ### Added