Dev build 2026-05-26 15:12

2026-05-26 15:12:35 +02:00 · 2026-05-26 15:12:35 +02:00 · 0cdeabc0e6
commit 0cdeabc0e6
parent 61db7fe4a7
22 changed files with 1639 additions and 1211 deletions
--- a/containers/clearview/alembic.ini
+++ b/containers/clearview/alembic.ini
@ -0,0 +1,45 @@
+# Alembic config for manual CLI use during development, e.g.:
+#   cd containers/clearview && DATABASE_URL=postgresql://... PYTHONPATH=src alembic revision -m "msg"
+#
+# The application itself does NOT read this file: clearview_app.db_migrate builds
+# an Alembic Config programmatically and env.py takes the database URL from
+# DATABASE_URL via clearview_app.config. sqlalchemy.url is therefore left blank.
+
+[alembic]
+script_location = src/clearview_app/migrations
+prepend_sys_path = src
+sqlalchemy.url =
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
--- a/containers/clearview/requirements.txt
+++ b/containers/clearview/requirements.txt
@ -1,6 +1,7 @@
 fastapi==0.115.0
 uvicorn[standard]==0.30.6
 sqlalchemy==2.0.36
+alembic==1.14.0
 psycopg[binary]==3.2.3
 python-multipart==0.0.12
 requests==2.32.3
--- a/containers/clearview/site/app.js
+++ b/containers/clearview/site/app.js
@ -104,6 +104,8 @@
    statTenants: document.getElementById('statTenants'),
    statJobs: document.getElementById('statJobs'),
    statRunning: document.getElementById('statRunning'),
+    statErrors: document.getElementById('statErrors'),
+    dashRecentJobs: document.getElementById('dashRecentJobs'),
  };

  // -------------------------------------------------------------------------
@ -610,6 +612,36 @@
  // Jobs list
  // -------------------------------------------------------------------------

+  /**
+   * Fill the dashboard "Recent jobs" table body with at most five rows taken
+   * from the front of `jobs`, and make each row open that job on the Jobs page.
+   * Does nothing when the table body element is missing.
+   */
+  function renderDashRecent(jobs) {
+    const tbody = els.dashRecentJobs;
+    if (!tbody) return;
+    if (!jobs.length) {
+      tbody.innerHTML = '<tr><td colspan="6">No jobs yet.</td></tr>';
+      return;
+    }
+
+    const rows = [];
+    jobs.slice(0, 5).forEach(function (job) {
+      const safeId = escHtml(job.id);
+      // Jobs without a tenant name are shown with a muted "manual" label.
+      let tenantCell = '<span style="color:var(--cv-text-secondary)">manual</span>';
+      if (job.tenant_name) {
+        tenantCell = escHtml(job.tenant_name);
+      }
+      let targetsCell = '0/0';
+      if (job.total_targets > 0) {
+        targetsCell = job.processed_targets + '/' + job.total_targets;
+      }
+      rows.push([
+        '<tr style="cursor:pointer" data-dash-job="' + safeId + '">',
+        '<td><code>' + safeId + '</code></td>',
+        '<td>' + escHtml(job.scan_type || 'sharepoint') + '</td>',
+        '<td>' + tenantCell + '</td>',
+        '<td>' + statusBadge(job.status) + '</td>',
+        '<td>' + targetsCell + '</td>',
+        '<td>' + formatDate(job.updated_at) + '</td>',
+        '</tr>',
+      ].join(''));
+    });
+    tbody.innerHTML = rows.join('');
+
+    // Clicking a row selects the job, switches to the Jobs route and refreshes
+    // the detail view; a failed refresh is deliberately ignored here.
+    function openJob(row) {
+      state.selectedJobId = row.getAttribute('data-dash-job');
+      navigateTo('jobs');
+      refreshSelectedJob().catch(function () {});
+    }
+    tbody.querySelectorAll('[data-dash-job]').forEach(function (row) {
+      row.addEventListener('click', function () {
+        openJob(row);
+      });
+    });
+  }
+
  async function refreshJobs() {
    const filterTenant = els.jobTenantFilter.value;
    const filterType = els.jobTypeFilter ? els.jobTypeFilter.value : '';
@ -626,6 +658,12 @@
    els.statRunning.textContent = String(jobs.filter(function (j) {
      return j.status === 'running' || j.status === 'queued';
    }).length);
+    if (els.statErrors) {
+      els.statErrors.textContent = String(jobs.filter(function (j) {
+        return j.status === 'completed_with_errors' || (j.failed_targets || 0) > 0;
+      }).length);
+    }
+    renderDashRecent(jobs);

    if (!jobs.length) {
      els.jobsTableBody.innerHTML = '<tr><td colspan="9">No jobs yet.</td></tr>';
@ -654,22 +692,23 @@
      } else {
        typeLabel = '<span class="risk ok">SharePoint</span>';
      }
+      const jobIdSafe = escHtml(job.id);
      return (
        '<tr>' +
-          '<td><code>' + job.id + '</code></td>' +
+          '<td><code>' + jobIdSafe + '</code></td>' +
          '<td>' + typeLabel + '</td>' +
          '<td>' + tenantLabel + '</td>' +
-          '<td>' + job.source_type + '</td>' +
+          '<td>' + escHtml(job.source_type) + '</td>' +
          '<td>' + statusBadge(job.status) + '</td>' +
          '<td>' + progress + '</td>' +
          '<td>' + (job.items_scanned > 0 ? job.items_scanned : '-') + '</td>' +
          '<td>' + formatDate(job.updated_at) + '</td>' +
          '<td>' +
            '<div style="display:flex;gap:0.4rem">' +
-              '<button class="btn btn-outline btn-small" data-job-inspect="' + job.id + '">Inspect</button>' +
+              '<button class="btn btn-outline btn-small" data-job-inspect="' + jobIdSafe + '">Inspect</button>' +
              (job.status === 'queued' || job.status === 'running'
-                ? '<button class="btn btn-outline btn-small" data-job-cancel="' + job.id + '">Cancel</button>'
-                : '<button class="btn btn-outline btn-small" data-job-delete="' + job.id + '">Delete</button>') +
+                ? '<button class="btn btn-outline btn-small" data-job-cancel="' + jobIdSafe + '">Cancel</button>'
+                : '<button class="btn btn-outline btn-small" data-job-delete="' + jobIdSafe + '">Delete</button>') +
            '</div>' +
          '</td>' +
        '</tr>'
@ -1527,14 +1566,16 @@
    return hash;
  }

-  function applyRoute(route) {
+  function applyRoute(route, moveFocus) {
    if (!ROUTE_TITLES[route]) {
      route = 'dashboard';
    }
    state.currentRoute = route;
+    var activePage = null;
    document.querySelectorAll('.route-page').forEach(function (page) {
      if (page.getAttribute('data-route-page') === route) {
        page.removeAttribute('hidden');
+        activePage = page;
      } else {
        page.setAttribute('hidden', '');
      }
@ -1549,6 +1590,16 @@
    if (els.contentTitle) {
      els.contentTitle.textContent = ROUTE_TITLES[route];
    }
+    document.title = 'Clearview | ' + ROUTE_TITLES[route];
+    // On user navigation, move focus to the new page's first heading so
+    // screen-reader and keyboard users land in the freshly shown content.
+    if (moveFocus && activePage) {
+      var heading = activePage.querySelector('h1, h2');
+      if (heading) {
+        heading.setAttribute('tabindex', '-1');
+        heading.focus();
+      }
+    }
  }

  function navigateTo(route) {
@ -1560,12 +1611,12 @@
    if (window.location.hash !== hash) {
      window.location.hash = hash;
    } else {
-      applyRoute(route);
+      applyRoute(route, true);
    }
  }

  window.addEventListener('hashchange', function () {
-    applyRoute(parseRoute());
+    applyRoute(parseRoute(), true);
  });

  applyRoute(parseRoute());
--- a/containers/clearview/site/index.html
+++ b/containers/clearview/site/index.html
@ -73,6 +73,33 @@
            <span class="kpi" id="statRunning">0</span>
            <span class="label">Active Jobs</span>
          </article>
+          <article>
+            <span class="kpi" id="statErrors">0</span>
+            <span class="label">With errors</span>
+          </article>
+        </div>
+      </div>
+
+      <div class="panel">
+        <div class="panel-header split">
+          <h2>Recent jobs</h2>
+        </div>
+        <div class="table-wrap">
+          <table>
+            <thead>
+              <tr>
+                <th>Job ID</th>
+                <th>Type</th>
+                <th>Tenant</th>
+                <th>Status</th>
+                <th>Targets</th>
+                <th>Updated</th>
+              </tr>
+            </thead>
+            <tbody id="dashRecentJobs">
+              <tr><td colspan="6">No jobs yet.</td></tr>
+            </tbody>
+          </table>
        </div>
      </div>
    </section>
--- a/containers/clearview/site/styles.css
+++ b/containers/clearview/site/styles.css
@ -55,38 +55,12 @@ body {
  background: radial-gradient(circle at center, rgba(3, 105, 161, 0.2), rgba(3, 105, 161, 0));
 }

-.topbar {
-  width: min(1100px, calc(100% - 2rem));
-  margin: 1.1rem auto 0;
-  padding: 0.95rem 1.1rem;
-  border: 1px solid var(--cv-border);
-  border-radius: 18px;
-  background: rgba(255, 255, 255, 0.75);
-  backdrop-filter: blur(8px);
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  box-shadow: 0 10px 24px rgba(20, 20, 19, 0.08);
-}
-
 .brand-logo {
  height: 42px;
  width: auto;
  display: block;
 }

-.topbar-actions {
-  display: flex;
-  gap: 0.6rem;
-}
-
-.layout {
-  width: min(1100px, calc(100% - 2rem));
-  margin: 1rem auto 2.5rem;
-  display: grid;
-  gap: 1rem;
-}
-
 .hero,
 .panel {
  border-radius: 22px;
@ -131,7 +105,7 @@ h2 {
 .hero-stats {
  margin-top: 1.3rem;
  display: grid;
-  grid-template-columns: repeat(3, minmax(0, 1fr));
+  grid-template-columns: repeat(4, minmax(0, 1fr));
  gap: 0.75rem;
 }

@ -291,8 +265,9 @@ textarea {
 input:focus,
 select:focus,
 textarea:focus,
-button:focus {
-  outline: 2px solid rgba(14, 165, 233, 0.38);
+button:focus,
+a:focus-visible {
+  outline: 2px solid var(--cv-accent);
  outline-offset: 2px;
 }

@ -533,8 +508,8 @@ strong {
 }

 .risk.warn {
-  background: rgba(14, 165, 233, 0.15);
-  color: var(--cv-accent-dark);
+  background: rgba(234, 179, 8, 0.18);
+  color: #854d0e;
 }

 .risk.high {
@ -584,12 +559,6 @@ strong {
 }

@media (max-width: 930px) {
-  .topbar {
-    flex-direction: column;
-    align-items: flex-start;
-    gap: 0.8rem;
-  }
-
  .hero-stats {
    grid-template-columns: 1fr;
  }
@ -616,11 +585,6 @@ strong {
 }

@media (max-width: 640px) {
-  .layout,
-  .topbar {
-    width: calc(100% - 1rem);
-  }
-
  .hero,
  .panel {
    border-radius: 16px;
@ -633,14 +597,6 @@ strong {
  .hero h1 {
    max-width: none;
  }
-
-  .topbar-actions {
-    width: 100%;
-  }
-
-  .topbar-actions .btn {
-    flex: 1;
-  }
 }

 /* ===========================================================================
--- a/containers/clearview/src/clearview_app/api_helpers.py
+++ b/containers/clearview/src/clearview_app/api_helpers.py
@ -0,0 +1,321 @@
+"""Shared helpers for the API route modules.
+
+Extracted verbatim from the original monolithic ``main.py`` so the route
+modules (``api_tenants``, ``api_jobs``) can share credential resolution, job
+creation, response mapping, and export helpers without circular imports.
+"""
+from __future__ import annotations
+
+import re
+import uuid
+from datetime import datetime, timezone
+
+from fastapi import HTTPException
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+
+from .db import SessionLocal
+from .default_sites import is_default_site, normalize_site_url
+from .models import ScanJob, ScanTarget, TenantProfile
+from .scanners import AuthConfig
+from .schemas import ScanJobCreateResponse, ScanJobSummary, TenantProfileItem
+
+
+def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
+    """Pull the sharing-link group name and link type out of a principal value.
+
+    Accepted shapes:
+    - ``SharingLinks.<guid>.<LinkType>.<guid>``
+    - ``c:0o.c|federateddirectoryclaimprovider|SharingLinks.<guid>.<LinkType>.<guid>``
+
+    The ``SharingLinks.`` prefix is matched case-insensitively and, for
+    ``|``-separated values, the right-most matching segment is used.
+
+    Returns ``(group_name, link_type)`` where ``group_name`` is the whole
+    matching segment and ``link_type`` its third dot-separated part, or
+    ``None`` when the principal is empty, has no such segment, or the segment
+    has fewer than three dot-separated parts.
+    """
+    if not principal:
+        return None
+
+    prefix = "sharinglinks."
+    stripped = principal.strip()
+    pieces = [piece.strip() for piece in stripped.split("|")]
+    # Walk from the right so the last matching segment wins.
+    group_name = next(
+        (piece for piece in reversed(pieces) if piece and piece.lower().startswith(prefix)),
+        "",
+    )
+    if not group_name and stripped.lower().startswith(prefix):
+        group_name = stripped
+    if not group_name:
+        return None
+
+    dotted = group_name.split(".")
+    return (group_name, dotted[2]) if len(dotted) >= 3 else None
+
+
+# Human-readable filename component for each known scan type.
+_SCAN_TYPE_LABELS = {
+    "sharepoint": "Deviations",
+    "sharepoint_root": "Root",
+    "mailbox": "Mailbox",
+    "entra_groups": "EntraGroups",
+}
+
+# Runs of characters outside this set are collapsed to "_" in filename parts.
+_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9_-]+")
+
+
+def _safe_filename_part(value: str, fallback: str) -> str:
+    """Reduce ``value`` to ``[A-Za-z0-9_-]``; return ``fallback`` if nothing is left."""
+    return _UNSAFE_FILENAME_CHARS.sub("_", value).strip("_") or fallback
+
+
+def _build_export_filename(job: ScanJob, job_id: str) -> str:
+    """Build the ``.xlsx`` download name for a job export.
+
+    The name has the form ``ClearView_<tenant>_<type>_<short id>.xlsx`` where
+    ``<tenant>`` is the tenant profile name (``Manual`` when the job has no
+    profile or the name is empty), ``<type>`` is the label for the job's scan
+    type, and ``<short id>`` is the last 12 characters of ``job_id`` with
+    dashes removed.
+
+    Both the tenant name and the type label are sanitised. A scan type that is
+    not in ``_SCAN_TYPE_LABELS`` is used as its own label, so it must not be
+    allowed to carry quotes, path separators or line breaks into the filename.
+    """
+    tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
+    safe_tenant = _safe_filename_part(tenant_label, "Manual")
+    scan_type = job.scan_type or "sharepoint"
+    # Known labels are already safe and pass through unchanged.
+    type_label = _safe_filename_part(_SCAN_TYPE_LABELS.get(scan_type, scan_type), "Unknown")
+    short_id = job_id.replace("-", "")[-12:]
+    return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
+
+
+def _build_auth_config(
+    tenant_id: str,
+    client_id: str,
+    client_secret: str | None,
+    profile_id: str | None,
+) -> AuthConfig:
+    """Build the ``AuthConfig`` used by the tenant-wide enumeration helpers.
+
+    When ``profile_id`` is given and that tenant profile exists, its
+    ``cert_private_key``, ``cert_thumbprint`` and ``cert_public_pem`` are
+    copied into the config; otherwise the three certificate fields stay
+    ``None``. A missing ``client_secret`` is passed on as ``""``.
+    """
+    cert_private_key: str | None = None
+    cert_thumbprint: str | None = None
+    cert_public_pem: str | None = None
+    if profile_id:
+        with SessionLocal() as db:
+            profile = db.get(TenantProfile, profile_id)
+            if profile:
+                cert_private_key = profile.cert_private_key
+                cert_thumbprint = profile.cert_thumbprint
+                cert_public_pem = profile.cert_public_pem
+
+    return AuthConfig(
+        tenant_id=tenant_id,
+        client_id=client_id,
+        client_secret=client_secret or "",
+        cert_private_key=cert_private_key,
+        cert_thumbprint=cert_thumbprint,
+        cert_public_pem=cert_public_pem,
+    )
+
+
+def _enumerate_all_entra_groups(
+    tenant_id: str,
+    client_id: str,
+    client_secret: str | None,
+    profile_id: str | None,
+) -> list[str]:
+    """Return the result of ``scanners.entra.list_all_groups`` for the tenant.
+
+    Raises:
+        HTTPException: 400 with the underlying error text when the
+            enumeration call fails for any reason.
+    """
+    auth = _build_auth_config(tenant_id, client_id, client_secret, profile_id)
+
+    from .scanners import entra as _entra
+
+    try:
+        return _entra.list_all_groups(auth)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc
+
+
+def _enumerate_all_mailboxes(
+    organization: str | None,
+    tenant_id: str,
+    client_id: str,
+    client_secret: str | None,
+    profile_id: str | None,
+) -> list[str]:
+    """Return the result of ``scanners.mailbox.list_mailboxes`` for ``organization``.
+
+    ``organization`` must be non-empty and contain a dot; it is stripped and
+    lower-cased before being passed to the scanner.
+
+    Raises:
+        HTTPException: 400 when ``organization`` is missing or has no dot
+            (checked before any database access), or when the enumeration
+            call fails for any reason.
+    """
+    if not organization or "." not in organization:
+        raise HTTPException(
+            status_code=400,
+            detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
+        )
+
+    auth = _build_auth_config(tenant_id, client_id, client_secret, profile_id)
+
+    from .scanners import mailbox as _mailbox
+
+    try:
+        return _mailbox.list_mailboxes(organization=organization.strip().lower(), auth=auth)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
+
+
+def _resolve_credentials(
+    db,
+    tenant_profile_id: str | None,
+    tenant_id: str | None,
+    client_id: str | None,
+    client_secret: str | None,
+) -> tuple[str, str, str | None, str | None]:
+    """Work out which credentials a scan request should run with.
+
+    With ``tenant_profile_id`` the stored profile is used and the result is
+    ``(profile.tenant_id, profile.client_id, profile.client_secret,
+    tenant_profile_id)``. Without it, all three of ``tenant_id``,
+    ``client_id`` and ``client_secret`` must be supplied; they are returned
+    stripped, with ``None`` as the profile id.
+
+    Raises:
+        HTTPException: 404 when the profile does not exist; 400 when the
+            profile has neither a client secret nor a certificate thumbprint,
+            or when neither a profile id nor a full manual triple was given.
+    """
+    if not tenant_profile_id:
+        if not (tenant_id and client_id and client_secret):
+            raise HTTPException(
+                status_code=400,
+                detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
+            )
+        return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
+
+    profile = db.get(TenantProfile, tenant_profile_id)
+    if not profile:
+        raise HTTPException(status_code=404, detail="Tenant profile not found")
+
+    # A profile is usable with either a client secret or a certificate.
+    has_auth_material = profile.client_secret or profile.cert_thumbprint
+    if not has_auth_material:
+        raise HTTPException(
+            status_code=400,
+            detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
+        )
+    return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
+
+
+def _create_job_from_targets(
+    raw_targets: list[str],
+    scan_type: str,
+    skip_default_sites: bool,
+    source_type: str,
+    tenant_id: str,
+    client_id: str,
+    client_secret: str,
+    tenant_profile_id: str | None = None,
+) -> ScanJobCreateResponse:
+    """Validate raw targets, then persist a ``ScanJob`` with one ``ScanTarget`` per accepted target.
+
+    Validation depends on ``scan_type``:
+    - ``mailbox``: stripped and lower-cased; must be non-empty and contain ``@``.
+    - ``entra_groups``: stripped; must be non-empty.
+    - anything else: passed through ``normalize_site_url``; a falsy result is invalid.
+
+    Duplicates (after normalization) are dropped silently. For ``sharepoint``
+    and ``sharepoint_root`` scans with ``skip_default_sites`` set, targets for
+    which ``is_default_site`` is true are recorded as skipped rather than
+    accepted.
+
+    When nothing is accepted the job is stored as already ``completed`` with a
+    warning message and ``finished_at`` set; otherwise it is ``queued``.
+
+    Returns a ``ScanJobCreateResponse`` carrying the job summary plus the
+    accepted, skipped-default and invalid target lists.
+    """
+    accepted: list[str] = []
+    skipped_default_urls: list[str] = []
+    invalid: list[str] = []
+
+    # Normalized values already handled, used to drop duplicates.
+    seen: set[str] = set()
+
+    for raw in raw_targets:
+        if scan_type == "mailbox":
+            normalized = (raw or "").strip().lower()
+            if not normalized or "@" not in normalized:
+                invalid.append(raw)
+                continue
+        elif scan_type == "entra_groups":
+            normalized = (raw or "").strip()
+            if not normalized:
+                invalid.append(raw)
+                continue
+        else:
+            normalized = normalize_site_url(raw) or ""
+            if not normalized:
+                invalid.append(raw)
+                continue
+
+        if normalized in seen:
+            continue
+        seen.add(normalized)
+
+        if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
+            skipped_default_urls.append(normalized)
+            continue
+
+        accepted.append(normalized)
+
+    with SessionLocal() as db:
+        now = datetime.now(timezone.utc)
+        job = ScanJob(
+            id=str(uuid.uuid4()),
+            source_type=source_type,
+            scan_type=scan_type,
+            status="queued" if accepted else "completed",
+            skip_default_sites=skip_default_sites,
+            tenant_profile_id=tenant_profile_id,
+            auth_tenant_id=tenant_id,
+            auth_client_id=client_id,
+            auth_client_secret=client_secret,
+            total_targets=len(accepted),
+            skipped_targets=len(skipped_default_urls),
+            warning_message=None,
+            error_message=None,
+            created_at=now,
+            updated_at=now,
+            finished_at=now if not accepted else None,
+        )
+
+        if not accepted:
+            # NOTE(review): every non-mailbox scan type, including entra_groups,
+            # gets the "sites" wording below - confirm that is intended.
+            if scan_type == "mailbox":
+                job.warning_message = "No scannable mailboxes after validation"
+            else:
+                job.warning_message = "No scannable sites after validation and default-site filtering"
+
+        db.add(job)
+        # Flush the pending job before the target rows that reference its id are added.
+        db.flush()
+
+        # source_row is the 1-based position within the accepted list, not the
+        # position in the original input.
+        for index, target in enumerate(accepted, start=1):
+            db.add(
+                ScanTarget(
+                    job_id=job.id,
+                    site_url=target,
+                    source_row=index,
+                    status="queued",
+                    attempts=0,
+                    created_at=now,
+                    updated_at=now,
+                )
+            )
+
+        db.commit()
+
+        # Re-select with the tenant profile eagerly loaded; _to_job_summary reads
+        # job.tenant_profile to fill tenant_name.
+        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
+        job = db.execute(stmt).unique().scalar_one()
+
+        return ScanJobCreateResponse(
+            job=_to_job_summary(job),
+            accepted_urls=accepted,
+            skipped_default_urls=skipped_default_urls,
+            invalid_urls=invalid,
+        )
+
+
+def _to_job_summary(job: ScanJob) -> ScanJobSummary:
+    """Map a ``ScanJob`` row onto the ``ScanJobSummary`` response schema.
+
+    Most fields are copied as-is. ``scan_type`` falls back to ``"sharepoint"``
+    when unset, ``tenant_name`` comes from the related tenant profile (``None``
+    without one), and ``scan_activity`` is only reported while the job status
+    is ``"running"``.
+    """
+    profile = job.tenant_profile
+    copied_fields = (
+        "id",
+        "status",
+        "source_type",
+        "skip_default_sites",
+        "tenant_profile_id",
+        "total_targets",
+        "processed_targets",
+        "successful_targets",
+        "failed_targets",
+        "skipped_targets",
+        "items_scanned",
+        "warning_message",
+        "error_message",
+        "created_at",
+        "updated_at",
+        "started_at",
+        "finished_at",
+    )
+    values = {field: getattr(job, field) for field in copied_fields}
+    values["scan_type"] = job.scan_type or "sharepoint"
+    values["tenant_name"] = profile.name if profile else None
+    values["scan_activity"] = job.scan_activity if job.status == "running" else None
+    return ScanJobSummary(**values)
+
+
+def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
+    """Map a ``TenantProfile`` row onto the ``TenantProfileItem`` response schema.
+
+    ``has_certificate`` is derived from whether a certificate thumbprint is
+    stored. The client secret and certificate key material are not passed on.
+    """
+    thumbprint = profile.cert_thumbprint
+    copied_fields = (
+        "id",
+        "name",
+        "tenant_id",
+        "primary_domain",
+        "client_id",
+        "cert_expires_at",
+        "created_at",
+        "updated_at",
+    )
+    values = {field: getattr(profile, field) for field in copied_fields}
+    return TenantProfileItem(
+        has_certificate=bool(thumbprint),
+        cert_thumbprint=thumbprint,
+        **values,
+    )
+
+
+def _sharing_link_risk_label(principal: str) -> str:
+    """Return a risk label for a ``SharingLinks.<guid>.<LinkType>...`` principal.
+
+    Principals that do not start with exactly ``SharingLinks.`` (the check is
+    case-sensitive) yield ``""``. Otherwise the third dot-separated part is the
+    link type: ``Anonymous*`` is ``Critical``, ``Flexible`` is ``High``,
+    ``Organization*`` and ``Direct*`` are ``Low``, and anything else,
+    including a missing link type, is ``Unknown``.
+    """
+    if not principal.startswith("SharingLinks."):
+        return ""
+
+    pieces = principal.split(".", 3)
+    link_type = pieces[2] if len(pieces) >= 3 else ""
+
+    # "Flexible" is an exact match; the others are prefix matches.
+    if link_type == "Flexible":
+        return "High"
+    for prefix, label in (("Anonymous", "Critical"), ("Organization", "Low"), ("Direct", "Low")):
+        if link_type.startswith(prefix):
+            return label
+    return "Unknown"
--- a/containers/clearview/src/clearview_app/api_jobs.py
+++ b/containers/clearview/src/clearview_app/api_jobs.py
@ -0,0 +1,645 @@
+"""Scan-job routes: create, list, inspect, cancel, delete, resolve, export."""
+from __future__ import annotations
+
+import io
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+from fastapi.responses import Response, StreamingResponse
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+
+from .api_helpers import (
+    _build_export_filename,
+    _create_job_from_targets,
+    _enumerate_all_entra_groups,
+    _enumerate_all_mailboxes,
+    _extract_sharing_link_group_and_type,
+    _resolve_credentials,
+    _sharing_link_risk_label,
+    _to_job_summary,
+)
+from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
+from .db import SessionLocal
+from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
+from .scanners import AuthConfig, probe
+from .schemas import (
+    CreateScanJobRequest,
+    PermissionDeviationItem,
+    ProbeResultResponse,
+    ResolveGroupsResponse,
+    ResolveSharingLinksRequest,
+    ResolveSharingLinksResponse,
+    ScanJobCreateResponse,
+    ScanJobDetail,
+    ScanJobSummary,
+    ScanTargetItem,
+    SharingLinkTypesResponse,
+)
+
+router = APIRouter()
+
+
+@router.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
+def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
+    """Create a scan job from a JSON payload.
+
+    Targets come from the payload (``group_ids``, ``mailboxes`` or
+    ``site_urls`` depending on ``scan_type``) unless ``scan_all_groups`` or
+    ``scan_all_mailboxes`` asks for tenant-wide enumeration, in which case the
+    job's ``source_type`` is ``tenant_all`` instead of ``manual``.
+    """
+    with SessionLocal() as db:
+        tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
+            db=db,
+            tenant_profile_id=payload.tenant_profile_id,
+            tenant_id=payload.tenant_id,
+            client_id=payload.client_id,
+            client_secret=payload.client_secret,
+        )
+
+    kind = payload.scan_type
+    enumerate_groups = kind == "entra_groups" and payload.scan_all_groups
+    enumerate_mailboxes = kind == "mailbox" and payload.scan_all_mailboxes
+
+    if enumerate_groups:
+        raw_targets = _enumerate_all_entra_groups(
+            tenant_id=tenant_id,
+            client_id=client_id,
+            client_secret=client_secret,
+            profile_id=profile_id,
+        )
+    elif enumerate_mailboxes:
+        organization = payload.organization
+        # Without an explicit organization, use the profile's primary domain if it has one.
+        if (not organization) and profile_id:
+            with SessionLocal() as db:
+                profile = db.get(TenantProfile, profile_id)
+                if profile and profile.primary_domain:
+                    organization = profile.primary_domain
+        raw_targets = _enumerate_all_mailboxes(
+            organization=organization,
+            tenant_id=tenant_id,
+            client_id=client_id,
+            client_secret=client_secret,
+            profile_id=profile_id,
+        )
+    elif kind == "entra_groups":
+        raw_targets = [str(group_id) for group_id in payload.group_ids]
+    elif kind == "mailbox":
+        raw_targets = [str(mailbox) for mailbox in payload.mailboxes]
+    else:
+        raw_targets = [str(url) for url in payload.site_urls]
+
+    source_type = "tenant_all" if (enumerate_groups or enumerate_mailboxes) else "manual"
+    return _create_job_from_targets(
+        raw_targets=raw_targets,
+        scan_type=kind,
+        skip_default_sites=payload.skip_default_sites,
+        source_type=source_type,
+        tenant_id=tenant_id,
+        client_id=client_id,
+        client_secret=client_secret,
+        tenant_profile_id=profile_id,
+    )
+
+
+@router.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
+def create_scan_job_from_csv(
+    # NOTE(review): unlike the fields below this has a plain default rather than
+    # Form(...), so it is presumably not read from the form body - confirm that
+    # is intended.
+    skip_default_sites: bool = True,
+    # NOTE(review): scan_type is a free-form string here; no validation against
+    # the known scan types is visible in this function.
+    scan_type: str = Form("sharepoint"),
+    tenant_profile_id: str | None = Form(None),
+    tenant_id: str | None = Form(None),
+    client_id: str | None = Form(None),
+    client_secret: str | None = Form(None),
+    file: UploadFile = File(...),
+) -> ScanJobCreateResponse:
+    """Create a scan job whose targets are read from an uploaded CSV file.
+
+    The file is parsed with the mailbox, Entra-group or site parser according
+    to ``scan_type`` and the job is stored with ``source_type="csv"``. When the
+    parser reports invalid rows, ``CSV issues: <count>`` is appended to the
+    job's warning message.
+    """
+    with SessionLocal() as db:
+        resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = _resolve_credentials(
+            db=db,
+            tenant_profile_id=tenant_profile_id,
+            tenant_id=tenant_id,
+            client_id=client_id,
+            client_secret=client_secret,
+        )
+    # Reads the whole upload into memory.
+    content = file.file.read()
+    if scan_type == "mailbox":
+        parsed = parse_mailboxes_csv(content)
+        targets = parsed.mailboxes
+    elif scan_type == "entra_groups":
+        parsed = parse_entra_groups_csv(content)
+        targets = parsed.urls
+    else:
+        parsed = parse_sites_csv(content)
+        targets = parsed.urls
+    response = _create_job_from_targets(
+        raw_targets=targets,
+        scan_type=scan_type,
+        skip_default_sites=skip_default_sites,
+        source_type="csv",
+        tenant_id=resolved_tenant_id,
+        client_id=resolved_client_id,
+        client_secret=resolved_client_secret,
+        tenant_profile_id=profile_id,
+    )
+
+    if parsed.invalid_rows:
+        # The job was already committed above, so the CSV warning is added in a
+        # second session and mirrored onto the response object.
+        csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
+        with SessionLocal() as db:
+            job = db.get(ScanJob, response.job.id)
+            if job:
+                if job.warning_message:
+                    job.warning_message = f"{job.warning_message} | {csv_warning}"
+                else:
+                    job.warning_message = csv_warning
+                job.updated_at = datetime.now(timezone.utc)
+                db.commit()
+                db.refresh(job)
+                response.job.warning_message = job.warning_message
+
+    return response
+
+
+@router.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
+def cancel_scan_job(job_id: str) -> ScanJobSummary:
+    """Mark a queued or running job as cancelled and return its summary.
+
+    Raises:
+        HTTPException: 404 when the job does not exist; 409 when its status is
+            neither ``queued`` nor ``running``.
+    """
+    with SessionLocal() as db:
+        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
+        job = db.execute(stmt).unique().scalar_one_or_none()
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status not in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is not queued or running")
+        now = datetime.now(timezone.utc)
+        job.status = "cancelled"
+        job.updated_at = now
+        job.finished_at = now
+        job.scan_activity = None
+        db.commit()
+        db.refresh(job)
+        # Re-select with the tenant profile eagerly loaded before building the
+        # summary, which reads job.tenant_profile.
+        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
+        job = db.execute(stmt).unique().scalar_one()
+        return _to_job_summary(job)
+
+
+@router.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
+def delete_scan_job(job_id: str) -> Response:
+    """Delete a job that is no longer queued or running.
+
+    Raises:
+        HTTPException: 404 when the job does not exist; 409 when it is still
+            ``queued`` or ``running``.
+    """
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        still_active = job.status in ("queued", "running")
+        if still_active:
+            raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")
+
+        db.delete(job)
+        db.commit()
+        return Response(status_code=204)
+
+
+@router.get("/api/scan-jobs", response_model=list[ScanJobSummary])
+def list_scan_jobs(
+    limit: int = 20,
+    tenant_profile_id: str | None = None,
+    scan_type: str | None = None,
+) -> list[ScanJobSummary]:
+    """List job summaries, newest first by ``created_at``.
+
+    ``limit`` is clamped to the range 1..100. ``tenant_profile_id`` and
+    ``scan_type`` are optional exact-match filters.
+    """
+    page_size = max(1, min(limit, 100))
+
+    stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile))
+    if tenant_profile_id:
+        stmt = stmt.where(ScanJob.tenant_profile_id == tenant_profile_id)
+    if scan_type:
+        stmt = stmt.where(ScanJob.scan_type == scan_type)
+    stmt = stmt.order_by(ScanJob.created_at.desc()).limit(page_size)
+
+    with SessionLocal() as db:
+        rows = list(db.execute(stmt).unique().scalars())
+        return [_to_job_summary(row) for row in rows]
+
+
+@router.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
+def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
+    """Count this job's deviations per sharing-link type.
+
+    Principals that are not sharing-link groups are ignored.
+
+    Raises:
+        HTTPException: 404 when the job does not exist.
+    """
+    with SessionLocal() as db:
+        if not db.get(ScanJob, job_id):
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        query = select(PermissionDeviation.principal).where(PermissionDeviation.job_id == job_id)
+        principals = list(db.execute(query).scalars())
+
+    type_counts: dict[str, int] = {}
+    for raw_principal in principals:
+        group_and_type = _extract_sharing_link_group_and_type(str(raw_principal or ""))
+        if group_and_type:
+            link_type = group_and_type[1]
+            type_counts[link_type] = type_counts.get(link_type, 0) + 1
+
+    return SharingLinkTypesResponse(type_counts=type_counts)
+
+
+@router.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
+def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
+    """Resolve members of this job's sharing-link groups and store them on its deviations.
+
+    Only deviations whose link type is listed in ``payload.link_types`` are
+    processed. Each distinct ``(site_url, group_name)`` pair is resolved once
+    and the comma-joined member list is written to ``resolved_members`` on
+    every deviation in that group.
+
+    Raises:
+        HTTPException: 404 when the job does not exist; 409 when it is still
+            ``queued`` or ``running``.
+    """
+    # NOTE(review): this imports from ``.scanner`` while resolve_groups_endpoint
+    # imports resolve_sharing_link_members from ``.scanners.sharepoint`` -
+    # confirm both modules exist and which one is intended.
+    from .scanner import resolve_sharing_link_members
+
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is still running")
+
+        cert_private_key: str | None = None
+        cert_thumbprint: str | None = None
+        cert_public_pem: str | None = None
+        if job.tenant_profile_id:
+            profile = db.get(TenantProfile, job.tenant_profile_id)
+            if profile:
+                cert_private_key = profile.cert_private_key
+                cert_thumbprint = profile.cert_thumbprint
+                cert_public_pem = profile.cert_public_pem
+
+        # Credentials come from the values stored on the job, plus the
+        # profile's certificate when the job has a tenant profile.
+        auth = AuthConfig(
+            tenant_id=job.auth_tenant_id or "",
+            client_id=job.auth_client_id or "",
+            client_secret=job.auth_client_secret or "",
+            cert_private_key=cert_private_key,
+            cert_thumbprint=cert_thumbprint,
+            cert_public_pem=cert_public_pem,
+        )
+
+        all_deviations = list(
+            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
+        )
+
+    # Group by (site_url, principal) so each unique group is resolved once
+    groups: dict[tuple[str, str], list[int]] = {}
+    for dev in all_deviations:
+        parsed = _extract_sharing_link_group_and_type(dev.principal)
+        if not parsed:
+            continue
+        group_name, link_type = parsed
+        if link_type not in payload.link_types:
+            continue
+        key = (dev.site_url, group_name)
+        groups.setdefault(key, []).append(dev.id)
+
+    updated_deviations = 0
+    for (site_url, group_name), dev_ids in groups.items():
+        members = resolve_sharing_link_members(site_url, group_name, auth)
+        resolved_members = ", ".join(members) if members else ""
+        # One short session and commit per group.
+        with SessionLocal() as db:
+            for dev_id in dev_ids:
+                dev = db.get(PermissionDeviation, dev_id)
+                if dev:
+                    dev.resolved_members = resolved_members
+            db.commit()
+        # Counts every id in the group, including any whose row was not found above.
+        updated_deviations += len(dev_ids)
+
+    return ResolveSharingLinksResponse(
+        resolved_groups=len(groups),
+        updated_deviations=updated_deviations,
+    )
+
+
+@router.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
+def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
+    """
+    Expand group principals on a finished job's deviations.
+
+    Every deviation whose principal is recognised as a SharePoint group or an
+    Entra/AAD group is bucketed per (site_url, principal). Each bucket is
+    resolved once and the comma-joined member list is stored in
+    ``permission_deviations.resolved_members``. Principals that match neither
+    predicate are ignored. A group whose lookup fails or yields no members is
+    counted as skipped and left untouched.
+
+    Raises:
+        HTTPException: 404 if the job does not exist, 409 while it is queued
+            or running, 400 for mailbox jobs.
+    """
+    from .scanners.sharepoint import (
+        is_aad_group_principal,
+        is_sharepoint_group_principal,
+        resolve_aad_group_members,
+        resolve_sharing_link_members,
+    )
+
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is still running")
+        if (job.scan_type or "sharepoint") == "mailbox":
+            raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")
+
+        # Certificate material is stored on the tenant profile, when the job has one.
+        profile = db.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
+        auth = AuthConfig(
+            tenant_id=job.auth_tenant_id or "",
+            client_id=job.auth_client_id or "",
+            client_secret=job.auth_client_secret or "",
+            cert_private_key=profile.cert_private_key if profile else None,
+            cert_thumbprint=profile.cert_thumbprint if profile else None,
+            cert_public_pem=profile.cert_public_pem if profile else None,
+        )
+
+        job_rows = list(
+            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
+        )
+
+    # Bucket deviation ids by (site_url, principal) so every group is resolved only once.
+    buckets: dict[tuple[str, str], list[int]] = {}
+    for row in job_rows:
+        if is_sharepoint_group_principal(row.principal) or is_aad_group_principal(row.principal):
+            buckets.setdefault((row.site_url, row.principal), []).append(row.id)
+
+    counts = {"resolved": 0, "skipped": 0, "updated": 0}
+    for (site, principal), row_ids in buckets.items():
+        try:
+            member_names = (
+                resolve_aad_group_members(principal, auth)
+                if is_aad_group_principal(principal)
+                else resolve_sharing_link_members(site, principal, auth)
+            )
+        except Exception:  # noqa: BLE001
+            # Best effort: a failing lookup is treated like an empty group.
+            member_names = []
+
+        if member_names:
+            joined = ", ".join(member_names)
+            with SessionLocal() as db:
+                for row_id in row_ids:
+                    stored = db.get(PermissionDeviation, row_id)
+                    if stored:
+                        stored.resolved_members = joined
+                db.commit()
+            counts["resolved"] += 1
+            counts["updated"] += len(row_ids)
+        else:
+            counts["skipped"] += 1
+
+    return ResolveGroupsResponse(
+        resolved_groups=counts["resolved"],
+        skipped_groups=counts["skipped"],
+        updated_deviations=counts["updated"],
+    )
+
+
+@router.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
+def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
+    """
+    Probe a single scan target and record the outcome on the target row.
+
+    The probe runs between two database sessions, so no session is held open
+    while it executes. The result (ok flag, message, timestamp) is then
+    persisted on the target and echoed back to the caller.
+
+    Raises:
+        HTTPException: 404 if the job or target is missing (or the target
+            belongs to another job, or vanished before the result could be
+            stored), 409 while the job is queued or running.
+    """
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        target = db.get(ScanTarget, target_id)
+        if not target or target.job_id != job_id:
+            raise HTTPException(status_code=404, detail="Target not found")
+        if job.status in ("queued", "running"):
+            raise HTTPException(status_code=409, detail="Job is still running")
+
+        # Certificate material is stored on the tenant profile, when the job has one.
+        profile = db.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
+        auth = AuthConfig(
+            tenant_id=job.auth_tenant_id or "",
+            client_id=job.auth_client_id or "",
+            client_secret=job.auth_client_secret or "",
+            cert_private_key=profile.cert_private_key if profile else None,
+            cert_thumbprint=profile.cert_thumbprint if profile else None,
+            cert_public_pem=profile.cert_public_pem if profile else None,
+        )
+        # Copy plain values out of the session before it closes.
+        probe_url = target.site_url
+        probe_kind = job.scan_type or "sharepoint"
+
+    outcome = probe(probe_kind, probe_url, auth)
+
+    with SessionLocal() as db:
+        stored = db.get(ScanTarget, target_id)
+        if not stored:
+            raise HTTPException(status_code=404, detail="Target not found")
+        stamp = datetime.now(timezone.utc)
+        stored.last_probe_at = stamp
+        stored.last_probe_ok = outcome.ok
+        stored.last_probe_message = outcome.message
+        stored.updated_at = stamp
+        db.commit()
+        db.refresh(stored)
+        return ProbeResultResponse(
+            target_id=stored.id,
+            ok=outcome.ok,
+            message=outcome.message,
+            last_probe_at=stored.last_probe_at,
+        )
+
+
+@router.get("/api/scan-jobs/{job_id}/export")
+def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
+    """
+    Export a scan job's targets and results as an Excel workbook download.
+
+    The workbook contains a "Targets" sheet plus one results sheet whose name
+    and columns depend on the job's scan type. When ``site_url`` is given,
+    both sheets are restricted to rows with that site URL.
+
+    Raises:
+        HTTPException: 404 if the job does not exist.
+    """
+    import openpyxl
+    from openpyxl.styles import Font, PatternFill
+
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
+        if site_url:
+            targets_q = targets_q.where(ScanTarget.site_url == site_url)
+        targets = list(db.execute(targets_q).scalars())
+
+        deviations_q = (
+            select(PermissionDeviation)
+            .where(PermissionDeviation.job_id == job.id)
+            .order_by(PermissionDeviation.id.desc())
+        )
+        if site_url:
+            deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
+        deviations = list(db.execute(deviations_q).scalars())
+
+    wb = openpyxl.Workbook()
+    header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
+    header_font_white = Font(bold=True, color="FFFFFF")
+
+    # Risk label -> (cell fill, cell font) used to highlight the "Link Risk" column.
+    _risk_styles: dict[str, tuple] = {
+        "Critical": (
+            PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
+            Font(bold=True, color="7B0000"),
+        ),
+        "High": (
+            PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
+            Font(bold=True, color="7C2D00"),
+        ),
+        "Low": (
+            PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
+            Font(bold=True, color="0C4A6E"),
+        ),
+        "Unknown": (
+            PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
+            Font(bold=True, color="555555"),
+        ),
+    }
+
+    def _style_header(ws, headers):
+        # Write the header row and apply the shared header styling to it.
+        ws.append(headers)
+        for cell in ws[1]:
+            cell.font = header_font_white
+            cell.fill = header_fill
+
+    scan_type = job.scan_type or "sharepoint"
+
+    target_label = {
+        "sharepoint": "Site URL",
+        "sharepoint_root": "Site URL",
+        "mailbox": "Mailbox",
+        "entra_groups": "Group",
+    }.get(scan_type, "Target")
+
+    # Targets sheet
+    ws_targets = wb.active
+    ws_targets.title = "Targets"
+    _style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
+    for t in targets:
+        ws_targets.append([
+            t.site_url,
+            t.status,
+            t.attempts,
+            t.error_message or "",
+            t.started_at.isoformat() if t.started_at else "",
+            t.finished_at.isoformat() if t.finished_at else "",
+        ])
+    # Size each column to its longest cell plus a little padding.
+    for col in ws_targets.columns:
+        ws_targets.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
+
+    # Results sheet — name and columns depend on scan type
+    if scan_type == "mailbox":
+        ws_dev = wb.create_sheet("Mailbox Permissions")
+        _style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
+        deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
+        for d in deviations:
+            ws_dev.append([
+                d.site_url,
+                d.object_url,
+                d.permission_type or d.object_type,
+                d.principal,
+                d.role_name,
+            ])
+    elif scan_type == "entra_groups":
+        ws_dev = wb.create_sheet("Group Memberships")
+        _style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
+        deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
+        for d in deviations:
+            ws_dev.append([
+                d.object_url,
+                d.permission_type or "",
+                d.principal,
+                d.role_name,
+            ])
+    elif scan_type == "sharepoint_root":
+        ws_dev = wb.create_sheet("Root Permissions")
+        _style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
+        deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
+        for d in deviations:
+            ws_dev.append([
+                d.site_url,
+                d.principal,
+                d.resolved_members or "",
+                d.role_name,
+            ])
+    else:
+        ws_dev = wb.create_sheet("Deviations")
+        _style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
+        deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
+        for d in deviations:
+            base = (d.site_url or "").rstrip("/")
+            # The sort key above already tolerates a missing object_url; do the
+            # same here instead of failing on None.startswith().
+            obj_url = d.object_url or ""
+            # Show the object path relative to its site when it lives under it.
+            obj_rel = obj_url[len(base):] if base and obj_url.startswith(base) else obj_url
+            link_risk = _sharing_link_risk_label(d.principal)
+            ws_dev.append([
+                d.site_url,
+                obj_rel,
+                d.object_type,
+                d.principal,
+                link_risk,
+                d.resolved_members or "",
+                d.role_name,
+                d.delta_type,
+            ])
+            if link_risk in _risk_styles:
+                risk_fill, risk_font = _risk_styles[link_risk]
+                # Column 5 is "Link Risk"; style the cell in the row just appended.
+                risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
+                risk_cell.fill = risk_fill
+                risk_cell.font = risk_font
+    for col in ws_dev.columns:
+        ws_dev.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
+
+    # Serialize the workbook in memory and stream it back as an attachment.
+    buf = io.BytesIO()
+    wb.save(buf)
+    buf.seek(0)
+
+    filename = _build_export_filename(job, job_id)
+    return StreamingResponse(
+        buf,
+        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
+    )
+
+
+@router.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
+def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
+    """
+    Return a job summary together with its targets and deviations.
+
+    With ``site_url`` both lists are filtered to that site URL; without it all
+    targets are returned and the deviation list is capped at 1000 rows.
+
+    Raises:
+        HTTPException: 404 if the job does not exist.
+    """
+    with SessionLocal() as db:
+        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        target_stmt = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
+        deviation_stmt = (
+            select(PermissionDeviation)
+            .where(PermissionDeviation.job_id == job.id)
+            .order_by(PermissionDeviation.site_url.asc(), PermissionDeviation.object_url.asc(), PermissionDeviation.id.asc())
+        )
+        if site_url:
+            target_stmt = target_stmt.where(ScanTarget.site_url == site_url)
+            deviation_stmt = deviation_stmt.where(PermissionDeviation.site_url == site_url)
+        else:
+            # Unfiltered requests only get the first 1000 deviations.
+            deviation_stmt = deviation_stmt.limit(1000)
+
+        target_rows = list(db.execute(target_stmt).scalars())
+        deviation_rows = list(db.execute(deviation_stmt).scalars())
+
+        summary_fields = _to_job_summary(job).model_dump()
+        target_items = [
+            ScanTargetItem(
+                id=row.id,
+                site_url=row.site_url,
+                status=row.status,
+                attempts=row.attempts,
+                error_message=row.error_message,
+                started_at=row.started_at,
+                finished_at=row.finished_at,
+                last_probe_at=row.last_probe_at,
+                last_probe_ok=row.last_probe_ok,
+                last_probe_message=row.last_probe_message,
+            )
+            for row in target_rows
+        ]
+        deviation_items = [
+            PermissionDeviationItem(
+                id=row.id,
+                site_url=row.site_url,
+                object_url=row.object_url,
+                object_type=row.object_type,
+                principal=row.principal,
+                role_name=row.role_name,
+                delta_type=row.delta_type,
+                permission_type=row.permission_type,
+                resolved_members=row.resolved_members,
+                created_at=row.created_at,
+            )
+            for row in deviation_rows
+        ]
+        return ScanJobDetail(**summary_fields, targets=target_items, deviations=deviation_items)
--- a/containers/clearview/src/clearview_app/api_onboarding.py
+++ b/containers/clearview/src/clearview_app/api_onboarding.py
@ -0,0 +1,76 @@
+"""Microsoft onboarding routes (admin-consent connect + scan-app creation)."""
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import RedirectResponse
+
+from .onboarding import (
+    OnboardingError,
+    consume_callback_state,
+    create_connect_url,
+    create_scan_app_for_tenant,
+)
+from .schemas import (
+    ConnectMicrosoftResponse,
+    CreateScanAppRequest,
+    CreateScanAppResponse,
+)
+
+router = APIRouter()
+
+
+@router.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
+def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
+    """
+    Create the scan app for a tenant and return its identifiers and secret.
+
+    Raises:
+        HTTPException: 400 for an ``OnboardingError``, 500 for any other
+            failure raised by the creation call.
+    """
+    try:
+        created = create_scan_app_for_tenant(
+            tenant_id=payload.tenant_id,
+            display_name=payload.display_name,
+        )
+    except OnboardingError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {exc}") from exc
+
+    # The response mirrors these attributes of the creation result one-to-one.
+    mirrored = (
+        "tenant_id",
+        "client_id",
+        "client_secret",
+        "app_object_id",
+        "service_principal_id",
+        "display_name",
+    )
+    return CreateScanAppResponse(**{field: getattr(created, field) for field in mirrored})
+
+
+@router.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
+def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
+    """
+    Return the URL the browser should open to start the Microsoft connect flow.
+
+    Raises:
+        HTTPException: 400 when building the URL raises ``OnboardingError``.
+    """
+    try:
+        connect_url = create_connect_url()
+        response = ConnectMicrosoftResponse(connect_url=connect_url)
+    except OnboardingError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    return response
+
+
+@router.get("/api/onboarding/microsoft/callback")
+def onboarding_microsoft_callback(
+    tenant: str | None = None,
+    state: str | None = None,
+    error: str | None = None,
+    error_description: str | None = None,
+) -> RedirectResponse:
+    """
+    Handle the browser redirect back from the Microsoft connect flow.
+
+    Always answers with a redirect to ``/`` carrying the outcome as query
+    parameters: ``onboarding_status=error`` plus ``onboarding_message`` when
+    the provider reported an error, the state is missing/invalid, or no tenant
+    was supplied; otherwise ``onboarding_status=connected`` plus ``tenant_id``.
+    """
+    from urllib.parse import urlencode
+
+    def _redirect(**params: str) -> RedirectResponse:
+        # urlencode percent-escapes every value (spaces become "+"), so text
+        # coming from the request cannot break out of its parameter or inject
+        # additional ones via "&", "#" or "%".
+        return RedirectResponse(url=f"/?{urlencode(params)}")
+
+    if error:
+        return _redirect(onboarding_status="error", onboarding_message=error_description or error)
+
+    if not state or not consume_callback_state(state):
+        return _redirect(onboarding_status="error", onboarding_message="invalid_or_expired_state")
+
+    if not tenant:
+        return _redirect(onboarding_status="error", onboarding_message="missing_tenant")
+
+    return _redirect(onboarding_status="connected", tenant_id=tenant)
+
+
+@router.get("/api/onboarding/status")
+def onboarding_status() -> dict[str, bool]:
+    """Report whether all three onboarding settings are configured (non-empty)."""
+    from . import config
+
+    required_settings = (
+        config.ONBOARDING_CLIENT_ID,
+        config.ONBOARDING_CLIENT_SECRET,
+        config.ONBOARDING_REDIRECT_URI,
+    )
+    return {"automated_available": all(required_settings)}
--- a/containers/clearview/src/clearview_app/api_tenants.py
+++ b/containers/clearview/src/clearview_app/api_tenants.py
@ -0,0 +1,86 @@
+"""Tenant profile + certificate routes."""
+from __future__ import annotations
+
+import uuid
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import Response
+from sqlalchemy import select, text
+
+from .api_helpers import _to_tenant_item
+from .cert import generate_tenant_certificate
+from .db import SessionLocal
+from .models import TenantProfile
+from .schemas import (
+    CreateTenantProfileRequest,
+    TenantCertificateResponse,
+    TenantProfileItem,
+)
+
+router = APIRouter()
+
+
+@router.get("/api/tenants", response_model=list[TenantProfileItem])
+def list_tenants() -> list[TenantProfileItem]:
+    """Return every tenant profile, oldest first (ordered by ``created_at``)."""
+    oldest_first = select(TenantProfile).order_by(TenantProfile.created_at.asc())
+    with SessionLocal() as db:
+        rows = list(db.execute(oldest_first).scalars())
+        items: list[TenantProfileItem] = []
+        for row in rows:
+            items.append(_to_tenant_item(row))
+        return items
+
+
+@router.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
+def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
+    """
+    Store a new tenant profile and return it.
+
+    Text fields are stripped of surrounding whitespace; the primary domain is
+    additionally lower-cased. An empty/missing domain or secret is stored as
+    ``None``.
+    """
+    with SessionLocal() as db:
+        created = datetime.now(timezone.utc)
+        new_id = str(uuid.uuid4())
+
+        domain = None
+        if payload.primary_domain:
+            domain = payload.primary_domain.strip().lower()
+        secret = None
+        if payload.client_secret:
+            secret = payload.client_secret.strip()
+
+        row = TenantProfile(
+            id=new_id,
+            name=payload.name.strip(),
+            tenant_id=payload.tenant_id.strip(),
+            primary_domain=domain,
+            client_id=payload.client_id.strip(),
+            client_secret=secret,
+            created_at=created,
+            updated_at=created,
+        )
+        db.add(row)
+        db.commit()
+        db.refresh(row)
+        return _to_tenant_item(row)
+
+
+@router.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
+def generate_certificate(profile_id: str) -> TenantCertificateResponse:
+    """
+    Generate a new certificate for a tenant profile and store it on the profile.
+
+    Private key, public certificate, thumbprint and expiry are written to the
+    profile; only the public parts are returned to the caller.
+
+    Raises:
+        HTTPException: 404 if the profile does not exist.
+    """
+    with SessionLocal() as db:
+        row = db.get(TenantProfile, profile_id)
+        if not row:
+            raise HTTPException(status_code=404, detail="Tenant profile not found")
+
+        cert = generate_tenant_certificate()
+        row.cert_private_key = cert.private_key_pem
+        row.cert_public_pem = cert.public_cert_pem
+        row.cert_thumbprint = cert.thumbprint
+        row.cert_expires_at = cert.expires_at
+        row.updated_at = datetime.now(timezone.utc)
+        db.commit()
+
+        public_view = {
+            "thumbprint": cert.thumbprint,
+            "expires_at": cert.expires_at,
+            "public_cert_pem": cert.public_cert_pem,
+        }
+        return TenantCertificateResponse(**public_view)
+
+
+@router.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
+def delete_tenant(profile_id: str) -> Response:
+    """
+    Delete a tenant profile, first detaching any scan jobs that reference it.
+
+    Raises:
+        HTTPException: 404 if the profile does not exist.
+    """
+    detach_jobs = text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid")
+    with SessionLocal() as db:
+        row = db.get(TenantProfile, profile_id)
+        if not row:
+            raise HTTPException(status_code=404, detail="Tenant profile not found")
+        # Jobs keep existing; they just lose the link to the removed profile.
+        db.execute(detach_jobs, {"pid": profile_id})
+        db.delete(row)
+        db.commit()
+        return Response(status_code=204)
--- a/containers/clearview/src/clearview_app/cert.py
+++ b/containers/clearview/src/clearview_app/cert.py
@ -2,7 +2,7 @@ from __future__ import annotations

 import hashlib
 from dataclasses import dataclass
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone

 from cryptography import x509
 from cryptography.hazmat.primitives import hashes, serialization
@ -30,7 +30,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
    subject = x509.Name([
        x509.NameAttribute(NameOID.COMMON_NAME, "Clearview Scan App"),
    ])
-    expires_at = datetime.utcnow() + timedelta(days=365 * valid_years)
+    expires_at = datetime.now(timezone.utc) + timedelta(days=365 * valid_years)

    cert = (
        x509.CertificateBuilder()
@ -38,7 +38,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
        .issuer_name(subject)
        .public_key(private_key.public_key())
        .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.utcnow())
+        .not_valid_before(datetime.now(timezone.utc))
        .not_valid_after(expires_at)
        .sign(private_key, hashes.SHA256())
    )
--- a/containers/clearview/src/clearview_app/db_migrate.py
+++ b/containers/clearview/src/clearview_app/db_migrate.py
@ -0,0 +1,53 @@
+"""Database migration bootstrap.
+
+Replaces the previous ``Base.metadata.create_all`` + ``_ensure_schema_columns``
+startup path with Alembic. The bootstrap is idempotent and handles three cases:
+
+* **Fresh database** (no tables): run ``upgrade head`` to create the schema and
+  record the Alembic version.
+* **Existing pre-Alembic database** (tables present, no ``alembic_version``):
+  ``stamp`` the baseline revision — adopt it without re-creating existing
+  tables — then run ``upgrade head`` to apply any later revisions.
+* **Already under Alembic**: run ``upgrade head`` to apply any new revisions.
+"""
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from alembic import command
+from alembic.config import Config
+from sqlalchemy import inspect
+
+from .db import engine
+
+log = logging.getLogger(__name__)
+
+_MIGRATIONS_DIR = Path(__file__).resolve().parent / "migrations"
+# A table that exists in every pre-Alembic Clearview database; its presence
+# (without alembic_version) marks a database that predates Alembic adoption.
+_SENTINEL_TABLE = "scan_jobs"
+
+
+def _alembic_config() -> Config:
+    """Build an in-memory Alembic ``Config`` (no ini file) pointing at the migrations directory."""
+    alembic_cfg = Config()
+    script_dir = str(_MIGRATIONS_DIR)
+    alembic_cfg.set_main_option("script_location", script_dir)
+    return alembic_cfg
+
+
+_BASELINE_REVISION = "0001_baseline"
+
+
+def run_migrations() -> None:
+    """
+    Bring the database schema up to date.
+
+    A database that has the sentinel table but no ``alembic_version`` table is
+    first stamped with the baseline revision. In every case ``upgrade head``
+    is then run.
+    """
+    alembic_cfg = _alembic_config()
+    existing = set(inspect(engine).get_table_names())
+
+    predates_alembic = _SENTINEL_TABLE in existing and "alembic_version" not in existing
+    if predates_alembic:
+        # The schema already matches the baseline, so record that revision
+        # instead of re-creating tables; the upgrade below then applies
+        # whatever migrations come after it (e.g. the timestamptz conversion in 0002).
+        log.info("Existing pre-Alembic schema detected; stamping baseline %s.", _BASELINE_REVISION)
+        command.stamp(alembic_cfg, _BASELINE_REVISION)
+
+    log.info("Applying Alembic migrations (upgrade head).")
+    command.upgrade(alembic_cfg, "head")
--- a/containers/clearview/src/clearview_app/main.py
+++ b/containers/clearview/src/clearview_app/main.py
--- a/containers/clearview/src/clearview_app/migrations/env.py
+++ b/containers/clearview/src/clearview_app/migrations/env.py
@ -0,0 +1,58 @@
+"""Alembic environment for Clearview.
+
+Reuses the application's SQLAlchemy engine (already configured with the
+normalized DATABASE_URL and pool_pre_ping) so migrations run against exactly
+the same database the app uses. Logging config from alembic.ini is applied
+only when Alembic is invoked through the CLI; programmatic invocation from
+``clearview_app.db_migrate`` passes a Config without a file.
+"""
+from __future__ import annotations
+
+from logging.config import fileConfig
+
+from alembic import context
+
+from clearview_app.config import DATABASE_URL
+from clearview_app.db import _normalize_database_url, engine as app_engine
+from clearview_app.models import Base
+
+# Alembic Config object for the current invocation.
+config = context.config
+
+# Only a CLI invocation carries an ini file; apply its logging setup if present.
+if config.config_file_name is not None:
+    try:
+        fileConfig(config.config_file_name)
+    except Exception:  # noqa: BLE001 - logging config is best-effort
+        # A broken logging section must not prevent migrations from running.
+        pass
+
+# Model metadata handed to Alembic via context.configure() below.
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in offline mode: configured from the database URL only, no connection."""
+    offline_options = {
+        "url": _normalize_database_url(DATABASE_URL),
+        "target_metadata": target_metadata,
+        "literal_binds": True,
+        "dialect_opts": {"paramstyle": "named"},
+        "compare_type": True,
+    }
+    context.configure(**offline_options)
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations over a connection taken from the application's engine."""
+    with app_engine.connect() as live_connection:
+        online_options = {
+            "connection": live_connection,
+            "target_metadata": target_metadata,
+            "compare_type": True,
+        }
+        context.configure(**online_options)
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+# Executed when Alembic loads this module: pick the runner matching the mode
+# Alembic was invoked in.
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
--- a/containers/clearview/src/clearview_app/migrations/script.py.mako
+++ b/containers/clearview/src/clearview_app/migrations/script.py.mako
@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from __future__ import annotations
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
--- a/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py
+++ b/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py
@ -0,0 +1,31 @@
+"""baseline schema
+
+Captures the full Clearview schema as defined by the SQLAlchemy models at the
+time Alembic was adopted. Creating it via ``Base.metadata.create_all`` keeps the
+baseline guaranteed-identical to the models (the same DDL the app emitted before
+Alembic). Existing databases are ``stamp``-ed to this revision rather than
+re-running ``upgrade`` (see ``clearview_app.db_migrate``).
+
+Revision ID: 0001_baseline
+Revises:
+Create Date: 2026-05-26
+"""
+from __future__ import annotations
+
+from alembic import op
+
+from clearview_app.models import Base
+
+# revision identifiers, used by Alembic.
+revision = "0001_baseline"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create every table defined on the models' metadata, using the migration's connection."""
+    connection = op.get_bind()
+    Base.metadata.create_all(bind=connection)
+
+
+def downgrade() -> None:
+    """Drop every table defined on the models' metadata, using the migration's connection."""
+    connection = op.get_bind()
+    Base.metadata.drop_all(bind=connection)
--- a/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py
+++ b/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py
@ -0,0 +1,63 @@
+"""convert timestamp columns to timestamptz
+
+The app now uses timezone-aware UTC datetimes (DateTime(timezone=True)).
+Existing databases store naive ``timestamp without time zone`` values that were
+written as UTC, so we reinterpret them as UTC while converting. The conversion
+is guarded per column on the current type, so it is a no-op on databases whose
+columns are already ``timestamptz`` (e.g. a fresh DB created from the updated
+baseline models).
+
+Revision ID: 0002_timestamptz
+Revises: 0001_baseline
+Create Date: 2026-05-26
+"""
+from __future__ import annotations
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "0002_timestamptz"
+down_revision = "0001_baseline"
+branch_labels = None
+depends_on = None
+
+# Table -> datetime columns (names come from our own models, never user input).
+_COLUMNS: dict[str, tuple[str, ...]] = {
+    "tenant_profiles": ("cert_expires_at", "created_at", "updated_at"),
+    "scan_jobs": ("created_at", "updated_at", "started_at", "finished_at", "heartbeat_at"),
+    "scan_targets": ("last_probe_at", "created_at", "updated_at", "started_at", "finished_at"),
+    "permission_deviations": ("created_at",),
+}
+
+
+def _column_type(bind, table: str, column: str) -> str | None:
+    """
+    Return the ``information_schema`` data type of ``table.column``.
+
+    The lookup is limited to the connection's current schema so that a table
+    with the same name in another schema cannot decide whether the column is
+    converted. Returns ``None`` when no such column is found.
+    """
+    return bind.execute(
+        sa.text(
+            "SELECT data_type FROM information_schema.columns "
+            "WHERE table_schema = current_schema() "
+            "AND table_name = :t AND column_name = :c"
+        ),
+        {"t": table, "c": column},
+    ).scalar()
+
+
+def upgrade() -> None:
+    """Convert each listed column still typed ``timestamp without time zone`` to ``timestamptz``, reading stored values as UTC."""
+    connection = op.get_bind()
+    pairs = [(tbl, col) for tbl, cols in _COLUMNS.items() for col in cols]
+    for tbl, col in pairs:
+        # Columns that are already timestamptz are skipped.
+        if _column_type(connection, tbl, col) != "timestamp without time zone":
+            continue
+        statement = (
+            f'ALTER TABLE {tbl} ALTER COLUMN {col} '
+            f"TYPE timestamptz USING {col} AT TIME ZONE 'UTC'"
+        )
+        op.execute(statement)
+
+
+def downgrade() -> None:
+    """Convert each listed column typed ``timestamp with time zone`` back to plain ``timestamp``, keeping the UTC wall-clock value."""
+    connection = op.get_bind()
+    pairs = [(tbl, col) for tbl, cols in _COLUMNS.items() for col in cols]
+    for tbl, col in pairs:
+        # Only columns that are currently timestamptz are converted.
+        if _column_type(connection, tbl, col) != "timestamp with time zone":
+            continue
+        statement = (
+            f'ALTER TABLE {tbl} ALTER COLUMN {col} '
+            f"TYPE timestamp USING {col} AT TIME ZONE 'UTC'"
+        )
+        op.execute(statement)
+                )
--- a/containers/clearview/src/clearview_app/models.py
+++ b/containers/clearview/src/clearview_app/models.py
@ -1,11 +1,16 @@
 from __future__ import annotations

-from datetime import datetime
+from datetime import datetime, timezone

 from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship


+def _utcnow() -> datetime:
+    """Return the current time as a timezone-aware UTC datetime (default for timestamp columns)."""
+    utc = timezone.utc
+    return datetime.now(tz=utc)
+
+
 class Base(DeclarativeBase):
    pass

@ -22,9 +27,9 @@ class TenantProfile(Base):
    cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True)
    cert_public_pem: Mapped[str | None] = mapped_column(Text, nullable=True)
    cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True)
-    cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)

    jobs: Mapped[list["ScanJob"]] = relationship(back_populates="tenant_profile")

@ -56,11 +61,11 @@ class ScanJob(Base):
    warning_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)

-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    tenant_profile: Mapped["TenantProfile | None"] = relationship(back_populates="jobs")
    targets: Mapped[list["ScanTarget"]] = relationship(back_populates="job", cascade="all,delete-orphan")
@ -79,14 +84,14 @@ class ScanTarget(Base):
    attempts: Mapped[int] = mapped_column(Integer, default=0)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)

-    last_probe_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    last_probe_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    last_probe_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
    last_probe_message: Mapped[str | None] = mapped_column(Text, nullable=True)

-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
-    finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    job: Mapped[ScanJob] = relationship(back_populates="targets")
    deviations: Mapped[list["PermissionDeviation"]] = relationship(back_populates="target", cascade="all,delete-orphan")
@ -108,7 +113,7 @@ class PermissionDeviation(Base):
    permission_type: Mapped[str | None] = mapped_column(String(32), nullable=True)
    resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True)

-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)

    job: Mapped[ScanJob] = relationship(back_populates="deviations")
    target: Mapped[ScanTarget] = relationship(back_populates="deviations")
--- a/containers/clearview/src/clearview_app/scanners/sharepoint.py
+++ b/containers/clearview/src/clearview_app/scanners/sharepoint.py
@ -1,6 +1,7 @@
 from __future__ import annotations

 import re
+import threading
 import time
 from dataclasses import dataclass
 from urllib.parse import urlparse
@ -32,7 +33,13 @@ class PermissionEntry:
    role_name: str


-_TOKEN_CACHE: dict[str, str] = {}
+# Cache maps cache_key -> (access_token, expires_at_epoch). Guarded by
+# _TOKEN_LOCK because the worker acquires tokens from multiple threads.
+_TOKEN_CACHE: dict[str, tuple[str, float]] = {}
+_TOKEN_LOCK = threading.Lock()
+# Reuse one MSAL app per (tenant, client, auth_method) so MSAL's own token
+# cache works and refreshes app tokens automatically.
+_MSAL_APPS: dict[str, "msal.ConfidentialClientApplication"] = {}


 def scan_site_for_deviations(
@ -612,18 +619,20 @@ def _probe_hint(error: str, stage: str) -> str:
    return error[:220]


-def _get_token_for_host(host: str, auth: AuthConfig) -> str:
-    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
-    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
-    cached = _TOKEN_CACHE.get(cache_key)
-    if cached:
-        return cached
+def _get_msal_app(auth: AuthConfig, auth_method: str) -> "msal.ConfidentialClientApplication":
+    """Return a cached ConfidentialClientApplication for these credentials.
+
+    Reusing the app object lets MSAL's built-in token cache serve and refresh
+    app-only tokens instead of re-authenticating on every call.
+    """
+    app_key = f"{auth.tenant_id}|{auth.client_id}|{auth_method}"
+    app = _MSAL_APPS.get(app_key)
+    if app is not None:
+        return app

-    scope = f"https://{host}/.default"
    authority = f"https://login.microsoftonline.com/{auth.tenant_id}"
-
    if auth_method == "cert":
-        client_credential = {
+        client_credential: dict[str, str | None] | str | None = {
            "thumbprint": auth.cert_thumbprint,
            "private_key": auth.cert_private_key,
        }
@ -635,6 +644,21 @@ def _get_token_for_host(host: str, auth: AuthConfig) -> str:
        authority=authority,
        client_credential=client_credential,
    )
+    _MSAL_APPS[app_key] = app
+    return app
+
+
+def _get_token_for_host(host: str, auth: AuthConfig) -> str:
+    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
+    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
+
+    with _TOKEN_LOCK:
+        cached = _TOKEN_CACHE.get(cache_key)
+        if cached is not None and time.time() < cached[1]:
+            return cached[0]
+
+        scope = f"https://{host}/.default"
+        app = _get_msal_app(auth, auth_method)
        result = app.acquire_token_for_client(scopes=[scope])

        if "access_token" not in result:
@ -643,7 +667,10 @@ def _get_token_for_host(host: str, auth: AuthConfig) -> str:
            raise RuntimeError(f"Token request failed ({error}): {description[:300]}")

        token = str(result["access_token"])
-    _TOKEN_CACHE[cache_key] = token
+        # expires_in is seconds-from-now; refresh 60s early to avoid edge expiry.
+        expires_in = int(result.get("expires_in", 3600))
+        expires_at = time.time() + max(expires_in - 60, 0)
+        _TOKEN_CACHE[cache_key] = (token, expires_at)
        return token


--- a/containers/clearview/src/clearview_app/schemas.py
+++ b/containers/clearview/src/clearview_app/schemas.py
@ -1,9 +1,14 @@
 from __future__ import annotations

 from datetime import datetime
+from typing import Literal

 from pydantic import BaseModel, Field, HttpUrl

+# Valid scan types, mirrored by the frontend scan-type dropdowns. Used to
+# validate incoming job requests (FastAPI returns 422 on anything else).
+ScanType = Literal["sharepoint", "sharepoint_root", "mailbox", "entra_groups"]
+

 class CreateTenantProfileRequest(BaseModel):
    name: str
@ -33,7 +38,7 @@ class TenantCertificateResponse(BaseModel):


 class CreateScanJobRequest(BaseModel):
-    scan_type: str = "sharepoint"
+    scan_type: ScanType = "sharepoint"
    site_urls: list[HttpUrl] = Field(default_factory=list)
    mailboxes: list[str] = Field(default_factory=list)
    scan_all_mailboxes: bool = False
--- a/containers/clearview/src/clearview_app/version.py
+++ b/containers/clearview/src/clearview_app/version.py
@ -7,7 +7,7 @@ history, so operators can see exactly which image build is running.
 from __future__ import annotations

 VERSION = "v0.1.0"
-BUILD = 1
+BUILD = 2


 def display_version() -> str:
--- a/containers/clearview/src/clearview_app/worker.py
+++ b/containers/clearview/src/clearview_app/worker.py
@ -4,7 +4,7 @@ import logging
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
-from datetime import datetime
+from datetime import datetime, timezone

 from sqlalchemy import select

@ -47,17 +47,21 @@ class ScanWorker:

    def _process_next_job(self) -> bool:
        with SessionLocal() as db:
+            # Atomic claim: lock the chosen queued row and skip rows already
+            # locked by another worker, so multiple workers/replicas never grab
+            # the same job. The status flip is committed in this transaction.
            job = db.execute(
                select(ScanJob)
                .where(ScanJob.status == "queued")
                .order_by(ScanJob.created_at.asc())
                .limit(1)
+                .with_for_update(skip_locked=True)
            ).scalar_one_or_none()

            if job is None:
                return False

-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
            job.status = "running"
            job.started_at = now
            job.heartbeat_at = now
@ -96,7 +100,7 @@ class ScanWorker:
            job = db.get(ScanJob, job_id)
            if not job:
                return
-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
            job.heartbeat_at = now
            job.updated_at = now
            job.finished_at = now
@ -113,7 +117,7 @@ class ScanWorker:
            if not job or not target:
                return

-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
            target.status = "running"
            target.started_at = now
            target.updated_at = now
@ -128,7 +132,7 @@ class ScanWorker:
                target = db.get(ScanTarget, target_id)
                if not job or not target:
                    return
-                now = datetime.utcnow()
+                now = datetime.now(timezone.utc)
                target.status = "failed"
                target.attempts = 1
                target.error_message = f"Preflight: {probe.message}"
@ -173,7 +177,7 @@ class ScanWorker:
                            )
                        )

-                    now = datetime.utcnow()
+                    now = datetime.now(timezone.utc)
                    target.status = "completed"
                    target.attempts = attempt
                    target.error_message = None
@ -203,7 +207,7 @@ class ScanWorker:
            if not job or not target:
                return

-            now = datetime.utcnow()
+            now = datetime.now(timezone.utc)
            target.status = "failed"
            target.attempts = max_attempts
            target.error_message = last_error
@ -252,7 +256,7 @@ class ScanWorker:
        with SessionLocal() as db:
            target = db.get(ScanTarget, target_id)
            if target:
-                now = datetime.utcnow()
+                now = datetime.now(timezone.utc)
                target.last_probe_at = now
                target.last_probe_ok = result.ok
                target.last_probe_message = result.message
@ -298,8 +302,8 @@ class ScanWorker:
                        job.scan_activity = activity
                        if items > 0:
                            job.items_scanned += items
-                        job.heartbeat_at = datetime.utcnow()
-                        job.updated_at = datetime.utcnow()
+                        job.heartbeat_at = datetime.now(timezone.utc)
+                        job.updated_at = datetime.now(timezone.utc)
                        db.commit()
            except Exception:  # noqa: BLE001
                pass
--- a/docs/changelog-develop.md
+++ b/docs/changelog-develop.md
@ -2,6 +2,42 @@

 This file documents changes on the develop branch of this project.

+## 2026-05-26 — UI/UX: dead CSS removal, a11y, distinct risk colours, richer dashboard
+
+### Added
+- **Dashboard enrichment** — a fourth KPI card **With errors** (`#statErrors`, counts jobs that are `completed_with_errors` or have `failed_targets > 0`) and a **Recent jobs** panel (`#dashRecentJobs`, last 5 jobs, each row clickable to jump to its details). Populated from the existing `/api/scan-jobs` list in `refreshJobs()` via a new `renderDashRecent()`; all interpolated fields run through `escHtml()`.
+
+### Changed
+- **Removed dead CSS** — the pre-sidebar `.topbar`, `.topbar-actions`, and `.layout` rules (and their now-orphaned references inside the 930px/640px media queries) were deleted; the layout has used `.app-shell`/`.sidebar`/`.content` since the sidebar refactor.
+- **Accessibility** — focus outline strengthened from `rgba(14,165,233,0.38)` to a solid `var(--cv-accent)` (meets the WCAG 3:1 non-text contrast requirement) and now also covers `a:focus-visible`. On route changes (`applyRoute`), focus now moves to the new page's first heading (`h1/h2`, `tabindex=-1`) and `document.title` is updated, so screen-reader/keyboard users land in the freshly shown content.
+- **Distinct risk colours** — the `risk.warn` badge changed from accent-blue (indistinguishable from `info`/`low`) to amber (`#854d0e` on `rgba(234,179,8,.18)`), giving a real low→high colour gradient.
+- **Consistent XSS escaping** — `job.id` and `job.source_type` in the Scan Jobs table are now passed through `escHtml()` (previously interpolated raw), matching the rest of the table.
+
+## 2026-05-26 — Split monolithic main.py into route modules
+
+### Changed
+- **`main.py` reduced from 1152 to 64 lines** — now a composition root that only wires the FastAPI app, scan-worker lifecycle, `/healthz`, `/api/version`, the `/` index + static mount, and `include_router` for the new route modules. All endpoint logic moved out verbatim (behaviour-preserving).
+- **New route modules** (flat modules at package level so existing single-dot relative imports stay unchanged — lower risk than a `routers/` subpackage): `api_tenants.py` (tenant profiles + certificate), `api_jobs.py` (all scan-job routes incl. CSV import, cancel/delete, resolve-sharing-links, resolve-groups, test-connection, Excel export, detail), `api_onboarding.py` (Microsoft connect/callback/scan-app). Shared helpers (`_resolve_credentials`, `_create_job_from_targets`, `_enumerate_all_*`, `_to_job_summary`, `_to_tenant_item`, `_build_export_filename`, `_sharing_link_risk_label`, `_extract_sharing_link_group_and_type`) extracted to `api_helpers.py`.
+- **Verified behaviour-preserving** — captured the OpenAPI route set before and after the split; both expose the same 22 endpoints (empty `diff`). Built the image, booted against a fresh DB: `/healthz`, `/api/version`, `/api/tenants`, `/api/scan-jobs` all respond, invalid `scan_type` still returns 422, no startup errors.
+
+## 2026-05-26 — Correctness P1: token cache, atomic job claim, timezone-aware datetimes, scan_type validation
+
+### Changed
+- **Token cache now has TTL + thread lock + MSAL app reuse** (`scanners/sharepoint.py`) — `_TOKEN_CACHE` previously stored access tokens as plain strings forever, so long scans started failing with 401s once the ~1h token expired. It now stores `(token, expires_at)` and refreshes 60s before expiry, guarded by a new `_TOKEN_LOCK` (the worker fetches tokens from multiple threads). New `_get_msal_app()` caches one `ConfidentialClientApplication` per `(tenant, client, auth_method)` so MSAL's own token cache is reused instead of building a fresh app on every call.
+- **Atomic job claim** (`worker.py`) — the queued-job selection now uses `.with_for_update(skip_locked=True)` (`SELECT … FOR UPDATE SKIP LOCKED`), so multiple worker threads/replicas can never claim the same job. Behaviour is unchanged for the current single worker but is now replica-safe.
+- **Timezone-aware datetimes everywhere** — replaced all 24 uses of `datetime.utcnow()` (naive, deprecated) with `datetime.now(timezone.utc)` across `models.py`, `worker.py`, `main.py`, and `cert.py`. SQLAlchemy datetime columns are now `DateTime(timezone=True)`; model defaults use a new `_utcnow()` helper. New Alembic migration `0002_timestamptz` converts existing `timestamp without time zone` columns to `timestamptz` (reinterpreting stored values as UTC), guarded per column, so it is a no-op for columns that are already `timestamptz`. **Behaviour note:** API datetimes now carry a UTC offset, so the frontend renders them correctly in local time (previously, stored UTC values were shown as if they were local time).
+- **`scan_type` request validation** (`schemas.py`) — `CreateScanJobRequest.scan_type` is now `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` instead of free `str`; invalid values return HTTP 422. The response model keeps `str` so legacy rows never trigger a serialization error. Verified: `scan_type=bogus` → 422, valid type passes schema validation.
+
+## 2026-05-26 — Alembic migrations replace startup `create_all` + raw ALTERs
+
+### Added
+- **Alembic introduced (`alembic==1.14.0`)** — schema is now version-controlled instead of being patched at every startup. New `clearview_app/migrations/` package (`env.py` reuses the app's SQLAlchemy engine and `Base.metadata`; `versions/0001_baseline.py` baseline) and dev-only `containers/clearview/alembic.ini` for manual CLI use. The app builds the Alembic `Config` programmatically, so `alembic.ini` is not shipped in the image.
+- **Baseline migration `0001_baseline`** — creates the full current schema via `Base.metadata.create_all`, guaranteed identical to the models (the same DDL the app emitted before). Future schema changes become explicit Alembic revisions.
+- **Startup bootstrap `clearview_app/db_migrate.run_migrations()`** — idempotent, three cases: fresh DB → `upgrade head`; existing pre-Alembic DB (tables present, no `alembic_version`) → `stamp head` (adopt baseline without re-creating); already under Alembic → `upgrade head`. Verified end-to-end against throwaway databases (fresh upgrade, existing-DB stamp, re-run no-op) and a local image boot test (`/healthz` OK, schema + `alembic_version=0001_baseline`).
+
+### Changed
+- **`main.py` startup** — `on_startup()` now calls `run_migrations()` instead of `Base.metadata.create_all(bind=engine)` + `_ensure_schema_columns()`. The 18-statement raw `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` block (`_ensure_schema_columns`) is removed; unused `Base`/`engine` imports dropped. The existing dev/prod database is adopted automatically (stamped to baseline) on first start of the new build — no manual migration step required.
+
 ## 2026-05-26 — Build/version number in the UI (Dropkeep-style)

 ### Added