diff --git a/containers/clearview/alembic.ini b/containers/clearview/alembic.ini
new file mode 100644
index 0000000..4c8bc67
--- /dev/null
+++ b/containers/clearview/alembic.ini
@@ -0,0 +1,45 @@
+# Alembic config for manual CLI use during development, e.g.:
+# cd containers/clearview && DATABASE_URL=postgresql://... PYTHONPATH=src alembic revision -m "msg"
+#
+# The application itself does NOT read this file: clearview_app.db_migrate builds
+# an Alembic Config programmatically and env.py takes the database URL from
+# DATABASE_URL via clearview_app.config. sqlalchemy.url is therefore left blank.
+
+[alembic]
+script_location = src/clearview_app/migrations
+prepend_sys_path = src
+sqlalchemy.url =
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/containers/clearview/requirements.txt b/containers/clearview/requirements.txt
index fdc85c8..7da64eb 100644
--- a/containers/clearview/requirements.txt
+++ b/containers/clearview/requirements.txt
@@ -1,6 +1,7 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
sqlalchemy==2.0.36
+alembic==1.14.0
psycopg[binary]==3.2.3
python-multipart==0.0.12
requests==2.32.3
diff --git a/containers/clearview/site/app.js b/containers/clearview/site/app.js
index e5ce1ac..8920b97 100644
--- a/containers/clearview/site/app.js
+++ b/containers/clearview/site/app.js
@@ -104,6 +104,8 @@
statTenants: document.getElementById('statTenants'),
statJobs: document.getElementById('statJobs'),
statRunning: document.getElementById('statRunning'),
+ statErrors: document.getElementById('statErrors'),
+ dashRecentJobs: document.getElementById('dashRecentJobs'),
};
// -------------------------------------------------------------------------
@@ -610,6 +612,36 @@
// Jobs list
// -------------------------------------------------------------------------
+ function renderDashRecent(jobs) {
+ if (!els.dashRecentJobs) return;
+ if (!jobs.length) {
+ els.dashRecentJobs.innerHTML = '
| No jobs yet. |
';
+ return;
+ }
+ els.dashRecentJobs.innerHTML = jobs.slice(0, 5).map(function (job) {
+ var jobIdSafe = escHtml(job.id);
+ var tenantLabel = job.tenant_name
+ ? escHtml(job.tenant_name)
+ : 'manual';
+ var progress = job.total_targets > 0 ? (job.processed_targets + '/' + job.total_targets) : '0/0';
+ return '' +
+ '' + jobIdSafe + ' | ' +
+ '' + escHtml(job.scan_type || 'sharepoint') + ' | ' +
+ '' + tenantLabel + ' | ' +
+ '' + statusBadge(job.status) + ' | ' +
+ '' + progress + ' | ' +
+ '' + formatDate(job.updated_at) + ' | ' +
+ '
';
+ }).join('');
+ els.dashRecentJobs.querySelectorAll('[data-dash-job]').forEach(function (row) {
+ row.addEventListener('click', function () {
+ state.selectedJobId = row.getAttribute('data-dash-job');
+ navigateTo('jobs');
+ refreshSelectedJob().catch(function () {});
+ });
+ });
+ }
+
async function refreshJobs() {
const filterTenant = els.jobTenantFilter.value;
const filterType = els.jobTypeFilter ? els.jobTypeFilter.value : '';
@@ -626,6 +658,12 @@
els.statRunning.textContent = String(jobs.filter(function (j) {
return j.status === 'running' || j.status === 'queued';
}).length);
+ if (els.statErrors) {
+ els.statErrors.textContent = String(jobs.filter(function (j) {
+ return j.status === 'completed_with_errors' || (j.failed_targets || 0) > 0;
+ }).length);
+ }
+ renderDashRecent(jobs);
if (!jobs.length) {
els.jobsTableBody.innerHTML = '| No jobs yet. |
';
@@ -654,22 +692,23 @@
} else {
typeLabel = 'SharePoint';
}
+ const jobIdSafe = escHtml(job.id);
return (
'' +
- '' + job.id + ' | ' +
+ '' + jobIdSafe + ' | ' +
'' + typeLabel + ' | ' +
'' + tenantLabel + ' | ' +
- '' + job.source_type + ' | ' +
+ '' + escHtml(job.source_type) + ' | ' +
'' + statusBadge(job.status) + ' | ' +
'' + progress + ' | ' +
'' + (job.items_scanned > 0 ? job.items_scanned : '-') + ' | ' +
'' + formatDate(job.updated_at) + ' | ' +
'' +
' ' +
- '' +
+ '' +
(job.status === 'queued' || job.status === 'running'
- ? ''
- : '') +
+ ? ''
+ : '') +
' ' +
' | ' +
'
'
@@ -1527,14 +1566,16 @@
return hash;
}
- function applyRoute(route) {
+ function applyRoute(route, moveFocus) {
if (!ROUTE_TITLES[route]) {
route = 'dashboard';
}
state.currentRoute = route;
+ var activePage = null;
document.querySelectorAll('.route-page').forEach(function (page) {
if (page.getAttribute('data-route-page') === route) {
page.removeAttribute('hidden');
+ activePage = page;
} else {
page.setAttribute('hidden', '');
}
@@ -1549,6 +1590,16 @@
if (els.contentTitle) {
els.contentTitle.textContent = ROUTE_TITLES[route];
}
+ document.title = 'Clearview | ' + ROUTE_TITLES[route];
+ // On user navigation, move focus to the new page's first heading so
+ // screen-reader and keyboard users land in the freshly shown content.
+ if (moveFocus && activePage) {
+ var heading = activePage.querySelector('h1, h2');
+ if (heading) {
+ heading.setAttribute('tabindex', '-1');
+ heading.focus();
+ }
+ }
}
function navigateTo(route) {
@@ -1560,12 +1611,12 @@
if (window.location.hash !== hash) {
window.location.hash = hash;
} else {
- applyRoute(route);
+ applyRoute(route, true);
}
}
window.addEventListener('hashchange', function () {
- applyRoute(parseRoute());
+ applyRoute(parseRoute(), true);
});
applyRoute(parseRoute());
diff --git a/containers/clearview/site/index.html b/containers/clearview/site/index.html
index 2f63b0c..d9e9187 100644
--- a/containers/clearview/site/index.html
+++ b/containers/clearview/site/index.html
@@ -73,6 +73,33 @@
0
Active Jobs
+
+ 0
+ With errors
+
+
+
+
+
+
+
+
+
+
+ | Job ID |
+ Type |
+ Tenant |
+ Status |
+ Targets |
+ Updated |
+
+
+
+ | No jobs yet. |
+
+
diff --git a/containers/clearview/site/styles.css b/containers/clearview/site/styles.css
index 456d659..44f6172 100644
--- a/containers/clearview/site/styles.css
+++ b/containers/clearview/site/styles.css
@@ -55,38 +55,12 @@ body {
background: radial-gradient(circle at center, rgba(3, 105, 161, 0.2), rgba(3, 105, 161, 0));
}
-.topbar {
- width: min(1100px, calc(100% - 2rem));
- margin: 1.1rem auto 0;
- padding: 0.95rem 1.1rem;
- border: 1px solid var(--cv-border);
- border-radius: 18px;
- background: rgba(255, 255, 255, 0.75);
- backdrop-filter: blur(8px);
- display: flex;
- align-items: center;
- justify-content: space-between;
- box-shadow: 0 10px 24px rgba(20, 20, 19, 0.08);
-}
-
.brand-logo {
height: 42px;
width: auto;
display: block;
}
-.topbar-actions {
- display: flex;
- gap: 0.6rem;
-}
-
-.layout {
- width: min(1100px, calc(100% - 2rem));
- margin: 1rem auto 2.5rem;
- display: grid;
- gap: 1rem;
-}
-
.hero,
.panel {
border-radius: 22px;
@@ -131,7 +105,7 @@ h2 {
.hero-stats {
margin-top: 1.3rem;
display: grid;
- grid-template-columns: repeat(3, minmax(0, 1fr));
+ grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 0.75rem;
}
@@ -291,8 +265,9 @@ textarea {
input:focus,
select:focus,
textarea:focus,
-button:focus {
- outline: 2px solid rgba(14, 165, 233, 0.38);
+button:focus,
+a:focus-visible {
+ outline: 2px solid var(--cv-accent);
outline-offset: 2px;
}
@@ -533,8 +508,8 @@ strong {
}
.risk.warn {
- background: rgba(14, 165, 233, 0.15);
- color: var(--cv-accent-dark);
+ background: rgba(234, 179, 8, 0.18);
+ color: #854d0e;
}
.risk.high {
@@ -584,12 +559,6 @@ strong {
}
@media (max-width: 930px) {
- .topbar {
- flex-direction: column;
- align-items: flex-start;
- gap: 0.8rem;
- }
-
.hero-stats {
grid-template-columns: 1fr;
}
@@ -616,11 +585,6 @@ strong {
}
@media (max-width: 640px) {
- .layout,
- .topbar {
- width: calc(100% - 1rem);
- }
-
.hero,
.panel {
border-radius: 16px;
@@ -633,14 +597,6 @@ strong {
.hero h1 {
max-width: none;
}
-
- .topbar-actions {
- width: 100%;
- }
-
- .topbar-actions .btn {
- flex: 1;
- }
}
/* ===========================================================================
diff --git a/containers/clearview/src/clearview_app/api_helpers.py b/containers/clearview/src/clearview_app/api_helpers.py
new file mode 100644
index 0000000..721426a
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_helpers.py
@@ -0,0 +1,321 @@
+"""Shared helpers for the API route modules.
+
+Extracted verbatim from the original monolithic ``main.py`` so the route
+modules (``api_tenants``, ``api_jobs``) can share credential resolution, job
+creation, response mapping, and export helpers without circular imports.
+"""
+from __future__ import annotations
+
+import re
+import uuid
+from datetime import datetime, timezone
+
+from fastapi import HTTPException
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+
+from .db import SessionLocal
+from .default_sites import is_default_site, normalize_site_url
+from .models import ScanJob, ScanTarget, TenantProfile
+from .scanners import AuthConfig
+from .schemas import ScanJobCreateResponse, ScanJobSummary, TenantProfileItem
+
+
+def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
+ """
+ Extract (group_name, link_type) from principal values such as:
+ - SharingLinks...
+ - c:0o.c|federateddirectoryclaimprovider|SharingLinks...
+ """
+ if not principal:
+ return None
+
+ text = principal.strip()
+ segments = [s.strip() for s in text.split("|") if s.strip()]
+
+ candidate = ""
+ for segment in reversed(segments):
+ if segment.lower().startswith("sharinglinks."):
+ candidate = segment
+ break
+ if not candidate and text.lower().startswith("sharinglinks."):
+ candidate = text
+ if not candidate:
+ return None
+
+ parts = candidate.split(".")
+ if len(parts) < 3:
+ return None
+ return candidate, parts[2]
+
+
+_SCAN_TYPE_LABELS = {
+ "sharepoint": "Deviations",
+ "sharepoint_root": "Root",
+ "mailbox": "Mailbox",
+ "entra_groups": "EntraGroups",
+}
+
+
+def _build_export_filename(job: ScanJob, job_id: str) -> str:
+ tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
+ safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
+ scan_type = job.scan_type or "sharepoint"
+ type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
+ short_id = job_id.replace("-", "")[-12:]
+ return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
+
+
+def _enumerate_all_entra_groups(
+ tenant_id: str,
+ client_id: str,
+ client_secret: str | None,
+ profile_id: str | None,
+) -> list[str]:
+ cert_private_key: str | None = None
+ cert_thumbprint: str | None = None
+ cert_public_pem: str | None = None
+ if profile_id:
+ with SessionLocal() as db:
+ profile = db.get(TenantProfile, profile_id)
+ if profile:
+ cert_private_key = profile.cert_private_key
+ cert_thumbprint = profile.cert_thumbprint
+ cert_public_pem = profile.cert_public_pem
+
+ auth = AuthConfig(
+ tenant_id=tenant_id,
+ client_id=client_id,
+ client_secret=client_secret or "",
+ cert_private_key=cert_private_key,
+ cert_thumbprint=cert_thumbprint,
+ cert_public_pem=cert_public_pem,
+ )
+
+ from .scanners import entra as _entra
+
+ try:
+ return _entra.list_all_groups(auth)
+ except Exception as exc: # noqa: BLE001
+ raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc
+
+
+def _enumerate_all_mailboxes(
+ organization: str | None,
+ tenant_id: str,
+ client_id: str,
+ client_secret: str | None,
+ profile_id: str | None,
+) -> list[str]:
+ if not organization or "." not in organization:
+ raise HTTPException(
+ status_code=400,
+ detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
+ )
+
+ cert_private_key: str | None = None
+ cert_thumbprint: str | None = None
+ cert_public_pem: str | None = None
+ if profile_id:
+ with SessionLocal() as db:
+ profile = db.get(TenantProfile, profile_id)
+ if profile:
+ cert_private_key = profile.cert_private_key
+ cert_thumbprint = profile.cert_thumbprint
+ cert_public_pem = profile.cert_public_pem
+
+ auth = AuthConfig(
+ tenant_id=tenant_id,
+ client_id=client_id,
+ client_secret=client_secret or "",
+ cert_private_key=cert_private_key,
+ cert_thumbprint=cert_thumbprint,
+ cert_public_pem=cert_public_pem,
+ )
+
+ from .scanners import mailbox as _mailbox
+
+ try:
+ return _mailbox.list_mailboxes(organization=organization.strip().lower(), auth=auth)
+ except Exception as exc: # noqa: BLE001
+ raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
+
+
+def _resolve_credentials(
+ db,
+ tenant_profile_id: str | None,
+ tenant_id: str | None,
+ client_id: str | None,
+ client_secret: str | None,
+) -> tuple[str, str, str | None, str | None]:
+ if tenant_profile_id:
+ profile = db.get(TenantProfile, tenant_profile_id)
+ if not profile:
+ raise HTTPException(status_code=404, detail="Tenant profile not found")
+ if not profile.client_secret and not profile.cert_thumbprint:
+ raise HTTPException(
+ status_code=400,
+ detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
+ )
+ return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
+ if tenant_id and client_id and client_secret:
+ return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
+ raise HTTPException(
+ status_code=400,
+ detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
+ )
+
+
+def _create_job_from_targets(
+ raw_targets: list[str],
+ scan_type: str,
+ skip_default_sites: bool,
+ source_type: str,
+ tenant_id: str,
+ client_id: str,
+ client_secret: str,
+ tenant_profile_id: str | None = None,
+) -> ScanJobCreateResponse:
+ accepted: list[str] = []
+ skipped_default_urls: list[str] = []
+ invalid: list[str] = []
+
+ seen: set[str] = set()
+
+ for raw in raw_targets:
+ if scan_type == "mailbox":
+ normalized = (raw or "").strip().lower()
+ if not normalized or "@" not in normalized:
+ invalid.append(raw)
+ continue
+ elif scan_type == "entra_groups":
+ normalized = (raw or "").strip()
+ if not normalized:
+ invalid.append(raw)
+ continue
+ else:
+ normalized = normalize_site_url(raw) or ""
+ if not normalized:
+ invalid.append(raw)
+ continue
+
+ if normalized in seen:
+ continue
+ seen.add(normalized)
+
+ if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
+ skipped_default_urls.append(normalized)
+ continue
+
+ accepted.append(normalized)
+
+ with SessionLocal() as db:
+ now = datetime.now(timezone.utc)
+ job = ScanJob(
+ id=str(uuid.uuid4()),
+ source_type=source_type,
+ scan_type=scan_type,
+ status="queued" if accepted else "completed",
+ skip_default_sites=skip_default_sites,
+ tenant_profile_id=tenant_profile_id,
+ auth_tenant_id=tenant_id,
+ auth_client_id=client_id,
+ auth_client_secret=client_secret,
+ total_targets=len(accepted),
+ skipped_targets=len(skipped_default_urls),
+ warning_message=None,
+ error_message=None,
+ created_at=now,
+ updated_at=now,
+ finished_at=now if not accepted else None,
+ )
+
+ if not accepted:
+ if scan_type == "mailbox":
+ job.warning_message = "No scannable mailboxes after validation"
+ else:
+ job.warning_message = "No scannable sites after validation and default-site filtering"
+
+ db.add(job)
+ db.flush()
+
+ for index, target in enumerate(accepted, start=1):
+ db.add(
+ ScanTarget(
+ job_id=job.id,
+ site_url=target,
+ source_row=index,
+ status="queued",
+ attempts=0,
+ created_at=now,
+ updated_at=now,
+ )
+ )
+
+ db.commit()
+
+ stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
+ job = db.execute(stmt).unique().scalar_one()
+
+ return ScanJobCreateResponse(
+ job=_to_job_summary(job),
+ accepted_urls=accepted,
+ skipped_default_urls=skipped_default_urls,
+ invalid_urls=invalid,
+ )
+
+
+def _to_job_summary(job: ScanJob) -> ScanJobSummary:
+ return ScanJobSummary(
+ id=job.id,
+ status=job.status,
+ source_type=job.source_type,
+ scan_type=job.scan_type or "sharepoint",
+ skip_default_sites=job.skip_default_sites,
+ tenant_profile_id=job.tenant_profile_id,
+ tenant_name=job.tenant_profile.name if job.tenant_profile else None,
+ total_targets=job.total_targets,
+ processed_targets=job.processed_targets,
+ successful_targets=job.successful_targets,
+ failed_targets=job.failed_targets,
+ skipped_targets=job.skipped_targets,
+ items_scanned=job.items_scanned,
+ scan_activity=job.scan_activity if job.status == "running" else None,
+ warning_message=job.warning_message,
+ error_message=job.error_message,
+ created_at=job.created_at,
+ updated_at=job.updated_at,
+ started_at=job.started_at,
+ finished_at=job.finished_at,
+ )
+
+
+def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
+ return TenantProfileItem(
+ id=profile.id,
+ name=profile.name,
+ tenant_id=profile.tenant_id,
+ primary_domain=profile.primary_domain,
+ client_id=profile.client_id,
+ has_certificate=bool(profile.cert_thumbprint),
+ cert_thumbprint=profile.cert_thumbprint,
+ cert_expires_at=profile.cert_expires_at,
+ created_at=profile.created_at,
+ updated_at=profile.updated_at,
+ )
+
+
+def _sharing_link_risk_label(principal: str) -> str:
+ if not principal.startswith("SharingLinks."):
+ return ""
+ parts = principal.split(".", 3)
+ link_type = parts[2] if len(parts) >= 3 else ""
+ if link_type.startswith("Anonymous"):
+ return "Critical"
+ if link_type == "Flexible":
+ return "High"
+ if link_type.startswith("Organization"):
+ return "Low"
+ if link_type.startswith("Direct"):
+ return "Low"
+ return "Unknown"
diff --git a/containers/clearview/src/clearview_app/api_jobs.py b/containers/clearview/src/clearview_app/api_jobs.py
new file mode 100644
index 0000000..c9cb6a4
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_jobs.py
@@ -0,0 +1,645 @@
+"""Scan-job routes: create, list, inspect, cancel, delete, resolve, export."""
+from __future__ import annotations
+
+import io
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+from fastapi.responses import Response, StreamingResponse
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+
+from .api_helpers import (
+ _build_export_filename,
+ _create_job_from_targets,
+ _enumerate_all_entra_groups,
+ _enumerate_all_mailboxes,
+ _extract_sharing_link_group_and_type,
+ _resolve_credentials,
+ _sharing_link_risk_label,
+ _to_job_summary,
+)
+from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
+from .db import SessionLocal
+from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
+from .scanners import AuthConfig, probe
+from .schemas import (
+ CreateScanJobRequest,
+ PermissionDeviationItem,
+ ProbeResultResponse,
+ ResolveGroupsResponse,
+ ResolveSharingLinksRequest,
+ ResolveSharingLinksResponse,
+ ScanJobCreateResponse,
+ ScanJobDetail,
+ ScanJobSummary,
+ ScanTargetItem,
+ SharingLinkTypesResponse,
+)
+
+router = APIRouter()
+
+
+@router.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
+def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
+ with SessionLocal() as db:
+ tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
+ db=db,
+ tenant_profile_id=payload.tenant_profile_id,
+ tenant_id=payload.tenant_id,
+ client_id=payload.client_id,
+ client_secret=payload.client_secret,
+ )
+ source_type = "manual"
+ if payload.scan_type == "entra_groups":
+ if payload.scan_all_groups:
+ raw_targets = _enumerate_all_entra_groups(
+ tenant_id=tenant_id,
+ client_id=client_id,
+ client_secret=client_secret,
+ profile_id=profile_id,
+ )
+ source_type = "tenant_all"
+ else:
+ raw_targets = [str(g) for g in payload.group_ids]
+ elif payload.scan_type == "mailbox":
+ if payload.scan_all_mailboxes:
+ organization = payload.organization
+ if (not organization) and profile_id:
+ with SessionLocal() as db:
+ profile = db.get(TenantProfile, profile_id)
+ if profile and profile.primary_domain:
+ organization = profile.primary_domain
+ raw_targets = _enumerate_all_mailboxes(
+ organization=organization,
+ tenant_id=tenant_id,
+ client_id=client_id,
+ client_secret=client_secret,
+ profile_id=profile_id,
+ )
+ source_type = "tenant_all"
+ else:
+ raw_targets = [str(m) for m in payload.mailboxes]
+ else:
+ raw_targets = [str(item) for item in payload.site_urls]
+ return _create_job_from_targets(
+ raw_targets=raw_targets,
+ scan_type=payload.scan_type,
+ skip_default_sites=payload.skip_default_sites,
+ source_type=source_type,
+ tenant_id=tenant_id,
+ client_id=client_id,
+ client_secret=client_secret,
+ tenant_profile_id=profile_id,
+ )
+
+
+@router.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
+def create_scan_job_from_csv(
+ skip_default_sites: bool = True,
+ scan_type: str = Form("sharepoint"),
+ tenant_profile_id: str | None = Form(None),
+ tenant_id: str | None = Form(None),
+ client_id: str | None = Form(None),
+ client_secret: str | None = Form(None),
+ file: UploadFile = File(...),
+) -> ScanJobCreateResponse:
+ with SessionLocal() as db:
+ resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = _resolve_credentials(
+ db=db,
+ tenant_profile_id=tenant_profile_id,
+ tenant_id=tenant_id,
+ client_id=client_id,
+ client_secret=client_secret,
+ )
+ content = file.file.read()
+ if scan_type == "mailbox":
+ parsed = parse_mailboxes_csv(content)
+ targets = parsed.mailboxes
+ elif scan_type == "entra_groups":
+ parsed = parse_entra_groups_csv(content)
+ targets = parsed.urls
+ else:
+ parsed = parse_sites_csv(content)
+ targets = parsed.urls
+ response = _create_job_from_targets(
+ raw_targets=targets,
+ scan_type=scan_type,
+ skip_default_sites=skip_default_sites,
+ source_type="csv",
+ tenant_id=resolved_tenant_id,
+ client_id=resolved_client_id,
+ client_secret=resolved_client_secret,
+ tenant_profile_id=profile_id,
+ )
+
+ if parsed.invalid_rows:
+ csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
+ with SessionLocal() as db:
+ job = db.get(ScanJob, response.job.id)
+ if job:
+ if job.warning_message:
+ job.warning_message = f"{job.warning_message} | {csv_warning}"
+ else:
+ job.warning_message = csv_warning
+ job.updated_at = datetime.now(timezone.utc)
+ db.commit()
+ db.refresh(job)
+ response.job.warning_message = job.warning_message
+
+ return response
+
+
+@router.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
+def cancel_scan_job(job_id: str) -> ScanJobSummary:
+ with SessionLocal() as db:
+ stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
+ job = db.execute(stmt).unique().scalar_one_or_none()
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+ if job.status not in ("queued", "running"):
+ raise HTTPException(status_code=409, detail="Job is not queued or running")
+ now = datetime.now(timezone.utc)
+ job.status = "cancelled"
+ job.updated_at = now
+ job.finished_at = now
+ job.scan_activity = None
+ db.commit()
+ db.refresh(job)
+ stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
+ job = db.execute(stmt).unique().scalar_one()
+ return _to_job_summary(job)
+
+
+@router.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
+def delete_scan_job(job_id: str) -> Response:
+ with SessionLocal() as db:
+ job = db.get(ScanJob, job_id)
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+ if job.status in ("queued", "running"):
+ raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")
+ db.delete(job)
+ db.commit()
+ return Response(status_code=204)
+
+
+@router.get("/api/scan-jobs", response_model=list[ScanJobSummary])
+def list_scan_jobs(
+ limit: int = 20,
+ tenant_profile_id: str | None = None,
+ scan_type: str | None = None,
+) -> list[ScanJobSummary]:
+ with SessionLocal() as db:
+ stmt = (
+ select(ScanJob)
+ .options(joinedload(ScanJob.tenant_profile))
+ .order_by(ScanJob.created_at.desc())
+ .limit(max(1, min(limit, 100)))
+ )
+ if tenant_profile_id:
+ stmt = stmt.where(ScanJob.tenant_profile_id == tenant_profile_id)
+ if scan_type:
+ stmt = stmt.where(ScanJob.scan_type == scan_type)
+ jobs = list(db.execute(stmt).unique().scalars())
+ return [_to_job_summary(job) for job in jobs]
+
+
+@router.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
+def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
+ with SessionLocal() as db:
+ job = db.get(ScanJob, job_id)
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+
+ principals = list(
+ db.execute(
+ select(PermissionDeviation.principal).where(PermissionDeviation.job_id == job_id)
+ ).scalars()
+ )
+
+ type_counts: dict[str, int] = {}
+ for principal in principals:
+ parsed = _extract_sharing_link_group_and_type(str(principal or ""))
+ if not parsed:
+ continue
+ _group_name, link_type = parsed
+ type_counts[link_type] = type_counts.get(link_type, 0) + 1
+
+ return SharingLinkTypesResponse(type_counts=type_counts)
+
+
+@router.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
+def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
+ from .scanner import resolve_sharing_link_members
+
+ with SessionLocal() as db:
+ job = db.get(ScanJob, job_id)
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+ if job.status in ("queued", "running"):
+ raise HTTPException(status_code=409, detail="Job is still running")
+
+ cert_private_key: str | None = None
+ cert_thumbprint: str | None = None
+ cert_public_pem: str | None = None
+ if job.tenant_profile_id:
+ profile = db.get(TenantProfile, job.tenant_profile_id)
+ if profile:
+ cert_private_key = profile.cert_private_key
+ cert_thumbprint = profile.cert_thumbprint
+ cert_public_pem = profile.cert_public_pem
+
+ auth = AuthConfig(
+ tenant_id=job.auth_tenant_id or "",
+ client_id=job.auth_client_id or "",
+ client_secret=job.auth_client_secret or "",
+ cert_private_key=cert_private_key,
+ cert_thumbprint=cert_thumbprint,
+ cert_public_pem=cert_public_pem,
+ )
+
+ all_deviations = list(
+ db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
+ )
+
+ # Group by (site_url, principal) so each unique group is resolved once
+ groups: dict[tuple[str, str], list[int]] = {}
+ for dev in all_deviations:
+ parsed = _extract_sharing_link_group_and_type(dev.principal)
+ if not parsed:
+ continue
+ group_name, link_type = parsed
+ if link_type not in payload.link_types:
+ continue
+ key = (dev.site_url, group_name)
+ groups.setdefault(key, []).append(dev.id)
+
+ updated_deviations = 0
+ for (site_url, group_name), dev_ids in groups.items():
+ members = resolve_sharing_link_members(site_url, group_name, auth)
+ resolved_members = ", ".join(members) if members else ""
+ with SessionLocal() as db:
+ for dev_id in dev_ids:
+ dev = db.get(PermissionDeviation, dev_id)
+ if dev:
+ dev.resolved_members = resolved_members
+ db.commit()
+ updated_deviations += len(dev_ids)
+
+ return ResolveSharingLinksResponse(
+ resolved_groups=len(groups),
+ updated_deviations=updated_deviations,
+ )
+
+
+@router.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
+def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
+ """
+ Expand group principals on this job's deviations and write each group's
+ member list to permission_deviations.resolved_members. Handles both
+ classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups
+ assigned directly at root (via Microsoft Graph). Skips email-shape users
+ and SharingLinks groups (those have their own resolver).
+ """
+ from .scanners.sharepoint import (
+ is_aad_group_principal,
+ is_sharepoint_group_principal,
+ resolve_aad_group_members,
+ resolve_sharing_link_members,
+ )
+
+ with SessionLocal() as db:
+ job = db.get(ScanJob, job_id)
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+ if job.status in ("queued", "running"):
+ raise HTTPException(status_code=409, detail="Job is still running")
+ if (job.scan_type or "sharepoint") == "mailbox":
+ raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")
+
+ cert_private_key: str | None = None
+ cert_thumbprint: str | None = None
+ cert_public_pem: str | None = None
+ if job.tenant_profile_id:
+ profile = db.get(TenantProfile, job.tenant_profile_id)
+ if profile:
+ cert_private_key = profile.cert_private_key
+ cert_thumbprint = profile.cert_thumbprint
+ cert_public_pem = profile.cert_public_pem
+
+ auth = AuthConfig(
+ tenant_id=job.auth_tenant_id or "",
+ client_id=job.auth_client_id or "",
+ client_secret=job.auth_client_secret or "",
+ cert_private_key=cert_private_key,
+ cert_thumbprint=cert_thumbprint,
+ cert_public_pem=cert_public_pem,
+ )
+
+ all_deviations = list(
+ db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
+ )
+
+ # Group deviations by (site_url, principal) so each unique group is resolved once
+ groups: dict[tuple[str, str], list[int]] = {}
+ for dev in all_deviations:
+ if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
+ continue
+ key = (dev.site_url, dev.principal)
+ groups.setdefault(key, []).append(dev.id)
+
+ resolved = 0
+ skipped = 0
+ updated = 0
+ for (site_url, group_name), dev_ids in groups.items():
+ try:
+ if is_aad_group_principal(group_name):
+ members = resolve_aad_group_members(group_name, auth)
+ else:
+ members = resolve_sharing_link_members(site_url, group_name, auth)
+ except Exception: # noqa: BLE001
+ members = []
+
+ if not members:
+ skipped += 1
+ continue
+
+ resolved_text = ", ".join(members)
+ with SessionLocal() as db:
+ for dev_id in dev_ids:
+ dev = db.get(PermissionDeviation, dev_id)
+ if dev:
+ dev.resolved_members = resolved_text
+ db.commit()
+ resolved += 1
+ updated += len(dev_ids)
+
+ return ResolveGroupsResponse(
+ resolved_groups=resolved,
+ skipped_groups=skipped,
+ updated_deviations=updated,
+ )
+
+
+@router.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
+def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
+ with SessionLocal() as db:
+ job = db.get(ScanJob, job_id)
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+ target = db.get(ScanTarget, target_id)
+ if not target or target.job_id != job_id:
+ raise HTTPException(status_code=404, detail="Target not found")
+ if job.status in ("queued", "running"):
+ raise HTTPException(status_code=409, detail="Job is still running")
+
+ cert_private_key: str | None = None
+ cert_thumbprint: str | None = None
+ cert_public_pem: str | None = None
+ if job.tenant_profile_id:
+ profile = db.get(TenantProfile, job.tenant_profile_id)
+ if profile:
+ cert_private_key = profile.cert_private_key
+ cert_thumbprint = profile.cert_thumbprint
+ cert_public_pem = profile.cert_public_pem
+
+ auth = AuthConfig(
+ tenant_id=job.auth_tenant_id or "",
+ client_id=job.auth_client_id or "",
+ client_secret=job.auth_client_secret or "",
+ cert_private_key=cert_private_key,
+ cert_thumbprint=cert_thumbprint,
+ cert_public_pem=cert_public_pem,
+ )
+ site_url = target.site_url
+ job_scan_type = job.scan_type or "sharepoint"
+
+ result = probe(job_scan_type, site_url, auth)
+
+ with SessionLocal() as db:
+ target = db.get(ScanTarget, target_id)
+ if not target:
+ raise HTTPException(status_code=404, detail="Target not found")
+ now = datetime.now(timezone.utc)
+ target.last_probe_at = now
+ target.last_probe_ok = result.ok
+ target.last_probe_message = result.message
+ target.updated_at = now
+ db.commit()
+ db.refresh(target)
+ return ProbeResultResponse(
+ target_id=target.id,
+ ok=result.ok,
+ message=result.message,
+ last_probe_at=target.last_probe_at,
+ )
+
+
+@router.get("/api/scan-jobs/{job_id}/export")
+def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
+ import openpyxl
+ from openpyxl.styles import Font, PatternFill
+
+ with SessionLocal() as db:
+ job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+
+ targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
+ if site_url:
+ targets_q = targets_q.where(ScanTarget.site_url == site_url)
+ targets = list(db.execute(targets_q).scalars())
+
+ deviations_q = (
+ select(PermissionDeviation)
+ .where(PermissionDeviation.job_id == job.id)
+ .order_by(PermissionDeviation.id.desc())
+ )
+ if site_url:
+ deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
+ deviations = list(db.execute(deviations_q).scalars())
+
+ wb = openpyxl.Workbook()
+ header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
+ header_font_white = Font(bold=True, color="FFFFFF")
+
+ _risk_styles: dict[str, tuple] = {
+ "Critical": (
+ PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
+ Font(bold=True, color="7B0000"),
+ ),
+ "High": (
+ PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
+ Font(bold=True, color="7C2D00"),
+ ),
+ "Low": (
+ PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
+ Font(bold=True, color="0C4A6E"),
+ ),
+ "Unknown": (
+ PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
+ Font(bold=True, color="555555"),
+ ),
+ }
+
+ def _style_header(ws, headers):
+ ws.append(headers)
+ for cell in ws[1]:
+ cell.font = header_font_white
+ cell.fill = header_fill
+
+ scan_type = job.scan_type or "sharepoint"
+
+ target_label = {
+ "sharepoint": "Site URL",
+ "sharepoint_root": "Site URL",
+ "mailbox": "Mailbox",
+ "entra_groups": "Group",
+ }.get(scan_type, "Target")
+
+ # Targets sheet
+ ws_targets = wb.active
+ ws_targets.title = "Targets"
+ _style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
+ for t in targets:
+ ws_targets.append([
+ t.site_url,
+ t.status,
+ t.attempts,
+ t.error_message or "",
+ t.started_at.isoformat() if t.started_at else "",
+ t.finished_at.isoformat() if t.finished_at else "",
+ ])
+ for col in ws_targets.columns:
+ ws_targets.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
+
+ # Results sheet — name and columns depend on scan type
+ if scan_type == "mailbox":
+ ws_dev = wb.create_sheet("Mailbox Permissions")
+ _style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
+ deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
+ for d in deviations:
+ ws_dev.append([
+ d.site_url,
+ d.object_url,
+ d.permission_type or d.object_type,
+ d.principal,
+ d.role_name,
+ ])
+ elif scan_type == "entra_groups":
+ ws_dev = wb.create_sheet("Group Memberships")
+ _style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
+ deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
+ for d in deviations:
+ ws_dev.append([
+ d.object_url,
+ d.permission_type or "",
+ d.principal,
+ d.role_name,
+ ])
+ elif scan_type == "sharepoint_root":
+ ws_dev = wb.create_sheet("Root Permissions")
+ _style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
+ deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
+ for d in deviations:
+ ws_dev.append([
+ d.site_url,
+ d.principal,
+ d.resolved_members or "",
+ d.role_name,
+ ])
+ else:
+ ws_dev = wb.create_sheet("Deviations")
+ _style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
+ deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
+ for d in deviations:
+ base = (d.site_url or "").rstrip("/")
+ obj_rel = d.object_url[len(base):] if base and d.object_url.startswith(base) else d.object_url
+ link_risk = _sharing_link_risk_label(d.principal)
+ ws_dev.append([
+ d.site_url,
+ obj_rel,
+ d.object_type,
+ d.principal,
+ link_risk,
+ d.resolved_members or "",
+ d.role_name,
+ d.delta_type,
+ ])
+ if link_risk in _risk_styles:
+ risk_fill, risk_font = _risk_styles[link_risk]
+ risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
+ risk_cell.fill = risk_fill
+ risk_cell.font = risk_font
+ for col in ws_dev.columns:
+ ws_dev.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
+
+ buf = io.BytesIO()
+ wb.save(buf)
+ buf.seek(0)
+
+ filename = _build_export_filename(job, job_id)
+ return StreamingResponse(
+ buf,
+ media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+ headers={"Content-Disposition": f'attachment; filename="{filename}"'},
+ )
+
+
+@router.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
+def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
+ with SessionLocal() as db:
+ job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
+ if not job:
+ raise HTTPException(status_code=404, detail="Job not found")
+
+ targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
+ if site_url:
+ targets_q = targets_q.where(ScanTarget.site_url == site_url)
+ targets = list(db.execute(targets_q).scalars())
+
+ deviations_q = (
+ select(PermissionDeviation)
+ .where(PermissionDeviation.job_id == job.id)
+ .order_by(PermissionDeviation.site_url.asc(), PermissionDeviation.object_url.asc(), PermissionDeviation.id.asc())
+ )
+ if site_url:
+ deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
+ else:
+ deviations_q = deviations_q.limit(1000)
+ deviations = list(db.execute(deviations_q).scalars())
+
+ return ScanJobDetail(
+ **_to_job_summary(job).model_dump(),
+ targets=[
+ ScanTargetItem(
+ id=t.id,
+ site_url=t.site_url,
+ status=t.status,
+ attempts=t.attempts,
+ error_message=t.error_message,
+ started_at=t.started_at,
+ finished_at=t.finished_at,
+ last_probe_at=t.last_probe_at,
+ last_probe_ok=t.last_probe_ok,
+ last_probe_message=t.last_probe_message,
+ )
+ for t in targets
+ ],
+ deviations=[
+ PermissionDeviationItem(
+ id=d.id,
+ site_url=d.site_url,
+ object_url=d.object_url,
+ object_type=d.object_type,
+ principal=d.principal,
+ role_name=d.role_name,
+ delta_type=d.delta_type,
+ permission_type=d.permission_type,
+ resolved_members=d.resolved_members,
+ created_at=d.created_at,
+ )
+ for d in deviations
+ ],
+ )
diff --git a/containers/clearview/src/clearview_app/api_onboarding.py b/containers/clearview/src/clearview_app/api_onboarding.py
new file mode 100644
index 0000000..979107c
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_onboarding.py
@@ -0,0 +1,76 @@
+"""Microsoft onboarding routes (admin-consent connect + scan-app creation)."""
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import RedirectResponse
+
+from .onboarding import (
+ OnboardingError,
+ consume_callback_state,
+ create_connect_url,
+ create_scan_app_for_tenant,
+)
+from .schemas import (
+ ConnectMicrosoftResponse,
+ CreateScanAppRequest,
+ CreateScanAppResponse,
+)
+
+router = APIRouter()
+
+
+@router.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
+def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
+ try:
+ result = create_scan_app_for_tenant(
+ tenant_id=payload.tenant_id,
+ display_name=payload.display_name,
+ )
+ except OnboardingError as exc:
+ raise HTTPException(status_code=400, detail=str(exc)) from exc
+ except Exception as exc: # noqa: BLE001
+ raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {exc}") from exc
+
+ return CreateScanAppResponse(
+ tenant_id=result.tenant_id,
+ client_id=result.client_id,
+ client_secret=result.client_secret,
+ app_object_id=result.app_object_id,
+ service_principal_id=result.service_principal_id,
+ display_name=result.display_name,
+ )
+
+
+@router.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
+def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
+ try:
+ return ConnectMicrosoftResponse(connect_url=create_connect_url())
+ except OnboardingError as exc:
+ raise HTTPException(status_code=400, detail=str(exc)) from exc
+
+
+@router.get("/api/onboarding/microsoft/callback")
+def onboarding_microsoft_callback(
+ tenant: str | None = None,
+ state: str | None = None,
+ error: str | None = None,
+ error_description: str | None = None,
+) -> RedirectResponse:
+ if error:
+ message = (error_description or error).replace(" ", "+")
+ return RedirectResponse(url=f"/?onboarding_status=error&onboarding_message={message}")
+
+ if not state or not consume_callback_state(state):
+ return RedirectResponse(url="/?onboarding_status=error&onboarding_message=invalid_or_expired_state")
+
+ if not tenant:
+ return RedirectResponse(url="/?onboarding_status=error&onboarding_message=missing_tenant")
+
+ return RedirectResponse(url=f"/?onboarding_status=connected&tenant_id={tenant}")
+
+
+@router.get("/api/onboarding/status")
+def onboarding_status() -> dict[str, bool]:
+ from . import config
+ automated = bool(config.ONBOARDING_CLIENT_ID and config.ONBOARDING_CLIENT_SECRET and config.ONBOARDING_REDIRECT_URI)
+ return {"automated_available": automated}
diff --git a/containers/clearview/src/clearview_app/api_tenants.py b/containers/clearview/src/clearview_app/api_tenants.py
new file mode 100644
index 0000000..f8cb473
--- /dev/null
+++ b/containers/clearview/src/clearview_app/api_tenants.py
@@ -0,0 +1,86 @@
+"""Tenant profile + certificate routes."""
+from __future__ import annotations
+
+import uuid
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import Response
+from sqlalchemy import select, text
+
+from .api_helpers import _to_tenant_item
+from .cert import generate_tenant_certificate
+from .db import SessionLocal
+from .models import TenantProfile
+from .schemas import (
+ CreateTenantProfileRequest,
+ TenantCertificateResponse,
+ TenantProfileItem,
+)
+
+router = APIRouter()
+
+
+@router.get("/api/tenants", response_model=list[TenantProfileItem])
+def list_tenants() -> list[TenantProfileItem]:
+ with SessionLocal() as db:
+ profiles = list(
+ db.execute(select(TenantProfile).order_by(TenantProfile.created_at.asc())).scalars()
+ )
+ return [_to_tenant_item(p) for p in profiles]
+
+
+@router.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
+def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
+ with SessionLocal() as db:
+ now = datetime.now(timezone.utc)
+ profile = TenantProfile(
+ id=str(uuid.uuid4()),
+ name=payload.name.strip(),
+ tenant_id=payload.tenant_id.strip(),
+ primary_domain=payload.primary_domain.strip().lower() if payload.primary_domain else None,
+ client_id=payload.client_id.strip(),
+ client_secret=payload.client_secret.strip() if payload.client_secret else None,
+ created_at=now,
+ updated_at=now,
+ )
+ db.add(profile)
+ db.commit()
+ db.refresh(profile)
+ return _to_tenant_item(profile)
+
+
+@router.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
+def generate_certificate(profile_id: str) -> TenantCertificateResponse:
+ with SessionLocal() as db:
+ profile = db.get(TenantProfile, profile_id)
+ if not profile:
+ raise HTTPException(status_code=404, detail="Tenant profile not found")
+ result = generate_tenant_certificate()
+ profile.cert_private_key = result.private_key_pem
+ profile.cert_public_pem = result.public_cert_pem
+ profile.cert_thumbprint = result.thumbprint
+ profile.cert_expires_at = result.expires_at
+ profile.updated_at = datetime.now(timezone.utc)
+ db.commit()
+ return TenantCertificateResponse(
+ thumbprint=result.thumbprint,
+ expires_at=result.expires_at,
+ public_cert_pem=result.public_cert_pem,
+ )
+
+
+@router.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
+def delete_tenant(profile_id: str) -> Response:
+ with SessionLocal() as db:
+ profile = db.get(TenantProfile, profile_id)
+ if not profile:
+ raise HTTPException(status_code=404, detail="Tenant profile not found")
+ # Detach jobs from this profile before deleting
+ db.execute(
+ text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid"),
+ {"pid": profile_id},
+ )
+ db.delete(profile)
+ db.commit()
+ return Response(status_code=204)
diff --git a/containers/clearview/src/clearview_app/cert.py b/containers/clearview/src/clearview_app/cert.py
index accbb54..85f035f 100644
--- a/containers/clearview/src/clearview_app/cert.py
+++ b/containers/clearview/src/clearview_app/cert.py
@@ -2,7 +2,7 @@ from __future__ import annotations
import hashlib
from dataclasses import dataclass
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
from cryptography import x509
from cryptography.hazmat.primitives import hashes, serialization
@@ -30,7 +30,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
subject = x509.Name([
x509.NameAttribute(NameOID.COMMON_NAME, "Clearview Scan App"),
])
- expires_at = datetime.utcnow() + timedelta(days=365 * valid_years)
+ expires_at = datetime.now(timezone.utc) + timedelta(days=365 * valid_years)
cert = (
x509.CertificateBuilder()
@@ -38,7 +38,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
.issuer_name(subject)
.public_key(private_key.public_key())
.serial_number(x509.random_serial_number())
- .not_valid_before(datetime.utcnow())
+ .not_valid_before(datetime.now(timezone.utc))
.not_valid_after(expires_at)
.sign(private_key, hashes.SHA256())
)
diff --git a/containers/clearview/src/clearview_app/db_migrate.py b/containers/clearview/src/clearview_app/db_migrate.py
new file mode 100644
index 0000000..02bfefd
--- /dev/null
+++ b/containers/clearview/src/clearview_app/db_migrate.py
@@ -0,0 +1,53 @@
+"""Database migration bootstrap.
+
+Replaces the previous ``Base.metadata.create_all`` + ``_ensure_schema_columns``
+startup path with Alembic. The bootstrap is idempotent and handles three cases:
+
+* **Fresh database** (no tables): run ``upgrade head`` to create the schema and
+ record the Alembic version.
+* **Existing pre-Alembic database** (tables present, no ``alembic_version``):
+ ``stamp head`` — adopt the baseline without re-creating existing tables.
+* **Already under Alembic**: run ``upgrade head`` to apply any new revisions.
+"""
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from alembic import command
+from alembic.config import Config
+from sqlalchemy import inspect
+
+from .db import engine
+
+log = logging.getLogger(__name__)
+
+_MIGRATIONS_DIR = Path(__file__).resolve().parent / "migrations"
+# A table that exists in every pre-Alembic Clearview database; its presence
+# (without alembic_version) marks a database that predates Alembic adoption.
+_SENTINEL_TABLE = "scan_jobs"
+
+
+def _alembic_config() -> Config:
+ cfg = Config()
+ cfg.set_main_option("script_location", str(_MIGRATIONS_DIR))
+ return cfg
+
+
+_BASELINE_REVISION = "0001_baseline"
+
+
+def run_migrations() -> None:
+ """Bring the database schema up to date (see module docstring)."""
+ cfg = _alembic_config()
+ tables = set(inspect(engine).get_table_names())
+
+ if "alembic_version" not in tables and _SENTINEL_TABLE in tables:
+ # Pre-Alembic DB: it already matches the baseline, so adopt that
+ # revision without re-creating tables, then let upgrade apply any
+ # later migrations (e.g. the timestamptz conversion in 0002).
+ log.info("Existing pre-Alembic schema detected; stamping baseline %s.", _BASELINE_REVISION)
+ command.stamp(cfg, _BASELINE_REVISION)
+
+ log.info("Applying Alembic migrations (upgrade head).")
+ command.upgrade(cfg, "head")
diff --git a/containers/clearview/src/clearview_app/main.py b/containers/clearview/src/clearview_app/main.py
index 7a8ff30..bb1da71 100644
--- a/containers/clearview/src/clearview_app/main.py
+++ b/containers/clearview/src/clearview_app/main.py
@@ -1,43 +1,21 @@
+"""Clearview API composition root.
+
+Routes live in the ``api_tenants``, ``api_jobs``, and ``api_onboarding`` modules
+(shared helpers in ``api_helpers``). This module only wires the FastAPI app,
+the scan worker lifecycle, health/version endpoints, and static file serving.
+"""
from __future__ import annotations
-import io
-import re
-import uuid
-from datetime import datetime
from pathlib import Path
-from fastapi import FastAPI, File, Form, HTTPException, UploadFile
-from fastapi.responses import FileResponse, RedirectResponse, Response, StreamingResponse
+from fastapi import FastAPI
+from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
-from sqlalchemy import select, text
-from sqlalchemy.orm import joinedload
-from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
-from .db import SessionLocal, engine
-from .default_sites import is_default_site, normalize_site_url
-from .models import Base, PermissionDeviation, ScanJob, ScanTarget, TenantProfile
-from .onboarding import OnboardingError, consume_callback_state, create_connect_url, create_scan_app_for_tenant
-from .cert import generate_tenant_certificate
-from .schemas import (
- ConnectMicrosoftResponse,
- CreateScanAppRequest,
- CreateScanAppResponse,
- CreateScanJobRequest,
- CreateTenantProfileRequest,
- PermissionDeviationItem,
- ProbeResultResponse,
- ResolveGroupsResponse,
- ResolveSharingLinksRequest,
- ResolveSharingLinksResponse,
- SharingLinkTypesResponse,
- ScanJobCreateResponse,
- ScanJobDetail,
- ScanJobSummary,
- ScanTargetItem,
- TenantCertificateResponse,
- TenantProfileItem,
-)
-from .scanners import AuthConfig, probe
+from .api_jobs import router as jobs_router
+from .api_onboarding import router as onboarding_router
+from .api_tenants import router as tenants_router
+from .db_migrate import run_migrations
from .version import display_version
from .worker import ScanWorker
@@ -47,38 +25,9 @@ worker = ScanWorker()
SITE_DIR = Path(__file__).resolve().parents[2] / "site"
-def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
- """
- Extract (group_name, link_type) from principal values such as:
- - SharingLinks...
- - c:0o.c|federateddirectoryclaimprovider|SharingLinks...
- """
- if not principal:
- return None
-
- text = principal.strip()
- segments = [s.strip() for s in text.split("|") if s.strip()]
-
- candidate = ""
- for segment in reversed(segments):
- if segment.lower().startswith("sharinglinks."):
- candidate = segment
- break
- if not candidate and text.lower().startswith("sharinglinks."):
- candidate = text
- if not candidate:
- return None
-
- parts = candidate.split(".")
- if len(parts) < 3:
- return None
- return candidate, parts[2]
-
-
@app.on_event("startup")
def on_startup() -> None:
- Base.metadata.create_all(bind=engine)
- _ensure_schema_columns()
+ run_migrations()
worker.start()
@@ -98,747 +47,13 @@ def version() -> dict[str, str]:
return {"version": display_version()}
-# ---------------------------------------------------------------------------
-# Tenant profiles
-# ---------------------------------------------------------------------------
-
-@app.get("/api/tenants", response_model=list[TenantProfileItem])
-def list_tenants() -> list[TenantProfileItem]:
- with SessionLocal() as db:
- profiles = list(
- db.execute(select(TenantProfile).order_by(TenantProfile.created_at.asc())).scalars()
- )
- return [_to_tenant_item(p) for p in profiles]
-
-
-@app.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
-def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
- with SessionLocal() as db:
- now = datetime.utcnow()
- profile = TenantProfile(
- id=str(uuid.uuid4()),
- name=payload.name.strip(),
- tenant_id=payload.tenant_id.strip(),
- primary_domain=payload.primary_domain.strip().lower() if payload.primary_domain else None,
- client_id=payload.client_id.strip(),
- client_secret=payload.client_secret.strip() if payload.client_secret else None,
- created_at=now,
- updated_at=now,
- )
- db.add(profile)
- db.commit()
- db.refresh(profile)
- return _to_tenant_item(profile)
-
-
-@app.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
-def generate_certificate(profile_id: str) -> TenantCertificateResponse:
- with SessionLocal() as db:
- profile = db.get(TenantProfile, profile_id)
- if not profile:
- raise HTTPException(status_code=404, detail="Tenant profile not found")
- result = generate_tenant_certificate()
- profile.cert_private_key = result.private_key_pem
- profile.cert_public_pem = result.public_cert_pem
- profile.cert_thumbprint = result.thumbprint
- profile.cert_expires_at = result.expires_at
- profile.updated_at = datetime.utcnow()
- db.commit()
- return TenantCertificateResponse(
- thumbprint=result.thumbprint,
- expires_at=result.expires_at,
- public_cert_pem=result.public_cert_pem,
- )
-
-
-@app.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
-def delete_tenant(profile_id: str) -> Response:
- with SessionLocal() as db:
- profile = db.get(TenantProfile, profile_id)
- if not profile:
- raise HTTPException(status_code=404, detail="Tenant profile not found")
- # Detach jobs from this profile before deleting
- db.execute(
- text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid"),
- {"pid": profile_id},
- )
- db.delete(profile)
- db.commit()
- return Response(status_code=204)
+app.include_router(tenants_router)
+app.include_router(jobs_router)
+app.include_router(onboarding_router)
# ---------------------------------------------------------------------------
-# Scan jobs
-# ---------------------------------------------------------------------------
-
-@app.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
-def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
- with SessionLocal() as db:
- tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
- db=db,
- tenant_profile_id=payload.tenant_profile_id,
- tenant_id=payload.tenant_id,
- client_id=payload.client_id,
- client_secret=payload.client_secret,
- )
- source_type = "manual"
- if payload.scan_type == "entra_groups":
- if payload.scan_all_groups:
- raw_targets = _enumerate_all_entra_groups(
- tenant_id=tenant_id,
- client_id=client_id,
- client_secret=client_secret,
- profile_id=profile_id,
- )
- source_type = "tenant_all"
- else:
- raw_targets = [str(g) for g in payload.group_ids]
- elif payload.scan_type == "mailbox":
- if payload.scan_all_mailboxes:
- organization = payload.organization
- if (not organization) and profile_id:
- with SessionLocal() as db:
- profile = db.get(TenantProfile, profile_id)
- if profile and profile.primary_domain:
- organization = profile.primary_domain
- raw_targets = _enumerate_all_mailboxes(
- organization=organization,
- tenant_id=tenant_id,
- client_id=client_id,
- client_secret=client_secret,
- profile_id=profile_id,
- )
- source_type = "tenant_all"
- else:
- raw_targets = [str(m) for m in payload.mailboxes]
- else:
- raw_targets = [str(item) for item in payload.site_urls]
- return _create_job_from_targets(
- raw_targets=raw_targets,
- scan_type=payload.scan_type,
- skip_default_sites=payload.skip_default_sites,
- source_type=source_type,
- tenant_id=tenant_id,
- client_id=client_id,
- client_secret=client_secret,
- tenant_profile_id=profile_id,
- )
-
-
-@app.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
-def create_scan_job_from_csv(
- skip_default_sites: bool = True,
- scan_type: str = Form("sharepoint"),
- tenant_profile_id: str | None = Form(None),
- tenant_id: str | None = Form(None),
- client_id: str | None = Form(None),
- client_secret: str | None = Form(None),
- file: UploadFile = File(...),
-) -> ScanJobCreateResponse:
- with SessionLocal() as db:
- resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = _resolve_credentials(
- db=db,
- tenant_profile_id=tenant_profile_id,
- tenant_id=tenant_id,
- client_id=client_id,
- client_secret=client_secret,
- )
- content = file.file.read()
- if scan_type == "mailbox":
- parsed = parse_mailboxes_csv(content)
- targets = parsed.mailboxes
- elif scan_type == "entra_groups":
- parsed = parse_entra_groups_csv(content)
- targets = parsed.urls
- else:
- parsed = parse_sites_csv(content)
- targets = parsed.urls
- response = _create_job_from_targets(
- raw_targets=targets,
- scan_type=scan_type,
- skip_default_sites=skip_default_sites,
- source_type="csv",
- tenant_id=resolved_tenant_id,
- client_id=resolved_client_id,
- client_secret=resolved_client_secret,
- tenant_profile_id=profile_id,
- )
-
- if parsed.invalid_rows:
- csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
- with SessionLocal() as db:
- job = db.get(ScanJob, response.job.id)
- if job:
- if job.warning_message:
- job.warning_message = f"{job.warning_message} | {csv_warning}"
- else:
- job.warning_message = csv_warning
- job.updated_at = datetime.utcnow()
- db.commit()
- db.refresh(job)
- response.job.warning_message = job.warning_message
-
- return response
-
-
-@app.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
-def cancel_scan_job(job_id: str) -> ScanJobSummary:
- with SessionLocal() as db:
- stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
- job = db.execute(stmt).unique().scalar_one_or_none()
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
- if job.status not in ("queued", "running"):
- raise HTTPException(status_code=409, detail="Job is not queued or running")
- now = datetime.utcnow()
- job.status = "cancelled"
- job.updated_at = now
- job.finished_at = now
- job.scan_activity = None
- db.commit()
- db.refresh(job)
- stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
- job = db.execute(stmt).unique().scalar_one()
- return _to_job_summary(job)
-
-
-@app.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
-def delete_scan_job(job_id: str) -> Response:
- with SessionLocal() as db:
- job = db.get(ScanJob, job_id)
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
- if job.status in ("queued", "running"):
- raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")
- db.delete(job)
- db.commit()
- return Response(status_code=204)
-
-
-@app.get("/api/scan-jobs", response_model=list[ScanJobSummary])
-def list_scan_jobs(
- limit: int = 20,
- tenant_profile_id: str | None = None,
- scan_type: str | None = None,
-) -> list[ScanJobSummary]:
- with SessionLocal() as db:
- stmt = (
- select(ScanJob)
- .options(joinedload(ScanJob.tenant_profile))
- .order_by(ScanJob.created_at.desc())
- .limit(max(1, min(limit, 100)))
- )
- if tenant_profile_id:
- stmt = stmt.where(ScanJob.tenant_profile_id == tenant_profile_id)
- if scan_type:
- stmt = stmt.where(ScanJob.scan_type == scan_type)
- jobs = list(db.execute(stmt).unique().scalars())
- return [_to_job_summary(job) for job in jobs]
-
-
-@app.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
-def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
- with SessionLocal() as db:
- job = db.get(ScanJob, job_id)
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
-
- principals = list(
- db.execute(
- select(PermissionDeviation.principal).where(PermissionDeviation.job_id == job_id)
- ).scalars()
- )
-
- type_counts: dict[str, int] = {}
- for principal in principals:
- parsed = _extract_sharing_link_group_and_type(str(principal or ""))
- if not parsed:
- continue
- _group_name, link_type = parsed
- type_counts[link_type] = type_counts.get(link_type, 0) + 1
-
- return SharingLinkTypesResponse(type_counts=type_counts)
-
-
-@app.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
-def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
- from .scanner import resolve_sharing_link_members
-
- with SessionLocal() as db:
- job = db.get(ScanJob, job_id)
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
- if job.status in ("queued", "running"):
- raise HTTPException(status_code=409, detail="Job is still running")
-
- cert_private_key: str | None = None
- cert_thumbprint: str | None = None
- cert_public_pem: str | None = None
- if job.tenant_profile_id:
- profile = db.get(TenantProfile, job.tenant_profile_id)
- if profile:
- cert_private_key = profile.cert_private_key
- cert_thumbprint = profile.cert_thumbprint
- cert_public_pem = profile.cert_public_pem
-
- auth = AuthConfig(
- tenant_id=job.auth_tenant_id or "",
- client_id=job.auth_client_id or "",
- client_secret=job.auth_client_secret or "",
- cert_private_key=cert_private_key,
- cert_thumbprint=cert_thumbprint,
- cert_public_pem=cert_public_pem,
- )
-
- all_deviations = list(
- db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
- )
-
- # Group by (site_url, principal) so each unique group is resolved once
- groups: dict[tuple[str, str], list[int]] = {}
- for dev in all_deviations:
- parsed = _extract_sharing_link_group_and_type(dev.principal)
- if not parsed:
- continue
- group_name, link_type = parsed
- if link_type not in payload.link_types:
- continue
- key = (dev.site_url, group_name)
- groups.setdefault(key, []).append(dev.id)
-
- updated_deviations = 0
- for (site_url, group_name), dev_ids in groups.items():
- members = resolve_sharing_link_members(site_url, group_name, auth)
- resolved_members = ", ".join(members) if members else ""
- with SessionLocal() as db:
- for dev_id in dev_ids:
- dev = db.get(PermissionDeviation, dev_id)
- if dev:
- dev.resolved_members = resolved_members
- db.commit()
- updated_deviations += len(dev_ids)
-
- return ResolveSharingLinksResponse(
- resolved_groups=len(groups),
- updated_deviations=updated_deviations,
- )
-
-
-@app.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
-def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
- """
- Expand group principals on this job's deviations and write each group's
- member list to permission_deviations.resolved_members. Handles both
- classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups
- assigned directly at root (via Microsoft Graph). Skips email-shape users
- and SharingLinks groups (those have their own resolver).
- """
- from .scanners.sharepoint import (
- is_aad_group_principal,
- is_sharepoint_group_principal,
- resolve_aad_group_members,
- resolve_sharing_link_members,
- )
-
- with SessionLocal() as db:
- job = db.get(ScanJob, job_id)
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
- if job.status in ("queued", "running"):
- raise HTTPException(status_code=409, detail="Job is still running")
- if (job.scan_type or "sharepoint") == "mailbox":
- raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")
-
- cert_private_key: str | None = None
- cert_thumbprint: str | None = None
- cert_public_pem: str | None = None
- if job.tenant_profile_id:
- profile = db.get(TenantProfile, job.tenant_profile_id)
- if profile:
- cert_private_key = profile.cert_private_key
- cert_thumbprint = profile.cert_thumbprint
- cert_public_pem = profile.cert_public_pem
-
- auth = AuthConfig(
- tenant_id=job.auth_tenant_id or "",
- client_id=job.auth_client_id or "",
- client_secret=job.auth_client_secret or "",
- cert_private_key=cert_private_key,
- cert_thumbprint=cert_thumbprint,
- cert_public_pem=cert_public_pem,
- )
-
- all_deviations = list(
- db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
- )
-
- # Group deviations by (site_url, principal) so each unique group is resolved once
- groups: dict[tuple[str, str], list[int]] = {}
- for dev in all_deviations:
- if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
- continue
- key = (dev.site_url, dev.principal)
- groups.setdefault(key, []).append(dev.id)
-
- resolved = 0
- skipped = 0
- updated = 0
- for (site_url, group_name), dev_ids in groups.items():
- try:
- if is_aad_group_principal(group_name):
- members = resolve_aad_group_members(group_name, auth)
- else:
- members = resolve_sharing_link_members(site_url, group_name, auth)
- except Exception: # noqa: BLE001
- members = []
-
- if not members:
- skipped += 1
- continue
-
- resolved_text = ", ".join(members)
- with SessionLocal() as db:
- for dev_id in dev_ids:
- dev = db.get(PermissionDeviation, dev_id)
- if dev:
- dev.resolved_members = resolved_text
- db.commit()
- resolved += 1
- updated += len(dev_ids)
-
- return ResolveGroupsResponse(
- resolved_groups=resolved,
- skipped_groups=skipped,
- updated_deviations=updated,
- )
-
-
-@app.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
-def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
- with SessionLocal() as db:
- job = db.get(ScanJob, job_id)
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
- target = db.get(ScanTarget, target_id)
- if not target or target.job_id != job_id:
- raise HTTPException(status_code=404, detail="Target not found")
- if job.status in ("queued", "running"):
- raise HTTPException(status_code=409, detail="Job is still running")
-
- cert_private_key: str | None = None
- cert_thumbprint: str | None = None
- cert_public_pem: str | None = None
- if job.tenant_profile_id:
- profile = db.get(TenantProfile, job.tenant_profile_id)
- if profile:
- cert_private_key = profile.cert_private_key
- cert_thumbprint = profile.cert_thumbprint
- cert_public_pem = profile.cert_public_pem
-
- auth = AuthConfig(
- tenant_id=job.auth_tenant_id or "",
- client_id=job.auth_client_id or "",
- client_secret=job.auth_client_secret or "",
- cert_private_key=cert_private_key,
- cert_thumbprint=cert_thumbprint,
- cert_public_pem=cert_public_pem,
- )
- site_url = target.site_url
- job_scan_type = job.scan_type or "sharepoint"
-
- result = probe(job_scan_type, site_url, auth)
-
- with SessionLocal() as db:
- target = db.get(ScanTarget, target_id)
- if not target:
- raise HTTPException(status_code=404, detail="Target not found")
- now = datetime.utcnow()
- target.last_probe_at = now
- target.last_probe_ok = result.ok
- target.last_probe_message = result.message
- target.updated_at = now
- db.commit()
- db.refresh(target)
- return ProbeResultResponse(
- target_id=target.id,
- ok=result.ok,
- message=result.message,
- last_probe_at=target.last_probe_at,
- )
-
-
-@app.get("/api/scan-jobs/{job_id}/export")
-def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
- import openpyxl
- from openpyxl.styles import Font, PatternFill
-
- with SessionLocal() as db:
- job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
-
- targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
- if site_url:
- targets_q = targets_q.where(ScanTarget.site_url == site_url)
- targets = list(db.execute(targets_q).scalars())
-
- deviations_q = (
- select(PermissionDeviation)
- .where(PermissionDeviation.job_id == job.id)
- .order_by(PermissionDeviation.id.desc())
- )
- if site_url:
- deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
- deviations = list(db.execute(deviations_q).scalars())
-
- wb = openpyxl.Workbook()
- header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
- header_font_white = Font(bold=True, color="FFFFFF")
-
- _risk_styles: dict[str, tuple] = {
- "Critical": (
- PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
- Font(bold=True, color="7B0000"),
- ),
- "High": (
- PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
- Font(bold=True, color="7C2D00"),
- ),
- "Low": (
- PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
- Font(bold=True, color="0C4A6E"),
- ),
- "Unknown": (
- PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
- Font(bold=True, color="555555"),
- ),
- }
-
- def _style_header(ws, headers):
- ws.append(headers)
- for cell in ws[1]:
- cell.font = header_font_white
- cell.fill = header_fill
-
- scan_type = job.scan_type or "sharepoint"
-
- target_label = {
- "sharepoint": "Site URL",
- "sharepoint_root": "Site URL",
- "mailbox": "Mailbox",
- "entra_groups": "Group",
- }.get(scan_type, "Target")
-
- # Targets sheet
- ws_targets = wb.active
- ws_targets.title = "Targets"
- _style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
- for t in targets:
- ws_targets.append([
- t.site_url,
- t.status,
- t.attempts,
- t.error_message or "",
- t.started_at.isoformat() if t.started_at else "",
- t.finished_at.isoformat() if t.finished_at else "",
- ])
- for col in ws_targets.columns:
- ws_targets.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
-
- # Results sheet — name and columns depend on scan type
- if scan_type == "mailbox":
- ws_dev = wb.create_sheet("Mailbox Permissions")
- _style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
- deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
- for d in deviations:
- ws_dev.append([
- d.site_url,
- d.object_url,
- d.permission_type or d.object_type,
- d.principal,
- d.role_name,
- ])
- elif scan_type == "entra_groups":
- ws_dev = wb.create_sheet("Group Memberships")
- _style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
- deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
- for d in deviations:
- ws_dev.append([
- d.object_url,
- d.permission_type or "",
- d.principal,
- d.role_name,
- ])
- elif scan_type == "sharepoint_root":
- ws_dev = wb.create_sheet("Root Permissions")
- _style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
- deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
- for d in deviations:
- ws_dev.append([
- d.site_url,
- d.principal,
- d.resolved_members or "",
- d.role_name,
- ])
- else:
- ws_dev = wb.create_sheet("Deviations")
- _style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
- deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
- for d in deviations:
- base = (d.site_url or "").rstrip("/")
- obj_rel = d.object_url[len(base):] if base and d.object_url.startswith(base) else d.object_url
- link_risk = _sharing_link_risk_label(d.principal)
- ws_dev.append([
- d.site_url,
- obj_rel,
- d.object_type,
- d.principal,
- link_risk,
- d.resolved_members or "",
- d.role_name,
- d.delta_type,
- ])
- if link_risk in _risk_styles:
- risk_fill, risk_font = _risk_styles[link_risk]
- risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
- risk_cell.fill = risk_fill
- risk_cell.font = risk_font
- for col in ws_dev.columns:
- ws_dev.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
-
- buf = io.BytesIO()
- wb.save(buf)
- buf.seek(0)
-
- filename = _build_export_filename(job, job_id)
- return StreamingResponse(
- buf,
- media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- headers={"Content-Disposition": f'attachment; filename="{filename}"'},
- )
-
-
-@app.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
-def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
- with SessionLocal() as db:
- job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
- if not job:
- raise HTTPException(status_code=404, detail="Job not found")
-
- targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
- if site_url:
- targets_q = targets_q.where(ScanTarget.site_url == site_url)
- targets = list(db.execute(targets_q).scalars())
-
- deviations_q = (
- select(PermissionDeviation)
- .where(PermissionDeviation.job_id == job.id)
- .order_by(PermissionDeviation.site_url.asc(), PermissionDeviation.object_url.asc(), PermissionDeviation.id.asc())
- )
- if site_url:
- deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
- else:
- deviations_q = deviations_q.limit(1000)
- deviations = list(db.execute(deviations_q).scalars())
-
- return ScanJobDetail(
- **_to_job_summary(job).model_dump(),
- targets=[
- ScanTargetItem(
- id=t.id,
- site_url=t.site_url,
- status=t.status,
- attempts=t.attempts,
- error_message=t.error_message,
- started_at=t.started_at,
- finished_at=t.finished_at,
- last_probe_at=t.last_probe_at,
- last_probe_ok=t.last_probe_ok,
- last_probe_message=t.last_probe_message,
- )
- for t in targets
- ],
- deviations=[
- PermissionDeviationItem(
- id=d.id,
- site_url=d.site_url,
- object_url=d.object_url,
- object_type=d.object_type,
- principal=d.principal,
- role_name=d.role_name,
- delta_type=d.delta_type,
- permission_type=d.permission_type,
- resolved_members=d.resolved_members,
- created_at=d.created_at,
- )
- for d in deviations
- ],
- )
-
-
-# ---------------------------------------------------------------------------
-# Onboarding
-# ---------------------------------------------------------------------------
-
-@app.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
-def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
- try:
- result = create_scan_app_for_tenant(
- tenant_id=payload.tenant_id,
- display_name=payload.display_name,
- )
- except OnboardingError as exc:
- raise HTTPException(status_code=400, detail=str(exc)) from exc
- except Exception as exc: # noqa: BLE001
- raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {exc}") from exc
-
- return CreateScanAppResponse(
- tenant_id=result.tenant_id,
- client_id=result.client_id,
- client_secret=result.client_secret,
- app_object_id=result.app_object_id,
- service_principal_id=result.service_principal_id,
- display_name=result.display_name,
- )
-
-
-@app.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
-def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
- try:
- return ConnectMicrosoftResponse(connect_url=create_connect_url())
- except OnboardingError as exc:
- raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@app.get("/api/onboarding/microsoft/callback")
-def onboarding_microsoft_callback(
- tenant: str | None = None,
- state: str | None = None,
- error: str | None = None,
- error_description: str | None = None,
-) -> RedirectResponse:
- if error:
- message = (error_description or error).replace(" ", "+")
- return RedirectResponse(url=f"/?onboarding_status=error&onboarding_message={message}")
-
- if not state or not consume_callback_state(state):
- return RedirectResponse(url="/?onboarding_status=error&onboarding_message=invalid_or_expired_state")
-
- if not tenant:
- return RedirectResponse(url="/?onboarding_status=error&onboarding_message=missing_tenant")
-
- return RedirectResponse(url=f"/?onboarding_status=connected&tenant_id={tenant}")
-
-
-@app.get("/api/onboarding/status")
-def onboarding_status() -> dict[str, bool]:
- from . import config
- automated = bool(config.ONBOARDING_CLIENT_ID and config.ONBOARDING_CLIENT_SECRET and config.ONBOARDING_REDIRECT_URI)
- return {"automated_available": automated}
-
-
-# ---------------------------------------------------------------------------
-# Static files
+# Static files (mounted last so explicit API routes take precedence)
# ---------------------------------------------------------------------------
@app.get("/")
@@ -847,306 +62,3 @@ def index() -> FileResponse:
app.mount("/", StaticFiles(directory=SITE_DIR, html=True), name="site")
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-_SCAN_TYPE_LABELS = {
- "sharepoint": "Deviations",
- "sharepoint_root": "Root",
- "mailbox": "Mailbox",
- "entra_groups": "EntraGroups",
-}
-
-
-def _build_export_filename(job: ScanJob, job_id: str) -> str:
- tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
- safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
- scan_type = job.scan_type or "sharepoint"
- type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
- short_id = job_id.replace("-", "")[-12:]
- return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
-
-
-def _enumerate_all_entra_groups(
- tenant_id: str,
- client_id: str,
- client_secret: str | None,
- profile_id: str | None,
-) -> list[str]:
- cert_private_key: str | None = None
- cert_thumbprint: str | None = None
- cert_public_pem: str | None = None
- if profile_id:
- with SessionLocal() as db:
- profile = db.get(TenantProfile, profile_id)
- if profile:
- cert_private_key = profile.cert_private_key
- cert_thumbprint = profile.cert_thumbprint
- cert_public_pem = profile.cert_public_pem
-
- auth = AuthConfig(
- tenant_id=tenant_id,
- client_id=client_id,
- client_secret=client_secret or "",
- cert_private_key=cert_private_key,
- cert_thumbprint=cert_thumbprint,
- cert_public_pem=cert_public_pem,
- )
-
- from .scanners import entra as _entra
-
- try:
- return _entra.list_all_groups(auth)
- except Exception as exc: # noqa: BLE001
- raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc
-
-
-def _enumerate_all_mailboxes(
- organization: str | None,
- tenant_id: str,
- client_id: str,
- client_secret: str | None,
- profile_id: str | None,
-) -> list[str]:
- if not organization or "." not in organization:
- raise HTTPException(
- status_code=400,
- detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
- )
-
- cert_private_key: str | None = None
- cert_thumbprint: str | None = None
- cert_public_pem: str | None = None
- if profile_id:
- with SessionLocal() as db:
- profile = db.get(TenantProfile, profile_id)
- if profile:
- cert_private_key = profile.cert_private_key
- cert_thumbprint = profile.cert_thumbprint
- cert_public_pem = profile.cert_public_pem
-
- auth = AuthConfig(
- tenant_id=tenant_id,
- client_id=client_id,
- client_secret=client_secret or "",
- cert_private_key=cert_private_key,
- cert_thumbprint=cert_thumbprint,
- cert_public_pem=cert_public_pem,
- )
-
- from .scanners import mailbox as _mailbox
-
- try:
- return _mailbox.list_mailboxes(organization=organization.strip().lower(), auth=auth)
- except Exception as exc: # noqa: BLE001
- raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
-
-
-def _resolve_credentials(
- db,
- tenant_profile_id: str | None,
- tenant_id: str | None,
- client_id: str | None,
- client_secret: str | None,
-) -> tuple[str, str, str | None, str | None]:
- if tenant_profile_id:
- profile = db.get(TenantProfile, tenant_profile_id)
- if not profile:
- raise HTTPException(status_code=404, detail="Tenant profile not found")
- if not profile.client_secret and not profile.cert_thumbprint:
- raise HTTPException(
- status_code=400,
- detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
- )
- return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
- if tenant_id and client_id and client_secret:
- return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
- raise HTTPException(
- status_code=400,
- detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
- )
-
-
-def _create_job_from_targets(
- raw_targets: list[str],
- scan_type: str,
- skip_default_sites: bool,
- source_type: str,
- tenant_id: str,
- client_id: str,
- client_secret: str,
- tenant_profile_id: str | None = None,
-) -> ScanJobCreateResponse:
- accepted: list[str] = []
- skipped_default_urls: list[str] = []
- invalid: list[str] = []
-
- seen: set[str] = set()
-
- for raw in raw_targets:
- if scan_type == "mailbox":
- normalized = (raw or "").strip().lower()
- if not normalized or "@" not in normalized:
- invalid.append(raw)
- continue
- elif scan_type == "entra_groups":
- normalized = (raw or "").strip()
- if not normalized:
- invalid.append(raw)
- continue
- else:
- normalized = normalize_site_url(raw) or ""
- if not normalized:
- invalid.append(raw)
- continue
-
- if normalized in seen:
- continue
- seen.add(normalized)
-
- if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
- skipped_default_urls.append(normalized)
- continue
-
- accepted.append(normalized)
-
- with SessionLocal() as db:
- now = datetime.utcnow()
- job = ScanJob(
- id=str(uuid.uuid4()),
- source_type=source_type,
- scan_type=scan_type,
- status="queued" if accepted else "completed",
- skip_default_sites=skip_default_sites,
- tenant_profile_id=tenant_profile_id,
- auth_tenant_id=tenant_id,
- auth_client_id=client_id,
- auth_client_secret=client_secret,
- total_targets=len(accepted),
- skipped_targets=len(skipped_default_urls),
- warning_message=None,
- error_message=None,
- created_at=now,
- updated_at=now,
- finished_at=now if not accepted else None,
- )
-
- if not accepted:
- if scan_type == "mailbox":
- job.warning_message = "No scannable mailboxes after validation"
- else:
- job.warning_message = "No scannable sites after validation and default-site filtering"
-
- db.add(job)
- db.flush()
-
- for index, target in enumerate(accepted, start=1):
- db.add(
- ScanTarget(
- job_id=job.id,
- site_url=target,
- source_row=index,
- status="queued",
- attempts=0,
- created_at=now,
- updated_at=now,
- )
- )
-
- db.commit()
-
- stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
- job = db.execute(stmt).unique().scalar_one()
-
- return ScanJobCreateResponse(
- job=_to_job_summary(job),
- accepted_urls=accepted,
- skipped_default_urls=skipped_default_urls,
- invalid_urls=invalid,
- )
-
-
-def _to_job_summary(job: ScanJob) -> ScanJobSummary:
- return ScanJobSummary(
- id=job.id,
- status=job.status,
- source_type=job.source_type,
- scan_type=job.scan_type or "sharepoint",
- skip_default_sites=job.skip_default_sites,
- tenant_profile_id=job.tenant_profile_id,
- tenant_name=job.tenant_profile.name if job.tenant_profile else None,
- total_targets=job.total_targets,
- processed_targets=job.processed_targets,
- successful_targets=job.successful_targets,
- failed_targets=job.failed_targets,
- skipped_targets=job.skipped_targets,
- items_scanned=job.items_scanned,
- scan_activity=job.scan_activity if job.status == "running" else None,
- warning_message=job.warning_message,
- error_message=job.error_message,
- created_at=job.created_at,
- updated_at=job.updated_at,
- started_at=job.started_at,
- finished_at=job.finished_at,
- )
-
-
-def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
- return TenantProfileItem(
- id=profile.id,
- name=profile.name,
- tenant_id=profile.tenant_id,
- primary_domain=profile.primary_domain,
- client_id=profile.client_id,
- has_certificate=bool(profile.cert_thumbprint),
- cert_thumbprint=profile.cert_thumbprint,
- cert_expires_at=profile.cert_expires_at,
- created_at=profile.created_at,
- updated_at=profile.updated_at,
- )
-
-
-def _sharing_link_risk_label(principal: str) -> str:
- if not principal.startswith("SharingLinks."):
- return ""
- parts = principal.split(".", 3)
- link_type = parts[2] if len(parts) >= 3 else ""
- if link_type.startswith("Anonymous"):
- return "Critical"
- if link_type == "Flexible":
- return "High"
- if link_type.startswith("Organization"):
- return "Low"
- if link_type.startswith("Direct"):
- return "Low"
- return "Unknown"
-
-
-def _ensure_schema_columns() -> None:
- stmts = [
- "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_tenant_id VARCHAR(128)",
- "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_client_id VARCHAR(128)",
- "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_client_secret TEXT",
- "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS tenant_profile_id VARCHAR(36)",
- "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS items_scanned INTEGER NOT NULL DEFAULT 0",
- "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS scan_activity TEXT",
- "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS scan_type VARCHAR(32) NOT NULL DEFAULT 'sharepoint'",
- "ALTER TABLE permission_deviations ADD COLUMN IF NOT EXISTS permission_type VARCHAR(32)",
- "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS primary_domain VARCHAR(256)",
- "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS client_secret TEXT",
- "ALTER TABLE tenant_profiles ALTER COLUMN client_secret DROP NOT NULL",
- "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_private_key TEXT",
- "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_public_pem TEXT",
- "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_thumbprint VARCHAR(64)",
- "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_expires_at TIMESTAMP",
- "ALTER TABLE permission_deviations ADD COLUMN IF NOT EXISTS resolved_members TEXT",
- "ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_at TIMESTAMP",
- "ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_ok BOOLEAN",
- "ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_message TEXT",
- ]
- with engine.begin() as conn:
- for stmt in stmts:
- conn.execute(text(stmt))
diff --git a/containers/clearview/src/clearview_app/migrations/env.py b/containers/clearview/src/clearview_app/migrations/env.py
new file mode 100644
index 0000000..c07a2c4
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/env.py
@@ -0,0 +1,58 @@
+"""Alembic environment for Clearview.
+
+Reuses the application's SQLAlchemy engine (already configured with the
+normalized DATABASE_URL and pool_pre_ping) so migrations run against exactly
+the same database the app uses. Logging config from alembic.ini is applied
+only when Alembic is invoked through the CLI; programmatic invocation from
+``clearview_app.db_migrate`` passes a Config without a file.
+"""
+from __future__ import annotations
+
+from logging.config import fileConfig
+
+from alembic import context
+
+from clearview_app.config import DATABASE_URL
+from clearview_app.db import _normalize_database_url, engine as app_engine
+from clearview_app.models import Base
+
+config = context.config
+
+if config.config_file_name is not None:
+ try:
+ fileConfig(config.config_file_name)
+ except Exception: # noqa: BLE001 - logging config is best-effort
+ pass
+
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+ """Emit SQL to stdout without a live DB connection."""
+ context.configure(
+ url=_normalize_database_url(DATABASE_URL),
+ target_metadata=target_metadata,
+ literal_binds=True,
+ dialect_opts={"paramstyle": "named"},
+ compare_type=True,
+ )
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+def run_migrations_online() -> None:
+ """Run migrations against the live database via the app engine."""
+ with app_engine.connect() as connection:
+ context.configure(
+ connection=connection,
+ target_metadata=target_metadata,
+ compare_type=True,
+ )
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+if context.is_offline_mode():
+ run_migrations_offline()
+else:
+ run_migrations_online()
diff --git a/containers/clearview/src/clearview_app/migrations/script.py.mako b/containers/clearview/src/clearview_app/migrations/script.py.mako
new file mode 100644
index 0000000..da29ede
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from __future__ import annotations
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+ ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+ ${downgrades if downgrades else "pass"}
diff --git a/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py b/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py
new file mode 100644
index 0000000..fe959d5
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/versions/0001_baseline.py
@@ -0,0 +1,31 @@
+"""baseline schema
+
+Captures the full Clearview schema as defined by the SQLAlchemy models at the
+time Alembic was adopted. Creating it via ``Base.metadata.create_all`` keeps the
+baseline guaranteed-identical to the models (the same DDL the app emitted before
+Alembic). Existing databases are ``stamp``-ed to this revision rather than
+re-running ``upgrade`` (see ``clearview_app.db_migrate``).
+
+Revision ID: 0001_baseline
+Revises:
+Create Date: 2026-05-26
+"""
+from __future__ import annotations
+
+from alembic import op
+
+from clearview_app.models import Base
+
+# revision identifiers, used by Alembic.
+revision = "0001_baseline"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ Base.metadata.create_all(bind=op.get_bind())
+
+
+def downgrade() -> None:
+ Base.metadata.drop_all(bind=op.get_bind())
diff --git a/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py b/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py
new file mode 100644
index 0000000..c32a973
--- /dev/null
+++ b/containers/clearview/src/clearview_app/migrations/versions/0002_timestamptz.py
@@ -0,0 +1,63 @@
+"""convert timestamp columns to timestamptz
+
+The app now uses timezone-aware UTC datetimes (DateTime(timezone=True)).
+Existing databases store naive ``timestamp without time zone`` values that were
+written as UTC, so we reinterpret them as UTC while converting. The conversion
+is guarded per column on the current type, so it is a no-op on databases whose
+columns are already ``timestamptz`` (e.g. a fresh DB created from the updated
+baseline models).
+
+Revision ID: 0002_timestamptz
+Revises: 0001_baseline
+Create Date: 2026-05-26
+"""
+from __future__ import annotations
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "0002_timestamptz"
+down_revision = "0001_baseline"
+branch_labels = None
+depends_on = None
+
+# Table -> datetime columns (names come from our own models, never user input).
+_COLUMNS: dict[str, tuple[str, ...]] = {
+ "tenant_profiles": ("cert_expires_at", "created_at", "updated_at"),
+ "scan_jobs": ("created_at", "updated_at", "started_at", "finished_at", "heartbeat_at"),
+ "scan_targets": ("last_probe_at", "created_at", "updated_at", "started_at", "finished_at"),
+ "permission_deviations": ("created_at",),
+}
+
+
+def _column_type(bind, table: str, column: str) -> str | None:
+ return bind.execute(
+ sa.text(
+ "SELECT data_type FROM information_schema.columns "
+ "WHERE table_name = :t AND column_name = :c"
+ ),
+ {"t": table, "c": column},
+ ).scalar()
+
+
+def upgrade() -> None:
+ bind = op.get_bind()
+ for table, columns in _COLUMNS.items():
+ for column in columns:
+ if _column_type(bind, table, column) == "timestamp without time zone":
+ op.execute(
+ f'ALTER TABLE {table} ALTER COLUMN {column} '
+ f"TYPE timestamptz USING {column} AT TIME ZONE 'UTC'"
+ )
+
+
+def downgrade() -> None:
+ bind = op.get_bind()
+ for table, columns in _COLUMNS.items():
+ for column in columns:
+ if _column_type(bind, table, column) == "timestamp with time zone":
+ op.execute(
+ f'ALTER TABLE {table} ALTER COLUMN {column} '
+ f"TYPE timestamp USING {column} AT TIME ZONE 'UTC'"
+ )
diff --git a/containers/clearview/src/clearview_app/models.py b/containers/clearview/src/clearview_app/models.py
index ceccf26..e67483e 100644
--- a/containers/clearview/src/clearview_app/models.py
+++ b/containers/clearview/src/clearview_app/models.py
@@ -1,11 +1,16 @@
from __future__ import annotations
-from datetime import datetime
+from datetime import datetime, timezone
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
+def _utcnow() -> datetime:
+ """Timezone-aware UTC now, used as the default for timestamp columns."""
+ return datetime.now(timezone.utc)
+
+
class Base(DeclarativeBase):
pass
@@ -22,9 +27,9 @@ class TenantProfile(Base):
cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_public_pem: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True)
- cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
- created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
- updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+ cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+ created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+ updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
jobs: Mapped[list["ScanJob"]] = relationship(back_populates="tenant_profile")
@@ -56,11 +61,11 @@ class ScanJob(Base):
warning_message: Mapped[str | None] = mapped_column(Text, nullable=True)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
- created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
- updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
- started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
- finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
- heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+ created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+ updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+ started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+ finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+ heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
tenant_profile: Mapped["TenantProfile | None"] = relationship(back_populates="jobs")
targets: Mapped[list["ScanTarget"]] = relationship(back_populates="job", cascade="all,delete-orphan")
@@ -79,14 +84,14 @@ class ScanTarget(Base):
attempts: Mapped[int] = mapped_column(Integer, default=0)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
- last_probe_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+ last_probe_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
last_probe_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
last_probe_message: Mapped[str | None] = mapped_column(Text, nullable=True)
- created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
- updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
- started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
- finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+ created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+ updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
+ started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+ finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
job: Mapped[ScanJob] = relationship(back_populates="targets")
deviations: Mapped[list["PermissionDeviation"]] = relationship(back_populates="target", cascade="all,delete-orphan")
@@ -108,7 +113,7 @@ class PermissionDeviation(Base):
permission_type: Mapped[str | None] = mapped_column(String(32), nullable=True)
resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True)
- created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+ created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
job: Mapped[ScanJob] = relationship(back_populates="deviations")
target: Mapped[ScanTarget] = relationship(back_populates="deviations")
diff --git a/containers/clearview/src/clearview_app/scanners/sharepoint.py b/containers/clearview/src/clearview_app/scanners/sharepoint.py
index 097aefa..db4db24 100644
--- a/containers/clearview/src/clearview_app/scanners/sharepoint.py
+++ b/containers/clearview/src/clearview_app/scanners/sharepoint.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import re
+import threading
import time
from dataclasses import dataclass
from urllib.parse import urlparse
@@ -32,7 +33,13 @@ class PermissionEntry:
role_name: str
-_TOKEN_CACHE: dict[str, str] = {}
+# Cache maps cache_key -> (access_token, expires_at_epoch). Guarded by
+# _TOKEN_LOCK because the worker acquires tokens from multiple threads.
+_TOKEN_CACHE: dict[str, tuple[str, float]] = {}
+_TOKEN_LOCK = threading.Lock()
+# Reuse one MSAL app per (tenant, client, auth_method) so MSAL's own token
+# cache works and refreshes app tokens automatically.
+_MSAL_APPS: dict[str, "msal.ConfidentialClientApplication"] = {}
def scan_site_for_deviations(
@@ -612,18 +619,20 @@ def _probe_hint(error: str, stage: str) -> str:
return error[:220]
-def _get_token_for_host(host: str, auth: AuthConfig) -> str:
- auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
- cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
- cached = _TOKEN_CACHE.get(cache_key)
- if cached:
- return cached
+def _get_msal_app(auth: AuthConfig, auth_method: str) -> "msal.ConfidentialClientApplication":
+ """Return a cached ConfidentialClientApplication for these credentials.
+
+ Reusing the app object lets MSAL's built-in token cache serve and refresh
+ app-only tokens instead of re-authenticating on every call.
+ """
+ app_key = f"{auth.tenant_id}|{auth.client_id}|{auth_method}"
+ app = _MSAL_APPS.get(app_key)
+ if app is not None:
+ return app
- scope = f"https://{host}/.default"
authority = f"https://login.microsoftonline.com/{auth.tenant_id}"
-
if auth_method == "cert":
- client_credential = {
+ client_credential: dict[str, str | None] | str | None = {
"thumbprint": auth.cert_thumbprint,
"private_key": auth.cert_private_key,
}
@@ -635,16 +644,34 @@ def _get_token_for_host(host: str, auth: AuthConfig) -> str:
authority=authority,
client_credential=client_credential,
)
- result = app.acquire_token_for_client(scopes=[scope])
+ _MSAL_APPS[app_key] = app
+ return app
- if "access_token" not in result:
- error = result.get("error", "unknown")
- description = result.get("error_description", "")
- raise RuntimeError(f"Token request failed ({error}): {description[:300]}")
- token = str(result["access_token"])
- _TOKEN_CACHE[cache_key] = token
- return token
+def _get_token_for_host(host: str, auth: AuthConfig) -> str:
+ auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
+ cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
+
+ with _TOKEN_LOCK:
+ cached = _TOKEN_CACHE.get(cache_key)
+ if cached is not None and time.time() < cached[1]:
+ return cached[0]
+
+ scope = f"https://{host}/.default"
+ app = _get_msal_app(auth, auth_method)
+ result = app.acquire_token_for_client(scopes=[scope])
+
+ if "access_token" not in result:
+ error = result.get("error", "unknown")
+ description = result.get("error_description", "")
+ raise RuntimeError(f"Token request failed ({error}): {description[:300]}")
+
+ token = str(result["access_token"])
+ # expires_in is seconds-from-now; refresh 60s early to avoid edge expiry.
+ expires_in = int(result.get("expires_in", 3600))
+ expires_at = time.time() + max(expires_in - 60, 0)
+ _TOKEN_CACHE[cache_key] = (token, expires_at)
+ return token
def _iter_paged(url: str, headers: dict[str, str]):
diff --git a/containers/clearview/src/clearview_app/schemas.py b/containers/clearview/src/clearview_app/schemas.py
index 19cfe25..910ba14 100644
--- a/containers/clearview/src/clearview_app/schemas.py
+++ b/containers/clearview/src/clearview_app/schemas.py
@@ -1,9 +1,14 @@
from __future__ import annotations
from datetime import datetime
+from typing import Literal
from pydantic import BaseModel, Field, HttpUrl
+# Valid scan types, mirrored by the frontend scan-type dropdowns. Used to
+# validate incoming job requests (FastAPI returns 422 on anything else).
+ScanType = Literal["sharepoint", "sharepoint_root", "mailbox", "entra_groups"]
+
class CreateTenantProfileRequest(BaseModel):
name: str
@@ -33,7 +38,7 @@ class TenantCertificateResponse(BaseModel):
class CreateScanJobRequest(BaseModel):
- scan_type: str = "sharepoint"
+ scan_type: ScanType = "sharepoint"
site_urls: list[HttpUrl] = Field(default_factory=list)
mailboxes: list[str] = Field(default_factory=list)
scan_all_mailboxes: bool = False
diff --git a/containers/clearview/src/clearview_app/version.py b/containers/clearview/src/clearview_app/version.py
index b31ae7a..c0cd924 100644
--- a/containers/clearview/src/clearview_app/version.py
+++ b/containers/clearview/src/clearview_app/version.py
@@ -7,7 +7,7 @@ history, so operators can see exactly which image build is running.
from __future__ import annotations
VERSION = "v0.1.0"
-BUILD = 1
+BUILD = 2
def display_version() -> str:
diff --git a/containers/clearview/src/clearview_app/worker.py b/containers/clearview/src/clearview_app/worker.py
index a78ae63..3d3f6ca 100644
--- a/containers/clearview/src/clearview_app/worker.py
+++ b/containers/clearview/src/clearview_app/worker.py
@@ -4,7 +4,7 @@ import logging
import threading
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
-from datetime import datetime
+from datetime import datetime, timezone
from sqlalchemy import select
@@ -47,17 +47,21 @@ class ScanWorker:
def _process_next_job(self) -> bool:
with SessionLocal() as db:
+ # Atomic claim: lock the chosen queued row and skip rows already
+ # locked by another worker, so multiple workers/replicas never grab
+ # the same job. The status flip is committed in this transaction.
job = db.execute(
select(ScanJob)
.where(ScanJob.status == "queued")
.order_by(ScanJob.created_at.asc())
.limit(1)
+ .with_for_update(skip_locked=True)
).scalar_one_or_none()
if job is None:
return False
- now = datetime.utcnow()
+ now = datetime.now(timezone.utc)
job.status = "running"
job.started_at = now
job.heartbeat_at = now
@@ -96,7 +100,7 @@ class ScanWorker:
job = db.get(ScanJob, job_id)
if not job:
return
- now = datetime.utcnow()
+ now = datetime.now(timezone.utc)
job.heartbeat_at = now
job.updated_at = now
job.finished_at = now
@@ -113,7 +117,7 @@ class ScanWorker:
if not job or not target:
return
- now = datetime.utcnow()
+ now = datetime.now(timezone.utc)
target.status = "running"
target.started_at = now
target.updated_at = now
@@ -128,7 +132,7 @@ class ScanWorker:
target = db.get(ScanTarget, target_id)
if not job or not target:
return
- now = datetime.utcnow()
+ now = datetime.now(timezone.utc)
target.status = "failed"
target.attempts = 1
target.error_message = f"Preflight: {probe.message}"
@@ -173,7 +177,7 @@ class ScanWorker:
)
)
- now = datetime.utcnow()
+ now = datetime.now(timezone.utc)
target.status = "completed"
target.attempts = attempt
target.error_message = None
@@ -203,7 +207,7 @@ class ScanWorker:
if not job or not target:
return
- now = datetime.utcnow()
+ now = datetime.now(timezone.utc)
target.status = "failed"
target.attempts = max_attempts
target.error_message = last_error
@@ -252,7 +256,7 @@ class ScanWorker:
with SessionLocal() as db:
target = db.get(ScanTarget, target_id)
if target:
- now = datetime.utcnow()
+ now = datetime.now(timezone.utc)
target.last_probe_at = now
target.last_probe_ok = result.ok
target.last_probe_message = result.message
@@ -298,8 +302,8 @@ class ScanWorker:
job.scan_activity = activity
if items > 0:
job.items_scanned += items
- job.heartbeat_at = datetime.utcnow()
- job.updated_at = datetime.utcnow()
+ job.heartbeat_at = datetime.now(timezone.utc)
+ job.updated_at = datetime.now(timezone.utc)
db.commit()
except Exception: # noqa: BLE001
pass
diff --git a/docs/changelog-develop.md b/docs/changelog-develop.md
index 1097da5..8d76c14 100644
--- a/docs/changelog-develop.md
+++ b/docs/changelog-develop.md
@@ -2,6 +2,42 @@
This file documents changes on the develop branch of this project.
+## 2026-05-26 — UI/UX: dead CSS removal, a11y, distinct risk colours, richer dashboard
+
+### Added
+- **Dashboard enrichment** — a fourth KPI card **With errors** (`#statErrors`, counts jobs that are `completed_with_errors` or have `failed_targets > 0`) and a **Recent jobs** panel (`#dashRecentJobs`, last 5 jobs, each row clickable to jump to its details). Populated from the existing `/api/scan-jobs` list in `refreshJobs()` via a new `renderDashRecent()`; all interpolated fields run through `escHtml()`.
+
+### Changed
+- **Removed dead CSS** — the pre-sidebar `.topbar`, `.topbar-actions`, and `.layout` rules (and their now-orphaned references inside the 930px/640px media queries) were deleted; the layout has used `.app-shell`/`.sidebar`/`.content` since the sidebar refactor.
+- **Accessibility** — focus outline strengthened from `rgba(14,165,233,0.38)` to a solid `var(--cv-accent)` (meets WCAG non-text 3:1) and now also covers `a:focus-visible`. On route changes (`applyRoute`), focus now moves to the new page's first heading (`h1/h2`, `tabindex=-1`) and `document.title` updates, so screen-reader/keyboard users land in the freshly shown content.
+- **Distinct risk colours** — the `risk.warn` badge changed from accent-blue (indistinguishable from `info`/`low`) to amber (`#854d0e` on `rgba(234,179,8,.18)`), giving a real low→high colour gradient.
+- **Consistent XSS escaping** — `job.id` and `job.source_type` in the Scan Jobs table are now passed through `escHtml()` (previously interpolated raw), matching the rest of the table.
+
+## 2026-05-26 — Split monolithic main.py into route modules
+
+### Changed
+- **`main.py` reduced from 1152 to 64 lines** — now a composition root that only wires the FastAPI app, scan-worker lifecycle, `/healthz`, `/api/version`, the `/` index + static mount, and `include_router` for the new route modules. All endpoint logic moved out verbatim (behaviour-preserving).
+- **New route modules** (flat modules at package level so existing single-dot relative imports stay unchanged — lower risk than a `routers/` subpackage): `api_tenants.py` (tenant profiles + certificate), `api_jobs.py` (all scan-job routes incl. CSV import, cancel/delete, resolve-sharing-links, resolve-groups, test-connection, Excel export, detail), `api_onboarding.py` (Microsoft connect/callback/scan-app). Shared helpers (`_resolve_credentials`, `_create_job_from_targets`, `_enumerate_all_*`, `_to_job_summary`, `_to_tenant_item`, `_build_export_filename`, `_sharing_link_risk_label`, `_extract_sharing_link_group_and_type`) extracted to `api_helpers.py`.
+- **Verified behaviour-preserving** — captured the OpenAPI route set before/after; both expose the identical 22 endpoints (`diff` empty). Built the image, booted against a fresh DB: `/healthz`, `/api/version`, `/api/tenants`, `/api/scan-jobs` all respond, invalid `scan_type` still returns 422, no startup errors.
+
+## 2026-05-26 — Correctness P1: token cache, atomic job claim, timezone-aware datetimes, scan_type validation
+
+### Changed
+- **Token cache now has TTL + thread lock + MSAL app reuse** (`scanners/sharepoint.py`) — `_TOKEN_CACHE` previously stored access tokens as plain strings forever, so long scans started failing with 401s once the ~1h token expired. It now stores `(token, expires_at)` and refreshes 60s before expiry, guarded by a new `_TOKEN_LOCK` (the worker fetches tokens from multiple threads). New `_get_msal_app()` caches one `ConfidentialClientApplication` per `(tenant, client, auth_method)` so MSAL's own token cache is reused instead of building a fresh app on every call.
+- **Atomic job claim** (`worker.py`) — the queued-job selection now uses `.with_for_update(skip_locked=True)` (`SELECT … FOR UPDATE SKIP LOCKED`), so multiple worker threads/replicas can never claim the same job. Behaviour is unchanged for the current single worker but is now replica-safe.
+- **Timezone-aware datetimes everywhere** — replaced all 24 `datetime.utcnow()` (naive, deprecated) with `datetime.now(timezone.utc)` across `models.py`, `worker.py`, `main.py`, and `cert.py`. SQLAlchemy datetime columns are now `DateTime(timezone=True)`; model defaults use a new `_utcnow()` helper. New Alembic migration `0002_timestamptz` converts existing `timestamp without time zone` columns to `timestamptz` (reinterpreting stored values as UTC), guarded per-column so it is a no-op on databases already timestamptz. **Behaviour note:** API datetimes now carry a UTC offset, so the frontend renders them correctly in local time (previously stored UTC was shown as if local).
+- **`scan_type` request validation** (`schemas.py`) — `CreateScanJobRequest.scan_type` is now `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` instead of free `str`; invalid values return HTTP 422. The response model keeps `str` so legacy rows never trigger a serialization error. Verified: `scan_type=bogus` → 422, valid type passes schema validation.
+
+## 2026-05-26 — Alembic migrations replace startup `create_all` + raw ALTERs
+
+### Added
+- **Alembic introduced (`alembic==1.14.0`)** — schema is now version-controlled instead of being patched at every startup. New `clearview_app/migrations/` package (`env.py` reuses the app's SQLAlchemy engine and `Base.metadata`; `versions/0001_baseline.py` baseline) and dev-only `containers/clearview/alembic.ini` for manual CLI use. The app builds the Alembic `Config` programmatically, so `alembic.ini` is not shipped in the image.
+- **Baseline migration `0001_baseline`** — creates the full current schema via `Base.metadata.create_all`, guaranteed identical to the models (the same DDL the app emitted before). Future schema changes become explicit Alembic revisions.
+- **Startup bootstrap `clearview_app/db_migrate.run_migrations()`** — idempotent, three cases: fresh DB → `upgrade head`; existing pre-Alembic DB (tables present, no `alembic_version`) → `stamp head` (adopt baseline without re-creating); already under Alembic → `upgrade head`. Verified end-to-end against throwaway databases (fresh upgrade, existing-DB stamp, re-run no-op) and a local image boot test (`/healthz` OK, schema + `alembic_version=0001_baseline`).
+
+### Changed
+- **`main.py` startup** — `on_startup()` now calls `run_migrations()` instead of `Base.metadata.create_all(bind=engine)` + `_ensure_schema_columns()`. The 18-statement raw `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` block (`_ensure_schema_columns`) is removed; unused `Base`/`engine` imports dropped. The existing dev/prod database is adopted automatically (stamped to baseline) on first start of the new build — no manual migration step required.
+
## 2026-05-26 — Build/version number in the UI (Dropkeep-style)
### Added