Dev build 2026-05-26 15:12

This commit is contained in:
Ivo Oskamp 2026-05-26 15:12:35 +02:00
parent 61db7fe4a7
commit 0cdeabc0e6
22 changed files with 1639 additions and 1211 deletions

View File

@ -0,0 +1,45 @@
# Alembic config for manual CLI use during development, e.g.:
# cd containers/clearview && DATABASE_URL=postgresql://... PYTHONPATH=src alembic revision -m "msg"
#
# The application itself does NOT read this file: clearview_app.db_migrate builds
# an Alembic Config programmatically and env.py takes the database URL from
# DATABASE_URL via clearview_app.config. sqlalchemy.url is therefore left blank.
[alembic]
script_location = src/clearview_app/migrations
prepend_sys_path = src
sqlalchemy.url =
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARNING
handlers = console
qualname =
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@ -1,6 +1,7 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
sqlalchemy==2.0.36
alembic==1.14.0
psycopg[binary]==3.2.3
python-multipart==0.0.12
requests==2.32.3

View File

@ -104,6 +104,8 @@
statTenants: document.getElementById('statTenants'),
statJobs: document.getElementById('statJobs'),
statRunning: document.getElementById('statRunning'),
statErrors: document.getElementById('statErrors'),
dashRecentJobs: document.getElementById('dashRecentJobs'),
};
// -------------------------------------------------------------------------
@ -610,6 +612,36 @@
// Jobs list
// -------------------------------------------------------------------------
/**
 * Render the dashboard "Recent jobs" table.
 *
 * Shows at most the first five entries of `jobs`, or a single placeholder row
 * when the list is empty. Clicking a rendered row stores its job id in
 * `state.selectedJobId`, navigates to the `jobs` route and triggers a refresh
 * of the selected job. Does nothing when the table body element is absent.
 *
 * @param {Array<Object>} jobs - Job summaries to display.
 */
function renderDashRecent(jobs) {
  const tableBody = els.dashRecentJobs;
  if (!tableBody) {
    return;
  }
  if (!jobs.length) {
    tableBody.innerHTML = '<tr><td colspan="6">No jobs yet.</td></tr>';
    return;
  }

  // String concatenation (not Array.join) keeps the exact coercion of each cell value.
  const cell = (content) => '<td>' + content + '</td>';

  let markup = '';
  for (const job of jobs.slice(0, 5)) {
    const safeId = escHtml(job.id);

    let tenantCell = '<span style="color:var(--cv-text-secondary)">manual</span>';
    if (job.tenant_name) {
      tenantCell = escHtml(job.tenant_name);
    }

    let targetsCell = '0/0';
    if (job.total_targets > 0) {
      targetsCell = job.processed_targets + '/' + job.total_targets;
    }

    markup +=
      '<tr style="cursor:pointer" data-dash-job="' + safeId + '">' +
      cell('<code>' + safeId + '</code>') +
      cell(escHtml(job.scan_type || 'sharepoint')) +
      cell(tenantCell) +
      cell(statusBadge(job.status)) +
      cell(targetsCell) +
      cell(formatDate(job.updated_at)) +
      '</tr>';
  }
  tableBody.innerHTML = markup;

  // Rows are recreated on every render, so the click handlers are re-attached each time.
  tableBody.querySelectorAll('[data-dash-job]').forEach((row) => {
    row.addEventListener('click', () => {
      state.selectedJobId = row.getAttribute('data-dash-job');
      navigateTo('jobs');
      // Best-effort refresh: a failure here is deliberately ignored.
      refreshSelectedJob().catch(() => {});
    });
  });
}
async function refreshJobs() {
const filterTenant = els.jobTenantFilter.value;
const filterType = els.jobTypeFilter ? els.jobTypeFilter.value : '';
@ -626,6 +658,12 @@
els.statRunning.textContent = String(jobs.filter(function (j) {
return j.status === 'running' || j.status === 'queued';
}).length);
if (els.statErrors) {
els.statErrors.textContent = String(jobs.filter(function (j) {
return j.status === 'completed_with_errors' || (j.failed_targets || 0) > 0;
}).length);
}
renderDashRecent(jobs);
if (!jobs.length) {
els.jobsTableBody.innerHTML = '<tr><td colspan="9">No jobs yet.</td></tr>';
@ -654,22 +692,23 @@
} else {
typeLabel = '<span class="risk ok">SharePoint</span>';
}
const jobIdSafe = escHtml(job.id);
return (
'<tr>' +
'<td><code>' + job.id + '</code></td>' +
'<td><code>' + jobIdSafe + '</code></td>' +
'<td>' + typeLabel + '</td>' +
'<td>' + tenantLabel + '</td>' +
'<td>' + job.source_type + '</td>' +
'<td>' + escHtml(job.source_type) + '</td>' +
'<td>' + statusBadge(job.status) + '</td>' +
'<td>' + progress + '</td>' +
'<td>' + (job.items_scanned > 0 ? job.items_scanned : '-') + '</td>' +
'<td>' + formatDate(job.updated_at) + '</td>' +
'<td>' +
'<div style="display:flex;gap:0.4rem">' +
'<button class="btn btn-outline btn-small" data-job-inspect="' + job.id + '">Inspect</button>' +
'<button class="btn btn-outline btn-small" data-job-inspect="' + jobIdSafe + '">Inspect</button>' +
(job.status === 'queued' || job.status === 'running'
? '<button class="btn btn-outline btn-small" data-job-cancel="' + job.id + '">Cancel</button>'
: '<button class="btn btn-outline btn-small" data-job-delete="' + job.id + '">Delete</button>') +
? '<button class="btn btn-outline btn-small" data-job-cancel="' + jobIdSafe + '">Cancel</button>'
: '<button class="btn btn-outline btn-small" data-job-delete="' + jobIdSafe + '">Delete</button>') +
'</div>' +
'</td>' +
'</tr>'
@ -1527,14 +1566,16 @@
return hash;
}
function applyRoute(route) {
function applyRoute(route, moveFocus) {
if (!ROUTE_TITLES[route]) {
route = 'dashboard';
}
state.currentRoute = route;
var activePage = null;
document.querySelectorAll('.route-page').forEach(function (page) {
if (page.getAttribute('data-route-page') === route) {
page.removeAttribute('hidden');
activePage = page;
} else {
page.setAttribute('hidden', '');
}
@ -1549,6 +1590,16 @@
if (els.contentTitle) {
els.contentTitle.textContent = ROUTE_TITLES[route];
}
document.title = 'Clearview | ' + ROUTE_TITLES[route];
// On user navigation, move focus to the new page's first heading so
// screen-reader and keyboard users land in the freshly shown content.
if (moveFocus && activePage) {
var heading = activePage.querySelector('h1, h2');
if (heading) {
heading.setAttribute('tabindex', '-1');
heading.focus();
}
}
}
function navigateTo(route) {
@ -1560,12 +1611,12 @@
if (window.location.hash !== hash) {
window.location.hash = hash;
} else {
applyRoute(route);
applyRoute(route, true);
}
}
window.addEventListener('hashchange', function () {
applyRoute(parseRoute());
applyRoute(parseRoute(), true);
});
applyRoute(parseRoute());

View File

@ -73,6 +73,33 @@
<span class="kpi" id="statRunning">0</span>
<span class="label">Active Jobs</span>
</article>
<article>
<span class="kpi" id="statErrors">0</span>
<span class="label">With errors</span>
</article>
</div>
</div>
<div class="panel">
<div class="panel-header split">
<h2>Recent jobs</h2>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Job ID</th>
<th>Type</th>
<th>Tenant</th>
<th>Status</th>
<th>Targets</th>
<th>Updated</th>
</tr>
</thead>
<tbody id="dashRecentJobs">
<tr><td colspan="6">No jobs yet.</td></tr>
</tbody>
</table>
</div>
</div>
</section>

View File

@ -55,38 +55,12 @@ body {
background: radial-gradient(circle at center, rgba(3, 105, 161, 0.2), rgba(3, 105, 161, 0));
}
.topbar {
width: min(1100px, calc(100% - 2rem));
margin: 1.1rem auto 0;
padding: 0.95rem 1.1rem;
border: 1px solid var(--cv-border);
border-radius: 18px;
background: rgba(255, 255, 255, 0.75);
backdrop-filter: blur(8px);
display: flex;
align-items: center;
justify-content: space-between;
box-shadow: 0 10px 24px rgba(20, 20, 19, 0.08);
}
.brand-logo {
height: 42px;
width: auto;
display: block;
}
.topbar-actions {
display: flex;
gap: 0.6rem;
}
.layout {
width: min(1100px, calc(100% - 2rem));
margin: 1rem auto 2.5rem;
display: grid;
gap: 1rem;
}
.hero,
.panel {
border-radius: 22px;
@ -131,7 +105,7 @@ h2 {
.hero-stats {
margin-top: 1.3rem;
display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr));
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 0.75rem;
}
@ -291,8 +265,9 @@ textarea {
input:focus,
select:focus,
textarea:focus,
button:focus {
outline: 2px solid rgba(14, 165, 233, 0.38);
button:focus,
a:focus-visible {
outline: 2px solid var(--cv-accent);
outline-offset: 2px;
}
@ -533,8 +508,8 @@ strong {
}
.risk.warn {
background: rgba(14, 165, 233, 0.15);
color: var(--cv-accent-dark);
background: rgba(234, 179, 8, 0.18);
color: #854d0e;
}
.risk.high {
@ -584,12 +559,6 @@ strong {
}
@media (max-width: 930px) {
.topbar {
flex-direction: column;
align-items: flex-start;
gap: 0.8rem;
}
.hero-stats {
grid-template-columns: 1fr;
}
@ -616,11 +585,6 @@ strong {
}
@media (max-width: 640px) {
.layout,
.topbar {
width: calc(100% - 1rem);
}
.hero,
.panel {
border-radius: 16px;
@ -633,14 +597,6 @@ strong {
.hero h1 {
max-width: none;
}
.topbar-actions {
width: 100%;
}
.topbar-actions .btn {
flex: 1;
}
}
/* ===========================================================================

View File

@ -0,0 +1,321 @@
"""Shared helpers for the API route modules.
Extracted verbatim from the original monolithic ``main.py`` so the route
modules (``api_tenants``, ``api_jobs``) can share credential resolution, job
creation, response mapping, and export helpers without circular imports.
"""
from __future__ import annotations
import re
import uuid
from datetime import datetime, timezone
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from .db import SessionLocal
from .default_sites import is_default_site, normalize_site_url
from .models import ScanJob, ScanTarget, TenantProfile
from .scanners import AuthConfig
from .schemas import ScanJobCreateResponse, ScanJobSummary, TenantProfileItem
def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
"""
Extract (group_name, link_type) from principal values such as:
- SharingLinks.<guid>.<LinkType>.<guid>
- c:0o.c|federateddirectoryclaimprovider|SharingLinks.<guid>.<LinkType>.<guid>
"""
if not principal:
return None
text = principal.strip()
segments = [s.strip() for s in text.split("|") if s.strip()]
candidate = ""
for segment in reversed(segments):
if segment.lower().startswith("sharinglinks."):
candidate = segment
break
if not candidate and text.lower().startswith("sharinglinks."):
candidate = text
if not candidate:
return None
parts = candidate.split(".")
if len(parts) < 3:
return None
return candidate, parts[2]
_SCAN_TYPE_LABELS = {
"sharepoint": "Deviations",
"sharepoint_root": "Root",
"mailbox": "Mailbox",
"entra_groups": "EntraGroups",
}
def _build_export_filename(job: ScanJob, job_id: str) -> str:
tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
scan_type = job.scan_type or "sharepoint"
type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
short_id = job_id.replace("-", "")[-12:]
return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
def _enumerate_all_entra_groups(
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """Return the result of ``entra.list_all_groups`` for the given credentials.

    When ``profile_id`` refers to an existing tenant profile, its certificate
    fields are passed along in the ``AuthConfig``; otherwise they stay ``None``.
    A missing ``client_secret`` is sent as an empty string.

    Raises:
        HTTPException: 400 wrapping any exception raised by the enumeration.
    """
    cert_material: dict[str, str | None] = dict.fromkeys(
        ("cert_private_key", "cert_thumbprint", "cert_public_pem")
    )
    if profile_id:
        with SessionLocal() as session:
            stored = session.get(TenantProfile, profile_id)
            if stored:
                # Read the values while the session is still open.
                for field in cert_material:
                    cert_material[field] = getattr(stored, field)

    credentials = AuthConfig(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret or "",
        **cert_material,
    )

    from .scanners import entra as _entra

    try:
        return _entra.list_all_groups(credentials)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc
def _enumerate_all_mailboxes(
    organization: str | None,
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """Return the result of ``mailbox.list_mailboxes`` for ``organization``.

    ``organization`` must be non-empty and contain a dot; it is stripped and
    lower-cased before being passed on. When ``profile_id`` refers to an
    existing tenant profile, its certificate fields are included in the
    ``AuthConfig``. A missing ``client_secret`` is sent as an empty string.

    Raises:
        HTTPException: 400 when ``organization`` is missing or has no dot, or
            wrapping any exception raised by the enumeration.
    """
    looks_like_domain = bool(organization) and "." in organization
    if not looks_like_domain:
        raise HTTPException(
            status_code=400,
            detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
        )

    cert_material: dict[str, str | None] = dict.fromkeys(
        ("cert_private_key", "cert_thumbprint", "cert_public_pem")
    )
    if profile_id:
        with SessionLocal() as session:
            stored = session.get(TenantProfile, profile_id)
            if stored:
                # Read the values while the session is still open.
                for field in cert_material:
                    cert_material[field] = getattr(stored, field)

    credentials = AuthConfig(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret or "",
        **cert_material,
    )

    from .scanners import mailbox as _mailbox

    normalized_org = organization.strip().lower()
    try:
        return _mailbox.list_mailboxes(organization=normalized_org, auth=credentials)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
def _resolve_credentials(
db,
tenant_profile_id: str | None,
tenant_id: str | None,
client_id: str | None,
client_secret: str | None,
) -> tuple[str, str, str | None, str | None]:
if tenant_profile_id:
profile = db.get(TenantProfile, tenant_profile_id)
if not profile:
raise HTTPException(status_code=404, detail="Tenant profile not found")
if not profile.client_secret and not profile.cert_thumbprint:
raise HTTPException(
status_code=400,
detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
)
return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
if tenant_id and client_id and client_secret:
return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
raise HTTPException(
status_code=400,
detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
)
# Validate and de-duplicate the raw targets for the given scan type, persist a
# ScanJob with one ScanTarget per accepted target, and return a
# ScanJobCreateResponse listing the accepted, default-skipped and invalid inputs.
def _create_job_from_targets(
raw_targets: list[str],
scan_type: str,
skip_default_sites: bool,
source_type: str,
tenant_id: str,
client_id: str,
client_secret: str,
tenant_profile_id: str | None = None,
) -> ScanJobCreateResponse:
accepted: list[str] = []
skipped_default_urls: list[str] = []
invalid: list[str] = []
# Normalised values already seen; repeats are dropped without being reported.
seen: set[str] = set()
for raw in raw_targets:
# Mailbox targets are stripped, lower-cased and must contain "@".
if scan_type == "mailbox":
normalized = (raw or "").strip().lower()
if not normalized or "@" not in normalized:
invalid.append(raw)
continue
# Entra group targets only need to be non-empty after stripping.
elif scan_type == "entra_groups":
normalized = (raw or "").strip()
if not normalized:
invalid.append(raw)
continue
# Every other scan type is treated as a site URL.
else:
normalized = normalize_site_url(raw) or ""
if not normalized:
invalid.append(raw)
continue
if normalized in seen:
continue
seen.add(normalized)
# Default-site filtering applies only to the two SharePoint scan types.
if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
skipped_default_urls.append(normalized)
continue
accepted.append(normalized)
with SessionLocal() as db:
now = datetime.now(timezone.utc)
# With nothing accepted the job is stored as already completed and finished.
job = ScanJob(
id=str(uuid.uuid4()),
source_type=source_type,
scan_type=scan_type,
status="queued" if accepted else "completed",
skip_default_sites=skip_default_sites,
tenant_profile_id=tenant_profile_id,
auth_tenant_id=tenant_id,
auth_client_id=client_id,
auth_client_secret=client_secret,
total_targets=len(accepted),
skipped_targets=len(skipped_default_urls),
warning_message=None,
error_message=None,
created_at=now,
updated_at=now,
finished_at=now if not accepted else None,
)
# NOTE(review): any non-mailbox scan type, including "entra_groups", gets
# the "sites" wording below — confirm that is intended.
if not accepted:
if scan_type == "mailbox":
job.warning_message = "No scannable mailboxes after validation"
else:
job.warning_message = "No scannable sites after validation and default-site filtering"
db.add(job)
# Flushed before the targets are added; presumably so the job row exists
# before rows referencing job.id are inserted — TODO confirm.
db.flush()
# source_row is the 1-based position within the accepted list, not the input.
for index, target in enumerate(accepted, start=1):
db.add(
ScanTarget(
job_id=job.id,
site_url=target,
source_row=index,
status="queued",
attempts=0,
created_at=now,
updated_at=now,
)
)
db.commit()
# Re-select the job with tenant_profile eagerly loaded; _to_job_summary reads it.
stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
job = db.execute(stmt).unique().scalar_one()
return ScanJobCreateResponse(
job=_to_job_summary(job),
accepted_urls=accepted,
skipped_default_urls=skipped_default_urls,
invalid_urls=invalid,
)
def _to_job_summary(job: ScanJob) -> ScanJobSummary:
    """Map a ``ScanJob`` row onto the ``ScanJobSummary`` response schema.

    Most fields are copied unchanged. Three are derived:
    - ``scan_type`` falls back to ``"sharepoint"`` when unset;
    - ``tenant_name`` is the linked tenant profile's name, or ``None``;
    - ``scan_activity`` is only exposed while the job status is ``"running"``.
    """
    copied_fields = (
        "id",
        "status",
        "source_type",
        "skip_default_sites",
        "tenant_profile_id",
        "total_targets",
        "processed_targets",
        "successful_targets",
        "failed_targets",
        "skipped_targets",
        "items_scanned",
        "warning_message",
        "error_message",
        "created_at",
        "updated_at",
        "started_at",
        "finished_at",
    )
    summary_fields = {name: getattr(job, name) for name in copied_fields}

    linked_tenant = job.tenant_profile
    summary_fields["scan_type"] = job.scan_type or "sharepoint"
    summary_fields["tenant_name"] = linked_tenant.name if linked_tenant else None
    summary_fields["scan_activity"] = job.scan_activity if job.status == "running" else None
    return ScanJobSummary(**summary_fields)
def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
    """Map a ``TenantProfile`` row onto the ``TenantProfileItem`` response schema.

    ``has_certificate`` is derived from whether a certificate thumbprint is
    stored. The profile's ``client_secret`` and ``cert_private_key`` are not
    copied into the item.
    """
    copied_fields = (
        "id",
        "name",
        "tenant_id",
        "primary_domain",
        "client_id",
        "cert_expires_at",
        "created_at",
        "updated_at",
    )
    item_fields = {name: getattr(profile, name) for name in copied_fields}

    thumbprint = profile.cert_thumbprint
    item_fields["cert_thumbprint"] = thumbprint
    item_fields["has_certificate"] = bool(thumbprint)
    return TenantProfileItem(**item_fields)
def _sharing_link_risk_label(principal: str) -> str:
if not principal.startswith("SharingLinks."):
return ""
parts = principal.split(".", 3)
link_type = parts[2] if len(parts) >= 3 else ""
if link_type.startswith("Anonymous"):
return "Critical"
if link_type == "Flexible":
return "High"
if link_type.startswith("Organization"):
return "Low"
if link_type.startswith("Direct"):
return "Low"
return "Unknown"

View File

@ -0,0 +1,645 @@
"""Scan-job routes: create, list, inspect, cancel, delete, resolve, export."""
from __future__ import annotations
import io
from datetime import datetime, timezone
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
from fastapi.responses import Response, StreamingResponse
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from .api_helpers import (
_build_export_filename,
_create_job_from_targets,
_enumerate_all_entra_groups,
_enumerate_all_mailboxes,
_extract_sharing_link_group_and_type,
_resolve_credentials,
_sharing_link_risk_label,
_to_job_summary,
)
from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
from .db import SessionLocal
from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
from .scanners import AuthConfig, probe
from .schemas import (
CreateScanJobRequest,
PermissionDeviationItem,
ProbeResultResponse,
ResolveGroupsResponse,
ResolveSharingLinksRequest,
ResolveSharingLinksResponse,
ScanJobCreateResponse,
ScanJobDetail,
ScanJobSummary,
ScanTargetItem,
SharingLinkTypesResponse,
)
router = APIRouter()
# POST /api/scan-jobs: resolve credentials, collect the raw targets for the
# requested scan type, and create the job via _create_job_from_targets.
@router.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
with SessionLocal() as db:
# Raises HTTPException (404/400) when no usable credentials can be resolved.
tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
db=db,
tenant_profile_id=payload.tenant_profile_id,
tenant_id=payload.tenant_id,
client_id=payload.client_id,
client_secret=payload.client_secret,
)
# "manual" unless the targets come from a tenant-wide enumeration below.
source_type = "manual"
if payload.scan_type == "entra_groups":
if payload.scan_all_groups:
raw_targets = _enumerate_all_entra_groups(
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
profile_id=profile_id,
)
source_type = "tenant_all"
else:
raw_targets = [str(g) for g in payload.group_ids]
elif payload.scan_type == "mailbox":
if payload.scan_all_mailboxes:
organization = payload.organization
# Without an explicit organization, fall back to the profile's primary domain.
if (not organization) and profile_id:
with SessionLocal() as db:
profile = db.get(TenantProfile, profile_id)
if profile and profile.primary_domain:
organization = profile.primary_domain
# Raises HTTPException 400 if organization is still missing or has no dot.
raw_targets = _enumerate_all_mailboxes(
organization=organization,
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
profile_id=profile_id,
)
source_type = "tenant_all"
else:
raw_targets = [str(m) for m in payload.mailboxes]
# Any other scan type takes its targets from payload.site_urls.
else:
raw_targets = [str(item) for item in payload.site_urls]
return _create_job_from_targets(
raw_targets=raw_targets,
scan_type=payload.scan_type,
skip_default_sites=payload.skip_default_sites,
source_type=source_type,
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
tenant_profile_id=profile_id,
)
# POST /api/scan-jobs/import-csv: create a scan job whose targets are parsed
# from an uploaded CSV file; the job's source_type is "csv".
# NOTE(review): skip_default_sites has a plain default while the other fields
# use Form(...), so it is presumably read from the query string rather than
# the form body — confirm this is intended.
# NOTE(review): scan_type is an unvalidated string here; any value other than
# "mailbox" or "entra_groups" is handled as a site-URL import.
@router.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
def create_scan_job_from_csv(
skip_default_sites: bool = True,
scan_type: str = Form("sharepoint"),
tenant_profile_id: str | None = Form(None),
tenant_id: str | None = Form(None),
client_id: str | None = Form(None),
client_secret: str | None = Form(None),
file: UploadFile = File(...),
) -> ScanJobCreateResponse:
with SessionLocal() as db:
# Raises HTTPException (404/400) when no usable credentials can be resolved.
resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = _resolve_credentials(
db=db,
tenant_profile_id=tenant_profile_id,
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
)
# The whole upload is read into memory before parsing.
content = file.file.read()
# Pick the parser for the scan type; the mailbox result exposes .mailboxes,
# the other two expose .urls.
if scan_type == "mailbox":
parsed = parse_mailboxes_csv(content)
targets = parsed.mailboxes
elif scan_type == "entra_groups":
parsed = parse_entra_groups_csv(content)
targets = parsed.urls
else:
parsed = parse_sites_csv(content)
targets = parsed.urls
response = _create_job_from_targets(
raw_targets=targets,
scan_type=scan_type,
skip_default_sites=skip_default_sites,
source_type="csv",
tenant_id=resolved_tenant_id,
client_id=resolved_client_id,
client_secret=resolved_client_secret,
tenant_profile_id=profile_id,
)
# Rows the parser rejected are reported as a count, appended to any warning
# the job already carries, and mirrored into the response.
if parsed.invalid_rows:
csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
with SessionLocal() as db:
job = db.get(ScanJob, response.job.id)
if job:
if job.warning_message:
job.warning_message = f"{job.warning_message} | {csv_warning}"
else:
job.warning_message = csv_warning
job.updated_at = datetime.now(timezone.utc)
db.commit()
db.refresh(job)
response.job.warning_message = job.warning_message
return response
# POST /api/scan-jobs/{job_id}/cancel: mark a queued or running job as
# cancelled and return its summary.
# Responds 404 for an unknown job and 409 for a job in any other status.
# NOTE(review): this only updates the database row; how a running scan notices
# the cancellation is not visible here.
@router.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
def cancel_scan_job(job_id: str) -> ScanJobSummary:
with SessionLocal() as db:
stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
job = db.execute(stmt).unique().scalar_one_or_none()
if not job:
raise HTTPException(status_code=404, detail="Job not found")
if job.status not in ("queued", "running"):
raise HTTPException(status_code=409, detail="Job is not queued or running")
now = datetime.now(timezone.utc)
job.status = "cancelled"
job.updated_at = now
job.finished_at = now
# Clear the live activity text along with the status change.
job.scan_activity = None
db.commit()
db.refresh(job)
# Same query as above, run again after the commit so the summary is built
# from a job with tenant_profile eagerly loaded.
stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
job = db.execute(stmt).unique().scalar_one()
return _to_job_summary(job)
@router.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
def delete_scan_job(job_id: str) -> Response:
    """Delete a scan job that is no longer active and reply with 204.

    Raises:
        HTTPException: 404 when no job has this id; 409 while the job is
            still queued or running.
    """
    with SessionLocal() as session:
        record = session.get(ScanJob, job_id)
        if not record:
            raise HTTPException(status_code=404, detail="Job not found")

        still_active = record.status == "queued" or record.status == "running"
        if still_active:
            raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")

        session.delete(record)
        session.commit()
    return Response(status_code=204)
@router.get("/api/scan-jobs", response_model=list[ScanJobSummary])
def list_scan_jobs(
    limit: int = 20,
    tenant_profile_id: str | None = None,
    scan_type: str | None = None,
) -> list[ScanJobSummary]:
    """List scan jobs, newest first by creation time.

    Args:
        limit: Maximum number of jobs; clamped to the range 1..100.
        tenant_profile_id: When set, only jobs linked to this tenant profile.
        scan_type: When set, only jobs of this scan type.
    """
    page_size = max(1, min(limit, 100))

    query = select(ScanJob).options(joinedload(ScanJob.tenant_profile))
    if tenant_profile_id:
        query = query.where(ScanJob.tenant_profile_id == tenant_profile_id)
    if scan_type:
        query = query.where(ScanJob.scan_type == scan_type)
    query = query.order_by(ScanJob.created_at.desc()).limit(page_size)

    with SessionLocal() as session:
        rows = session.execute(query).unique().scalars().all()
    summaries: list[ScanJobSummary] = []
    for row in rows:
        summaries.append(_to_job_summary(row))
    return summaries
@router.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
    """Count the job's deviations per sharing-link type.

    Only principals that parse as SharingLinks names are counted; every other
    principal is ignored.

    Raises:
        HTTPException: 404 when no job has this id.
    """
    with SessionLocal() as session:
        if not session.get(ScanJob, job_id):
            raise HTTPException(status_code=404, detail="Job not found")
        principal_query = select(PermissionDeviation.principal).where(
            PermissionDeviation.job_id == job_id
        )
        principals = session.execute(principal_query).scalars().all()

    type_counts: dict[str, int] = {}
    for raw_principal in principals:
        group_and_type = _extract_sharing_link_group_and_type(str(raw_principal or ""))
        if group_and_type:
            link_type = group_and_type[1]
            type_counts[link_type] = type_counts.get(link_type, 0) + 1
    return SharingLinkTypesResponse(type_counts=type_counts)
# POST /api/scan-jobs/{job_id}/resolve-sharing-links: for the link types named
# in the payload, resolve each SharingLinks group's members and store them as a
# comma-separated string in resolved_members on the matching deviations.
# Responds 404 for an unknown job and 409 while the job is queued or running.
@router.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
# NOTE(review): imported from ".scanner" here, whereas resolve_groups_endpoint
# imports the same name from ".scanners.sharepoint" — confirm both modules exist.
from .scanner import resolve_sharing_link_members
with SessionLocal() as db:
job = db.get(ScanJob, job_id)
if not job:
raise HTTPException(status_code=404, detail="Job not found")
if job.status in ("queued", "running"):
raise HTTPException(status_code=409, detail="Job is still running")
# Certificate fields come from the linked tenant profile, if there is one.
cert_private_key: str | None = None
cert_thumbprint: str | None = None
cert_public_pem: str | None = None
if job.tenant_profile_id:
profile = db.get(TenantProfile, job.tenant_profile_id)
if profile:
cert_private_key = profile.cert_private_key
cert_thumbprint = profile.cert_thumbprint
cert_public_pem = profile.cert_public_pem
# The remaining credentials are the ones stored on the job itself.
auth = AuthConfig(
tenant_id=job.auth_tenant_id or "",
client_id=job.auth_client_id or "",
client_secret=job.auth_client_secret or "",
cert_private_key=cert_private_key,
cert_thumbprint=cert_thumbprint,
cert_public_pem=cert_public_pem,
)
all_deviations = list(
db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
)
# Group by (site_url, principal) so each unique group is resolved once
groups: dict[tuple[str, str], list[int]] = {}
for dev in all_deviations:
parsed = _extract_sharing_link_group_and_type(dev.principal)
if not parsed:
continue
group_name, link_type = parsed
if link_type not in payload.link_types:
continue
# Keyed on the parsed group name, not the raw principal string.
key = (dev.site_url, group_name)
groups.setdefault(key, []).append(dev.id)
updated_deviations = 0
for (site_url, group_name), dev_ids in groups.items():
# An exception from the resolver is not caught here, unlike in resolve_groups_endpoint.
members = resolve_sharing_link_members(site_url, group_name, auth)
# An empty result is written too, as an empty string.
resolved_members = ", ".join(members) if members else ""
# Each group is written and committed in its own session.
with SessionLocal() as db:
for dev_id in dev_ids:
dev = db.get(PermissionDeviation, dev_id)
if dev:
dev.resolved_members = resolved_members
db.commit()
updated_deviations += len(dev_ids)
return ResolveSharingLinksResponse(
resolved_groups=len(groups),
updated_deviations=updated_deviations,
)
# Responds 404 for an unknown job, 409 while the job is queued or running, and
# 400 for mailbox jobs. Returns counts of resolved groups, skipped groups and
# updated deviations.
@router.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
"""
Expand group principals on this job's deviations and write each group's
member list to permission_deviations.resolved_members. Handles both
classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups
assigned directly at root (via Microsoft Graph). Skips email-shape users
and SharingLinks groups (those have their own resolver).
"""
from .scanners.sharepoint import (
is_aad_group_principal,
is_sharepoint_group_principal,
resolve_aad_group_members,
resolve_sharing_link_members,
)
with SessionLocal() as db:
job = db.get(ScanJob, job_id)
if not job:
raise HTTPException(status_code=404, detail="Job not found")
if job.status in ("queued", "running"):
raise HTTPException(status_code=409, detail="Job is still running")
# NOTE(review): only "mailbox" is rejected, so "entra_groups" jobs pass this
# check even though the message says SharePoint-only — confirm.
if (job.scan_type or "sharepoint") == "mailbox":
raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")
# Certificate fields come from the linked tenant profile, if there is one.
cert_private_key: str | None = None
cert_thumbprint: str | None = None
cert_public_pem: str | None = None
if job.tenant_profile_id:
profile = db.get(TenantProfile, job.tenant_profile_id)
if profile:
cert_private_key = profile.cert_private_key
cert_thumbprint = profile.cert_thumbprint
cert_public_pem = profile.cert_public_pem
# The remaining credentials are the ones stored on the job itself.
auth = AuthConfig(
tenant_id=job.auth_tenant_id or "",
client_id=job.auth_client_id or "",
client_secret=job.auth_client_secret or "",
cert_private_key=cert_private_key,
cert_thumbprint=cert_thumbprint,
cert_public_pem=cert_public_pem,
)
all_deviations = list(
db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
)
# Group deviations by (site_url, principal) so each unique group is resolved once
groups: dict[tuple[str, str], list[int]] = {}
for dev in all_deviations:
if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
continue
key = (dev.site_url, dev.principal)
groups.setdefault(key, []).append(dev.id)
resolved = 0
skipped = 0
updated = 0
for (site_url, group_name), dev_ids in groups.items():
try:
if is_aad_group_principal(group_name):
members = resolve_aad_group_members(group_name, auth)
else:
# SharePoint groups go through the sharing-link member resolver.
members = resolve_sharing_link_members(site_url, group_name, auth)
# Best effort: a resolver failure is treated like an empty member list.
except Exception: # noqa: BLE001
members = []
# Groups without members are counted as skipped and left unchanged.
if not members:
skipped += 1
continue
resolved_text = ", ".join(members)
# Each group is written and committed in its own session.
with SessionLocal() as db:
for dev_id in dev_ids:
dev = db.get(PermissionDeviation, dev_id)
if dev:
dev.resolved_members = resolved_text
db.commit()
resolved += 1
updated += len(dev_ids)
return ResolveGroupsResponse(
resolved_groups=resolved,
skipped_groups=skipped,
updated_deviations=updated,
)
# POST /api/scan-jobs/{job_id}/targets/{target_id}/test-connection: probe one
# target of a finished job and record the outcome on the target row.
# Responds 404 for an unknown job or a target that does not belong to it, and
# 409 while the job is queued or running.
@router.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
with SessionLocal() as db:
job = db.get(ScanJob, job_id)
if not job:
raise HTTPException(status_code=404, detail="Job not found")
target = db.get(ScanTarget, target_id)
# A target belonging to a different job is reported as not found.
if not target or target.job_id != job_id:
raise HTTPException(status_code=404, detail="Target not found")
if job.status in ("queued", "running"):
raise HTTPException(status_code=409, detail="Job is still running")
# Certificate fields come from the linked tenant profile, if there is one.
cert_private_key: str | None = None
cert_thumbprint: str | None = None
cert_public_pem: str | None = None
if job.tenant_profile_id:
profile = db.get(TenantProfile, job.tenant_profile_id)
if profile:
cert_private_key = profile.cert_private_key
cert_thumbprint = profile.cert_thumbprint
cert_public_pem = profile.cert_public_pem
# The remaining credentials are the ones stored on the job itself.
auth = AuthConfig(
tenant_id=job.auth_tenant_id or "",
client_id=job.auth_client_id or "",
client_secret=job.auth_client_secret or "",
cert_private_key=cert_private_key,
cert_thumbprint=cert_thumbprint,
cert_public_pem=cert_public_pem,
)
# Copied into locals so the probe call needs only these values and auth.
site_url = target.site_url
job_scan_type = job.scan_type or "sharepoint"
result = probe(job_scan_type, site_url, auth)
# The target is looked up again in a new session to store the probe result.
with SessionLocal() as db:
target = db.get(ScanTarget, target_id)
# The row may have been removed since the first lookup.
if not target:
raise HTTPException(status_code=404, detail="Target not found")
now = datetime.now(timezone.utc)
target.last_probe_at = now
target.last_probe_ok = result.ok
target.last_probe_message = result.message
target.updated_at = now
db.commit()
db.refresh(target)
return ProbeResultResponse(
target_id=target.id,
ok=result.ok,
message=result.message,
last_probe_at=target.last_probe_at,
)
# Excel's documented maximum column width is 255 characters.
_MAX_EXCEL_COLUMN_WIDTH = 255


def _autosize_columns(ws) -> None:
    """Set each column width of ``ws`` to its longest cell text plus padding.

    The width is capped at ``_MAX_EXCEL_COLUMN_WIDTH`` so a single very long
    value (for example a verbose error message) cannot yield an out-of-range
    column width.
    """
    for col in ws.columns:
        longest = max(len(str(c.value or "")) for c in col)
        ws.column_dimensions[col[0].column_letter].width = min(longest + 4, _MAX_EXCEL_COLUMN_WIDTH)


@router.get("/api/scan-jobs/{job_id}/export")
def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
    """Export a scan job's targets and results as an ``.xlsx`` download.

    The workbook has a "Targets" sheet plus one results sheet whose title and
    columns depend on ``job.scan_type`` (mailbox, entra_groups,
    sharepoint_root, or the default SharePoint "Deviations" layout).

    Args:
        job_id: Primary key of the scan job to export.
        site_url: Optional filter; when given, only targets and deviations
            whose ``site_url`` equals this value are exported.

    Returns:
        A ``StreamingResponse`` carrying the workbook bytes with an
        ``attachment`` Content-Disposition header.

    Raises:
        HTTPException: 404 when no job with ``job_id`` exists.
    """
    # Imported lazily so openpyxl is only loaded when an export is requested.
    import openpyxl
    from openpyxl.styles import Font, PatternFill

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")

        targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
        if site_url:
            targets_q = targets_q.where(ScanTarget.site_url == site_url)
        targets = list(db.execute(targets_q).scalars())

        deviations_q = (
            select(PermissionDeviation)
            .where(PermissionDeviation.job_id == job.id)
            .order_by(PermissionDeviation.id.desc())
        )
        if site_url:
            deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
        deviations = list(db.execute(deviations_q).scalars())

        # The workbook is built while the session is still open so every ORM
        # attribute read below is guaranteed to be available.
        wb = openpyxl.Workbook()

        header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
        header_font_white = Font(bold=True, color="FFFFFF")

        # Link-risk label -> (cell fill, cell font) for the "Link Risk" column.
        _risk_styles: dict[str, tuple] = {
            "Critical": (
                PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
                Font(bold=True, color="7B0000"),
            ),
            "High": (
                PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
                Font(bold=True, color="7C2D00"),
            ),
            "Low": (
                PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
                Font(bold=True, color="0C4A6E"),
            ),
            "Unknown": (
                PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
                Font(bold=True, color="555555"),
            ),
        }

        def _style_header(ws, headers):
            """Append ``headers`` as the first row and apply the header style."""
            ws.append(headers)
            for cell in ws[1]:
                cell.font = header_font_white
                cell.fill = header_fill

        scan_type = job.scan_type or "sharepoint"
        target_label = {
            "sharepoint": "Site URL",
            "sharepoint_root": "Site URL",
            "mailbox": "Mailbox",
            "entra_groups": "Group",
        }.get(scan_type, "Target")

        # Targets sheet
        ws_targets = wb.active
        ws_targets.title = "Targets"
        _style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
        for t in targets:
            ws_targets.append([
                t.site_url,
                t.status,
                t.attempts,
                t.error_message or "",
                t.started_at.isoformat() if t.started_at else "",
                t.finished_at.isoformat() if t.finished_at else "",
            ])
        _autosize_columns(ws_targets)

        # Results sheet — name and columns depend on scan type
        if scan_type == "mailbox":
            ws_dev = wb.create_sheet("Mailbox Permissions")
            _style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
            deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
            for d in deviations:
                ws_dev.append([
                    d.site_url,
                    d.object_url,
                    d.permission_type or d.object_type,
                    d.principal,
                    d.role_name,
                ])
        elif scan_type == "entra_groups":
            ws_dev = wb.create_sheet("Group Memberships")
            _style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
            deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
            for d in deviations:
                ws_dev.append([
                    d.object_url,
                    d.permission_type or "",
                    d.principal,
                    d.role_name,
                ])
        elif scan_type == "sharepoint_root":
            ws_dev = wb.create_sheet("Root Permissions")
            _style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
            deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
            for d in deviations:
                ws_dev.append([
                    d.site_url,
                    d.principal,
                    d.resolved_members or "",
                    d.role_name,
                ])
        else:
            ws_dev = wb.create_sheet("Deviations")
            _style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
            deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
            for d in deviations:
                base = (d.site_url or "").rstrip("/")
                obj_url = d.object_url
                # Show the object path relative to its site. A missing
                # object_url is passed through unchanged instead of raising
                # AttributeError on ``None.startswith``.
                if base and obj_url and obj_url.startswith(base):
                    obj_rel = obj_url[len(base):]
                else:
                    obj_rel = obj_url
                link_risk = _sharing_link_risk_label(d.principal)
                ws_dev.append([
                    d.site_url,
                    obj_rel,
                    d.object_type,
                    d.principal,
                    link_risk,
                    d.resolved_members or "",
                    d.role_name,
                    d.delta_type,
                ])
                if link_risk in _risk_styles:
                    risk_fill, risk_font = _risk_styles[link_risk]
                    # Column 5 is the "Link Risk" column of the row just appended.
                    risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
                    risk_cell.fill = risk_fill
                    risk_cell.font = risk_font

        _autosize_columns(ws_dev)

        buf = io.BytesIO()
        wb.save(buf)
        buf.seek(0)

        filename = _build_export_filename(job, job_id)
        return StreamingResponse(
            buf,
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
@router.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
    """Return one scan job with its targets and permission deviations.

    Args:
        job_id: Primary key of the scan job.
        site_url: Optional filter. When set, only targets and deviations with
            this exact ``site_url`` are returned; when omitted, all targets are
            returned and deviations are limited to 1000 rows.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    with SessionLocal() as db:
        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")

        target_stmt = select(ScanTarget).where(ScanTarget.job_id == job.id)
        if site_url:
            target_stmt = target_stmt.where(ScanTarget.site_url == site_url)
        target_stmt = target_stmt.order_by(ScanTarget.id.asc())
        target_rows = db.execute(target_stmt).scalars().all()

        deviation_stmt = select(PermissionDeviation).where(PermissionDeviation.job_id == job.id)
        if site_url:
            deviation_stmt = deviation_stmt.where(PermissionDeviation.site_url == site_url)
        else:
            # Unfiltered requests are capped at 1000 deviation rows.
            deviation_stmt = deviation_stmt.limit(1000)
        deviation_stmt = deviation_stmt.order_by(
            PermissionDeviation.site_url.asc(),
            PermissionDeviation.object_url.asc(),
            PermissionDeviation.id.asc(),
        )
        deviation_rows = db.execute(deviation_stmt).scalars().all()

        target_items = []
        for row in target_rows:
            target_items.append(
                ScanTargetItem(
                    id=row.id,
                    site_url=row.site_url,
                    status=row.status,
                    attempts=row.attempts,
                    error_message=row.error_message,
                    started_at=row.started_at,
                    finished_at=row.finished_at,
                    last_probe_at=row.last_probe_at,
                    last_probe_ok=row.last_probe_ok,
                    last_probe_message=row.last_probe_message,
                )
            )

        deviation_items = []
        for row in deviation_rows:
            deviation_items.append(
                PermissionDeviationItem(
                    id=row.id,
                    site_url=row.site_url,
                    object_url=row.object_url,
                    object_type=row.object_type,
                    principal=row.principal,
                    role_name=row.role_name,
                    delta_type=row.delta_type,
                    permission_type=row.permission_type,
                    resolved_members=row.resolved_members,
                    created_at=row.created_at,
                )
            )

        summary_fields = _to_job_summary(job).model_dump()
        return ScanJobDetail(**summary_fields, targets=target_items, deviations=deviation_items)

View File

@ -0,0 +1,76 @@
"""Microsoft onboarding routes (admin-consent connect + scan-app creation)."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
from fastapi.responses import RedirectResponse
from .onboarding import (
OnboardingError,
consume_callback_state,
create_connect_url,
create_scan_app_for_tenant,
)
from .schemas import (
ConnectMicrosoftResponse,
CreateScanAppRequest,
CreateScanAppResponse,
)
router = APIRouter()
@router.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
    """Create a scan app for the requested tenant and return its details.

    Raises:
        HTTPException: 400 when ``create_scan_app_for_tenant`` raises
            ``OnboardingError``; 500 for any other exception.
    """
    try:
        created = create_scan_app_for_tenant(
            tenant_id=payload.tenant_id,
            display_name=payload.display_name,
        )
    except OnboardingError as known:
        raise HTTPException(status_code=400, detail=str(known)) from known
    except Exception as unexpected:  # noqa: BLE001
        raise HTTPException(
            status_code=500,
            detail=f"Unexpected onboarding error: {unexpected}",
        ) from unexpected
    else:
        # The response mirrors these attributes of the creation result one-to-one.
        copied = (
            "tenant_id",
            "client_id",
            "client_secret",
            "app_object_id",
            "service_principal_id",
            "display_name",
        )
        return CreateScanAppResponse(**{field: getattr(created, field) for field in copied})
@router.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
    """Return the URL produced by ``create_connect_url``.

    Raises:
        HTTPException: 400 when ``create_connect_url`` raises ``OnboardingError``.
    """
    try:
        url = create_connect_url()
    except OnboardingError as failure:
        raise HTTPException(status_code=400, detail=str(failure)) from failure
    return ConnectMicrosoftResponse(connect_url=url)
@router.get("/api/onboarding/microsoft/callback")
def onboarding_microsoft_callback(
    tenant: str | None = None,
    state: str | None = None,
    error: str | None = None,
    error_description: str | None = None,
) -> RedirectResponse:
    """Handle the onboarding callback and redirect back to the index page.

    The outcome is passed to the frontend as query parameters on ``/``:
    ``onboarding_status=error`` with an ``onboarding_message``, or
    ``onboarding_status=connected`` with the ``tenant_id``.

    Args:
        tenant: Tenant identifier from the callback query string.
        state: Callback state; it must be accepted by ``consume_callback_state``.
        error: Error code from the callback query string, if any.
        error_description: Error text, preferred over ``error`` for the message.

    Returns:
        A ``RedirectResponse`` to ``/`` carrying the status parameters.
    """
    from urllib.parse import urlencode

    def _redirect(**params: str) -> RedirectResponse:
        # All four inputs come straight from the request query string, so the
        # values are percent-encoded. Without this, characters such as "&",
        # "#" or "=" could inject or truncate parameters in the redirect URL.
        # urlencode encodes spaces as "+", matching the previous output.
        return RedirectResponse(url=f"/?{urlencode(params)}")

    if error:
        return _redirect(onboarding_status="error", onboarding_message=error_description or error)
    if not state or not consume_callback_state(state):
        return _redirect(onboarding_status="error", onboarding_message="invalid_or_expired_state")
    if not tenant:
        return _redirect(onboarding_status="error", onboarding_message="missing_tenant")
    return _redirect(onboarding_status="connected", tenant_id=tenant)
@router.get("/api/onboarding/status")
def onboarding_status() -> dict[str, bool]:
    """Report whether all three onboarding config values are set."""
    from . import config

    required_settings = (
        "ONBOARDING_CLIENT_ID",
        "ONBOARDING_CLIENT_SECRET",
        "ONBOARDING_REDIRECT_URI",
    )
    # The generator is evaluated lazily, so the settings are read in order and
    # checking stops at the first falsy one.
    available = all(getattr(config, setting) for setting in required_settings)
    return {"automated_available": available}

View File

@ -0,0 +1,86 @@
"""Tenant profile + certificate routes."""
from __future__ import annotations
import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
from sqlalchemy import select, text
from .api_helpers import _to_tenant_item
from .cert import generate_tenant_certificate
from .db import SessionLocal
from .models import TenantProfile
from .schemas import (
CreateTenantProfileRequest,
TenantCertificateResponse,
TenantProfileItem,
)
router = APIRouter()
@router.get("/api/tenants", response_model=list[TenantProfileItem])
def list_tenants() -> list[TenantProfileItem]:
    """Return every tenant profile, oldest first (ordered by ``created_at``)."""
    oldest_first = select(TenantProfile).order_by(TenantProfile.created_at.asc())
    with SessionLocal() as db:
        items: list[TenantProfileItem] = []
        for profile in db.execute(oldest_first).scalars().all():
            items.append(_to_tenant_item(profile))
        return items
@router.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
    """Persist a new tenant profile built from ``payload`` and return it.

    String fields are stripped of surrounding whitespace; ``primary_domain`` is
    also lower-cased. Empty or missing ``primary_domain`` / ``client_secret``
    are stored as ``None``.
    """
    with SessionLocal() as db:
        timestamp = datetime.now(timezone.utc)

        domain = None
        if payload.primary_domain:
            domain = payload.primary_domain.strip().lower()

        secret = None
        if payload.client_secret:
            secret = payload.client_secret.strip()

        record = TenantProfile(
            id=str(uuid.uuid4()),
            name=payload.name.strip(),
            tenant_id=payload.tenant_id.strip(),
            primary_domain=domain,
            client_id=payload.client_id.strip(),
            client_secret=secret,
            created_at=timestamp,
            updated_at=timestamp,
        )
        db.add(record)
        db.commit()
        # Reload the row after the commit before converting it for the response.
        db.refresh(record)
        return _to_tenant_item(record)
@router.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
def generate_certificate(profile_id: str) -> TenantCertificateResponse:
    """Generate a certificate, store it on the tenant profile and return its public part.

    The private key, public PEM, thumbprint and expiry are written to the
    profile; only the thumbprint, expiry and public PEM are returned.

    Raises:
        HTTPException: 404 when the tenant profile does not exist.
    """
    with SessionLocal() as db:
        profile = db.get(TenantProfile, profile_id)
        if profile is None:
            raise HTTPException(status_code=404, detail="Tenant profile not found")

        cert = generate_tenant_certificate()
        stored_values = {
            "cert_private_key": cert.private_key_pem,
            "cert_public_pem": cert.public_cert_pem,
            "cert_thumbprint": cert.thumbprint,
            "cert_expires_at": cert.expires_at,
            "updated_at": datetime.now(timezone.utc),
        }
        for attribute, value in stored_values.items():
            setattr(profile, attribute, value)
        db.commit()

        return TenantCertificateResponse(
            thumbprint=cert.thumbprint,
            expires_at=cert.expires_at,
            public_cert_pem=cert.public_cert_pem,
        )
@router.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
def delete_tenant(profile_id: str) -> Response:
    """Delete a tenant profile after detaching the scan jobs that reference it.

    Raises:
        HTTPException: 404 when the tenant profile does not exist.
    """
    detach_jobs = text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid")
    with SessionLocal() as db:
        profile = db.get(TenantProfile, profile_id)
        if profile is None:
            raise HTTPException(status_code=404, detail="Tenant profile not found")

        # Jobs are kept; only their link to this profile is cleared before the
        # profile row is removed.
        db.execute(detach_jobs, {"pid": profile_id})
        db.delete(profile)
        db.commit()
        return Response(status_code=204)

View File

@ -2,7 +2,7 @@ from __future__ import annotations
import hashlib
from dataclasses import dataclass
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from cryptography import x509
from cryptography.hazmat.primitives import hashes, serialization
@ -30,7 +30,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
subject = x509.Name([
x509.NameAttribute(NameOID.COMMON_NAME, "Clearview Scan App"),
])
expires_at = datetime.utcnow() + timedelta(days=365 * valid_years)
expires_at = datetime.now(timezone.utc) + timedelta(days=365 * valid_years)
cert = (
x509.CertificateBuilder()
@ -38,7 +38,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
.issuer_name(subject)
.public_key(private_key.public_key())
.serial_number(x509.random_serial_number())
.not_valid_before(datetime.utcnow())
.not_valid_before(datetime.now(timezone.utc))
.not_valid_after(expires_at)
.sign(private_key, hashes.SHA256())
)

View File

@ -0,0 +1,53 @@
"""Database migration bootstrap.
Replaces the previous ``Base.metadata.create_all`` + ``_ensure_schema_columns``
startup path with Alembic. The bootstrap is idempotent and handles three cases:
* **Fresh database** (no tables): run ``upgrade head`` to create the schema and
record the Alembic version.
* **Existing pre-Alembic database** (tables present, no ``alembic_version``):
``stamp`` the baseline revision to adopt the schema without re-creating
existing tables, then run ``upgrade head`` to apply any later revisions.
* **Already under Alembic**: run ``upgrade head`` to apply any new revisions.
"""
from __future__ import annotations
import logging
from pathlib import Path
from alembic import command
from alembic.config import Config
from sqlalchemy import inspect
from .db import engine
log = logging.getLogger(__name__)
_MIGRATIONS_DIR = Path(__file__).resolve().parent / "migrations"
# A table that exists in every pre-Alembic Clearview database; its presence
# (without alembic_version) marks a database that predates Alembic adoption.
_SENTINEL_TABLE = "scan_jobs"
def _alembic_config() -> Config:
    """Build an in-memory Alembic ``Config`` pointing at the packaged migrations.

    No ``.ini`` file is loaded; only ``script_location`` is set.
    """
    alembic_cfg = Config()
    script_location = str(_MIGRATIONS_DIR)
    alembic_cfg.set_main_option("script_location", script_location)
    return alembic_cfg
_BASELINE_REVISION = "0001_baseline"
def run_migrations() -> None:
    """Bring the database schema up to date.

    A database that has the sentinel table but no ``alembic_version`` table is
    first stamped at the baseline revision; in every case ``upgrade head`` is
    then run.
    """
    cfg = _alembic_config()
    existing_tables = set(inspect(engine).get_table_names())

    under_alembic = "alembic_version" in existing_tables
    has_legacy_schema = _SENTINEL_TABLE in existing_tables
    if has_legacy_schema and not under_alembic:
        # The tables already exist, so record the baseline revision instead of
        # running it; the upgrade below then applies only later revisions.
        log.info("Existing pre-Alembic schema detected; stamping baseline %s.", _BASELINE_REVISION)
        command.stamp(cfg, _BASELINE_REVISION)

    log.info("Applying Alembic migrations (upgrade head).")
    command.upgrade(cfg, "head")

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,58 @@
"""Alembic environment for Clearview.
Reuses the application's SQLAlchemy engine (already configured with the
normalized DATABASE_URL and pool_pre_ping) so migrations run against exactly
the same database the app uses. Logging config from alembic.ini is applied
only when Alembic is invoked through the CLI; programmatic invocation from
``clearview_app.db_migrate`` passes a Config without a file.
"""
from __future__ import annotations
from logging.config import fileConfig
from alembic import context
from clearview_app.config import DATABASE_URL
from clearview_app.db import _normalize_database_url, engine as app_engine
from clearview_app.models import Base
# Alembic Config for the current run. Per the module docstring, programmatic
# invocation passes a Config without a file, in which case config_file_name
# is None and the logging setup below is skipped.
config = context.config
if config.config_file_name is not None:
    try:
        fileConfig(config.config_file_name)
    except Exception:  # noqa: BLE001 - logging config is best-effort
        # A failure to apply the logging config must not stop migrations.
        pass
# Model metadata passed as target_metadata to context.configure() in both the
# offline and online runners.
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Run migrations in offline mode, configured from a URL rather than a connection.

    The URL is the normalized ``DATABASE_URL``; statements are rendered with
    literal binds.
    """
    offline_options = {
        "url": _normalize_database_url(DATABASE_URL),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "compare_type": True,
    }
    context.configure(**offline_options)
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations over a connection taken from the application's engine."""
    online_options = {
        "target_metadata": target_metadata,
        "compare_type": True,
    }
    with app_engine.connect() as conn:
        context.configure(connection=conn, **online_options)
        with context.begin_transaction():
            context.run_migrations()
# Module-level entry point: choose the runner that matches the current mode.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

View File

@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from __future__ import annotations
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@ -0,0 +1,31 @@
"""baseline schema
Captures the full Clearview schema as defined by the SQLAlchemy models at the
time Alembic was adopted. Creating it via ``Base.metadata.create_all`` keeps the
baseline guaranteed-identical to the models (the same DDL the app emitted before
Alembic). Existing databases are ``stamp``-ed to this revision rather than
re-running ``upgrade`` (see ``clearview_app.db_migrate``).
Revision ID: 0001_baseline
Revises:
Create Date: 2026-05-26
"""
from __future__ import annotations
from alembic import op
from clearview_app.models import Base
# revision identifiers, used by Alembic.
revision = "0001_baseline"
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create every table defined on ``Base.metadata`` using the migration's bind."""
    connection = op.get_bind()
    Base.metadata.create_all(bind=connection)
def downgrade() -> None:
    """Drop every table defined on ``Base.metadata`` using the migration's bind."""
    connection = op.get_bind()
    Base.metadata.drop_all(bind=connection)

View File

@ -0,0 +1,63 @@
"""convert timestamp columns to timestamptz
The app now uses timezone-aware UTC datetimes (DateTime(timezone=True)).
Existing databases store naive ``timestamp without time zone`` values that were
written as UTC, so we reinterpret them as UTC while converting. The conversion
is guarded per column on the current type, so it is a no-op on databases whose
columns are already ``timestamptz`` (e.g. a fresh DB created from the updated
baseline models).
Revision ID: 0002_timestamptz
Revises: 0001_baseline
Create Date: 2026-05-26
"""
from __future__ import annotations
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "0002_timestamptz"
down_revision = "0001_baseline"
branch_labels = None
depends_on = None
# Table -> datetime columns (names come from our own models, never user input).
_COLUMNS: dict[str, tuple[str, ...]] = {
"tenant_profiles": ("cert_expires_at", "created_at", "updated_at"),
"scan_jobs": ("created_at", "updated_at", "started_at", "finished_at", "heartbeat_at"),
"scan_targets": ("last_probe_at", "created_at", "updated_at", "started_at", "finished_at"),
"permission_deviations": ("created_at",),
}
def _column_type(bind, table: str, column: str) -> str | None:
    """Return the PostgreSQL type name of ``table.column``, or ``None`` if absent.

    The table is resolved with ``to_regclass``, i.e. through the session's
    ``search_path`` exactly as the unqualified ``ALTER TABLE`` statements in
    this migration resolve it. An unscoped ``information_schema.columns``
    lookup could instead pick up a same-named table in a different schema and
    report the wrong type.

    Args:
        bind: Connection the migration is running on.
        table: Unqualified table name.
        column: Column name.

    Returns:
        The type name without modifiers, e.g. ``"timestamp without time zone"``
        or ``"timestamp with time zone"``; ``None`` when the table or column
        does not exist.
    """
    return bind.execute(
        sa.text(
            # format_type(..., NULL) omits the type modifier, so a column
            # declared with a precision still yields the plain type name.
            "SELECT format_type(a.atttypid, NULL) FROM pg_attribute AS a "
            "WHERE a.attrelid = to_regclass(CAST(:t AS text)) "
            "AND a.attname = CAST(:c AS name) "
            "AND a.attnum > 0 AND NOT a.attisdropped"
        ),
        {"t": table, "c": column},
    ).scalar()
def upgrade() -> None:
    """Convert each listed column that is still a naive timestamp to timestamptz.

    Existing values are interpreted as UTC. Columns whose current type is not
    ``timestamp without time zone`` are left untouched.
    """
    connection = op.get_bind()
    candidates = [
        (table, column)
        for table, columns in _COLUMNS.items()
        for column in columns
    ]
    for table, column in candidates:
        if _column_type(connection, table, column) != "timestamp without time zone":
            continue
        op.execute(
            f'ALTER TABLE {table} ALTER COLUMN {column} '
            f"TYPE timestamptz USING {column} AT TIME ZONE 'UTC'"
        )
def downgrade() -> None:
    """Convert each listed timestamptz column back to a naive timestamp in UTC.

    Columns whose current type is not ``timestamp with time zone`` are left
    untouched.
    """
    connection = op.get_bind()
    candidates = [
        (table, column)
        for table, columns in _COLUMNS.items()
        for column in columns
    ]
    for table, column in candidates:
        if _column_type(connection, table, column) != "timestamp with time zone":
            continue
        op.execute(
            f'ALTER TABLE {table} ALTER COLUMN {column} '
            f"TYPE timestamp USING {column} AT TIME ZONE 'UTC'"
        )

View File

@ -1,11 +1,16 @@
from __future__ import annotations
from datetime import datetime
from datetime import datetime, timezone
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
def _utcnow() -> datetime:
"""Timezone-aware UTC now, used as the default for timestamp columns."""
return datetime.now(timezone.utc)
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
    pass
@ -22,9 +27,9 @@ class TenantProfile(Base):
cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_public_pem: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True)
cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
jobs: Mapped[list["ScanJob"]] = relationship(back_populates="tenant_profile")
@ -56,11 +61,11 @@ class ScanJob(Base):
warning_message: Mapped[str | None] = mapped_column(Text, nullable=True)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
tenant_profile: Mapped["TenantProfile | None"] = relationship(back_populates="jobs")
targets: Mapped[list["ScanTarget"]] = relationship(back_populates="job", cascade="all,delete-orphan")
@ -79,14 +84,14 @@ class ScanTarget(Base):
attempts: Mapped[int] = mapped_column(Integer, default=0)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
last_probe_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
last_probe_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
last_probe_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
last_probe_message: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
job: Mapped[ScanJob] = relationship(back_populates="targets")
deviations: Mapped[list["PermissionDeviation"]] = relationship(back_populates="target", cascade="all,delete-orphan")
@ -108,7 +113,7 @@ class PermissionDeviation(Base):
permission_type: Mapped[str | None] = mapped_column(String(32), nullable=True)
resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
job: Mapped[ScanJob] = relationship(back_populates="deviations")
target: Mapped[ScanTarget] = relationship(back_populates="deviations")

View File

@ -1,6 +1,7 @@
from __future__ import annotations
import re
import threading
import time
from dataclasses import dataclass
from urllib.parse import urlparse
@ -32,7 +33,13 @@ class PermissionEntry:
role_name: str
_TOKEN_CACHE: dict[str, str] = {}
# Cache maps cache_key -> (access_token, expires_at_epoch). Guarded by
# _TOKEN_LOCK because the worker acquires tokens from multiple threads.
_TOKEN_CACHE: dict[str, tuple[str, float]] = {}
_TOKEN_LOCK = threading.Lock()
# Reuse one MSAL app per (tenant, client, auth_method) so MSAL's own token
# cache works and refreshes app tokens automatically.
_MSAL_APPS: dict[str, "msal.ConfidentialClientApplication"] = {}
def scan_site_for_deviations(
@ -612,18 +619,20 @@ def _probe_hint(error: str, stage: str) -> str:
return error[:220]
def _get_token_for_host(host: str, auth: AuthConfig) -> str:
auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
cached = _TOKEN_CACHE.get(cache_key)
if cached:
return cached
def _get_msal_app(auth: AuthConfig, auth_method: str) -> "msal.ConfidentialClientApplication":
"""Return a cached ConfidentialClientApplication for these credentials.
Reusing the app object lets MSAL's built-in token cache serve and refresh
app-only tokens instead of re-authenticating on every call.
"""
app_key = f"{auth.tenant_id}|{auth.client_id}|{auth_method}"
app = _MSAL_APPS.get(app_key)
if app is not None:
return app
scope = f"https://{host}/.default"
authority = f"https://login.microsoftonline.com/{auth.tenant_id}"
if auth_method == "cert":
client_credential = {
client_credential: dict[str, str | None] | str | None = {
"thumbprint": auth.cert_thumbprint,
"private_key": auth.cert_private_key,
}
@ -635,6 +644,21 @@ def _get_token_for_host(host: str, auth: AuthConfig) -> str:
authority=authority,
client_credential=client_credential,
)
_MSAL_APPS[app_key] = app
return app
def _get_token_for_host(host: str, auth: AuthConfig) -> str:
auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
with _TOKEN_LOCK:
cached = _TOKEN_CACHE.get(cache_key)
if cached is not None and time.time() < cached[1]:
return cached[0]
scope = f"https://{host}/.default"
app = _get_msal_app(auth, auth_method)
result = app.acquire_token_for_client(scopes=[scope])
if "access_token" not in result:
@ -643,7 +667,10 @@ def _get_token_for_host(host: str, auth: AuthConfig) -> str:
raise RuntimeError(f"Token request failed ({error}): {description[:300]}")
token = str(result["access_token"])
_TOKEN_CACHE[cache_key] = token
# expires_in is seconds-from-now; refresh 60s early to avoid edge expiry.
expires_in = int(result.get("expires_in", 3600))
expires_at = time.time() + max(expires_in - 60, 0)
_TOKEN_CACHE[cache_key] = (token, expires_at)
return token

View File

@ -1,9 +1,14 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field, HttpUrl
# Valid scan types, mirrored by the frontend scan-type dropdowns. Used to
# validate incoming job requests (FastAPI returns 422 on anything else).
ScanType = Literal["sharepoint", "sharepoint_root", "mailbox", "entra_groups"]
class CreateTenantProfileRequest(BaseModel):
name: str
@ -33,7 +38,7 @@ class TenantCertificateResponse(BaseModel):
class CreateScanJobRequest(BaseModel):
scan_type: str = "sharepoint"
scan_type: ScanType = "sharepoint"
site_urls: list[HttpUrl] = Field(default_factory=list)
mailboxes: list[str] = Field(default_factory=list)
scan_all_mailboxes: bool = False

View File

@ -7,7 +7,7 @@ history, so operators can see exactly which image build is running.
from __future__ import annotations
VERSION = "v0.1.0"
BUILD = 1
BUILD = 2
def display_version() -> str:

View File

@ -4,7 +4,7 @@ import logging
import threading
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from datetime import datetime
from datetime import datetime, timezone
from sqlalchemy import select
@ -47,17 +47,21 @@ class ScanWorker:
def _process_next_job(self) -> bool:
with SessionLocal() as db:
# Atomic claim: lock the chosen queued row and skip rows already
# locked by another worker, so multiple workers/replicas never grab
# the same job. The status flip is committed in this transaction.
job = db.execute(
select(ScanJob)
.where(ScanJob.status == "queued")
.order_by(ScanJob.created_at.asc())
.limit(1)
.with_for_update(skip_locked=True)
).scalar_one_or_none()
if job is None:
return False
now = datetime.utcnow()
now = datetime.now(timezone.utc)
job.status = "running"
job.started_at = now
job.heartbeat_at = now
@ -96,7 +100,7 @@ class ScanWorker:
job = db.get(ScanJob, job_id)
if not job:
return
now = datetime.utcnow()
now = datetime.now(timezone.utc)
job.heartbeat_at = now
job.updated_at = now
job.finished_at = now
@ -113,7 +117,7 @@ class ScanWorker:
if not job or not target:
return
now = datetime.utcnow()
now = datetime.now(timezone.utc)
target.status = "running"
target.started_at = now
target.updated_at = now
@ -128,7 +132,7 @@ class ScanWorker:
target = db.get(ScanTarget, target_id)
if not job or not target:
return
now = datetime.utcnow()
now = datetime.now(timezone.utc)
target.status = "failed"
target.attempts = 1
target.error_message = f"Preflight: {probe.message}"
@ -173,7 +177,7 @@ class ScanWorker:
)
)
now = datetime.utcnow()
now = datetime.now(timezone.utc)
target.status = "completed"
target.attempts = attempt
target.error_message = None
@ -203,7 +207,7 @@ class ScanWorker:
if not job or not target:
return
now = datetime.utcnow()
now = datetime.now(timezone.utc)
target.status = "failed"
target.attempts = max_attempts
target.error_message = last_error
@ -252,7 +256,7 @@ class ScanWorker:
with SessionLocal() as db:
target = db.get(ScanTarget, target_id)
if target:
now = datetime.utcnow()
now = datetime.now(timezone.utc)
target.last_probe_at = now
target.last_probe_ok = result.ok
target.last_probe_message = result.message
@ -298,8 +302,8 @@ class ScanWorker:
job.scan_activity = activity
if items > 0:
job.items_scanned += items
job.heartbeat_at = datetime.utcnow()
job.updated_at = datetime.utcnow()
job.heartbeat_at = datetime.now(timezone.utc)
job.updated_at = datetime.now(timezone.utc)
db.commit()
except Exception: # noqa: BLE001
pass

View File

@ -2,6 +2,42 @@
This file documents changes on the develop branch of this project.
## 2026-05-26 — UI/UX: dead CSS removal, a11y, distinct risk colours, richer dashboard
### Added
- **Dashboard enrichment** — a fourth KPI card **With errors** (`#statErrors`, counts jobs that are `completed_with_errors` or have `failed_targets > 0`) and a **Recent jobs** panel (`#dashRecentJobs`, last 5 jobs, each row clickable to jump to its details). Populated from the existing `/api/scan-jobs` list in `refreshJobs()` via a new `renderDashRecent()`; all interpolated fields run through `escHtml()`.
### Changed
- **Removed dead CSS** — the pre-sidebar `.topbar`, `.topbar-actions`, and `.layout` rules (and their now-orphaned references inside the 930px/640px media queries) were deleted; the layout has used `.app-shell`/`.sidebar`/`.content` since the sidebar refactor.
- **Accessibility** — focus outline strengthened from `rgba(14,165,233,0.38)` to a solid `var(--cv-accent)` (meets the WCAG 3:1 non-text contrast minimum) and now also covers `a:focus-visible`. On route changes (`applyRoute`), focus now moves to the new page's first heading (`h1/h2`, `tabindex=-1`) and `document.title` updates, so screen-reader/keyboard users land in the freshly shown content.
- **Distinct risk colours** — the `risk.warn` badge changed from accent-blue (indistinguishable from `info`/`low`) to amber (`#854d0e` on `rgba(234,179,8,.18)`), giving a real low→high colour gradient.
- **Consistent XSS escaping** — `job.id` and `job.source_type` in the Scan Jobs table are now passed through `escHtml()` (previously interpolated raw), matching the rest of the table.
## 2026-05-26 — Split monolithic main.py into route modules
### Changed
- **`main.py` reduced from 1152 to 64 lines** — now a composition root that only wires the FastAPI app, scan-worker lifecycle, `/healthz`, `/api/version`, the `/` index + static mount, and `include_router` for the new route modules. All endpoint logic moved out verbatim (behaviour-preserving).
- **New route modules** (flat modules at package level so existing single-dot relative imports stay unchanged — lower risk than a `routers/` subpackage): `api_tenants.py` (tenant profiles + certificate), `api_jobs.py` (all scan-job routes incl. CSV import, cancel/delete, resolve-sharing-links, resolve-groups, test-connection, Excel export, detail), `api_onboarding.py` (Microsoft connect/callback/scan-app). Shared helpers (`_resolve_credentials`, `_create_job_from_targets`, `_enumerate_all_*`, `_to_job_summary`, `_to_tenant_item`, `_build_export_filename`, `_sharing_link_risk_label`, `_extract_sharing_link_group_and_type`) extracted to `api_helpers.py`.
- **Verified behaviour-preserving** — captured the OpenAPI route set before/after; both expose the identical 22 endpoints (`diff` empty). Built the image, booted against a fresh DB: `/healthz`, `/api/version`, `/api/tenants`, `/api/scan-jobs` all respond, invalid `scan_type` still returns 422, no startup errors.
## 2026-05-26 — Correctness P1: token cache, atomic job claim, timezone-aware datetimes, scan_type validation
### Changed
- **Token cache now has TTL + thread lock + MSAL app reuse** (`scanners/sharepoint.py`) — `_TOKEN_CACHE` previously stored access tokens as plain strings forever, so long scans started failing with 401s once the ~1h token expired. It now stores `(token, expires_at)` and refreshes 60s before expiry, guarded by a new `_TOKEN_LOCK` (the worker fetches tokens from multiple threads). New `_get_msal_app()` caches one `ConfidentialClientApplication` per `(tenant, client, auth_method)` so MSAL's own token cache is reused instead of building a fresh app on every call.
- **Atomic job claim** (`worker.py`) — the queued-job selection now uses `.with_for_update(skip_locked=True)` (`SELECT … FOR UPDATE SKIP LOCKED`), so multiple worker threads/replicas can never claim the same job. Behaviour is unchanged for the current single worker but is now replica-safe.
- **Timezone-aware datetimes everywhere** — replaced all 24 `datetime.utcnow()` calls (naive, deprecated) with `datetime.now(timezone.utc)` across `models.py`, `worker.py`, `main.py`, and `cert.py`. SQLAlchemy datetime columns are now `DateTime(timezone=True)`; model defaults use a new `_utcnow()` helper. New Alembic migration `0002_timestamptz` converts existing `timestamp without time zone` columns to `timestamptz` (reinterpreting stored values as UTC), guarded per-column so it is a no-op on databases whose columns are already `timestamptz`. **Behaviour note:** API datetimes now carry a UTC offset, so the frontend renders them correctly in local time (previously, stored UTC values were shown as if they were local time).
- **`scan_type` request validation** (`schemas.py`) — `CreateScanJobRequest.scan_type` is now `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` instead of free `str`; invalid values return HTTP 422. The response model keeps `str` so legacy rows never trigger a serialization error. Verified: `scan_type=bogus` → 422, valid type passes schema validation.
## 2026-05-26 — Alembic migrations replace startup `create_all` + raw ALTERs
### Added
- **Alembic introduced (`alembic==1.14.0`)** — schema is now version-controlled instead of being patched at every startup. New `clearview_app/migrations/` package (`env.py` reuses the app's SQLAlchemy engine and `Base.metadata`; `versions/0001_baseline.py` baseline) and dev-only `containers/clearview/alembic.ini` for manual CLI use. The app builds the Alembic `Config` programmatically, so `alembic.ini` is not shipped in the image.
- **Baseline migration `0001_baseline`** — creates the full current schema via `Base.metadata.create_all`, guaranteed identical to the models (the same DDL the app emitted before). Future schema changes become explicit Alembic revisions.
- **Startup bootstrap `clearview_app/db_migrate.run_migrations()`** — idempotent, three cases: fresh DB → `upgrade head`; existing pre-Alembic DB (tables present, no `alembic_version`) → `stamp head` (adopt baseline without re-creating); already under Alembic → `upgrade head`. Verified end-to-end against throwaway databases (fresh upgrade, existing-DB stamp, re-run no-op) and a local image boot test (`/healthz` OK, schema + `alembic_version=0001_baseline`).
### Changed
- **`main.py` startup** — `on_startup()` now calls `run_migrations()` instead of `Base.metadata.create_all(bind=engine)` + `_ensure_schema_columns()`. The 18-statement raw `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` block (`_ensure_schema_columns`) is removed; unused `Base`/`engine` imports dropped. The existing dev/prod database is adopted automatically (stamped to baseline) on first start of the new build — no manual migration step required.
## 2026-05-26 — Build/version number in the UI (Dropkeep-style)
### Added