Dev build 2026-05-26 15:12
This commit is contained in:
parent
61db7fe4a7
commit
0cdeabc0e6
45
containers/clearview/alembic.ini
Normal file
45
containers/clearview/alembic.ini
Normal file
@ -0,0 +1,45 @@
|
||||
# Alembic config for manual CLI use during development, e.g.:
|
||||
# cd containers/clearview && DATABASE_URL=postgresql://... PYTHONPATH=src alembic revision -m "msg"
|
||||
#
|
||||
# The application itself does NOT read this file: clearview_app.db_migrate builds
|
||||
# an Alembic Config programmatically and env.py takes the database URL from
|
||||
# DATABASE_URL via clearview_app.config. sqlalchemy.url is therefore left blank.
|
||||
|
||||
[alembic]
|
||||
script_location = src/clearview_app/migrations
|
||||
prepend_sys_path = src
|
||||
sqlalchemy.url =
|
||||
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARNING
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARNING
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@ -1,6 +1,7 @@
|
||||
fastapi==0.115.0
|
||||
uvicorn[standard]==0.30.6
|
||||
sqlalchemy==2.0.36
|
||||
alembic==1.14.0
|
||||
psycopg[binary]==3.2.3
|
||||
python-multipart==0.0.12
|
||||
requests==2.32.3
|
||||
|
||||
@ -104,6 +104,8 @@
|
||||
statTenants: document.getElementById('statTenants'),
|
||||
statJobs: document.getElementById('statJobs'),
|
||||
statRunning: document.getElementById('statRunning'),
|
||||
statErrors: document.getElementById('statErrors'),
|
||||
dashRecentJobs: document.getElementById('dashRecentJobs'),
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
@ -610,6 +612,36 @@
|
||||
// Jobs list
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fill the dashboard "Recent jobs" table body with at most the first five
 * entries of `jobs`.
 *
 * Does nothing when the table body element is absent. An empty list renders
 * a single placeholder row. Each rendered row is clickable: a click stores
 * the job id in `state.selectedJobId`, navigates to the "jobs" route and
 * triggers a refresh of the selected job (refresh failures are ignored).
 *
 * @param {Array<Object>} jobs - Job summaries as returned by the jobs API.
 */
function renderDashRecent(jobs) {
  const tbody = els.dashRecentJobs;
  if (!tbody) {
    return;
  }
  if (!jobs.length) {
    tbody.innerHTML = '<tr><td colspan="6">No jobs yet.</td></tr>';
    return;
  }

  const rowsHtml = jobs.slice(0, 5).map((job) => {
    const jobIdSafe = escHtml(job.id);

    // Jobs without a tenant profile are shown as "manual".
    let tenantLabel;
    if (job.tenant_name) {
      tenantLabel = escHtml(job.tenant_name);
    } else {
      tenantLabel = '<span style="color:var(--cv-text-secondary)">manual</span>';
    }

    let progress = '0/0';
    if (job.total_targets > 0) {
      progress = job.processed_targets + '/' + job.total_targets;
    }

    return `<tr style="cursor:pointer" data-dash-job="${jobIdSafe}">` +
      `<td><code>${jobIdSafe}</code></td>` +
      `<td>${escHtml(job.scan_type || 'sharepoint')}</td>` +
      `<td>${tenantLabel}</td>` +
      `<td>${statusBadge(job.status)}</td>` +
      `<td>${progress}</td>` +
      `<td>${formatDate(job.updated_at)}</td>` +
      '</tr>';
  });
  tbody.innerHTML = rowsHtml.join('');

  // Wire each freshly rendered row to open its job on the Jobs page.
  tbody.querySelectorAll('[data-dash-job]').forEach((row) => {
    row.addEventListener('click', () => {
      state.selectedJobId = row.getAttribute('data-dash-job');
      navigateTo('jobs');
      refreshSelectedJob().catch(() => {});
    });
  });
}
|
||||
|
||||
async function refreshJobs() {
|
||||
const filterTenant = els.jobTenantFilter.value;
|
||||
const filterType = els.jobTypeFilter ? els.jobTypeFilter.value : '';
|
||||
@ -626,6 +658,12 @@
|
||||
els.statRunning.textContent = String(jobs.filter(function (j) {
|
||||
return j.status === 'running' || j.status === 'queued';
|
||||
}).length);
|
||||
if (els.statErrors) {
|
||||
els.statErrors.textContent = String(jobs.filter(function (j) {
|
||||
return j.status === 'completed_with_errors' || (j.failed_targets || 0) > 0;
|
||||
}).length);
|
||||
}
|
||||
renderDashRecent(jobs);
|
||||
|
||||
if (!jobs.length) {
|
||||
els.jobsTableBody.innerHTML = '<tr><td colspan="9">No jobs yet.</td></tr>';
|
||||
@ -654,22 +692,23 @@
|
||||
} else {
|
||||
typeLabel = '<span class="risk ok">SharePoint</span>';
|
||||
}
|
||||
const jobIdSafe = escHtml(job.id);
|
||||
return (
|
||||
'<tr>' +
|
||||
'<td><code>' + job.id + '</code></td>' +
|
||||
'<td><code>' + jobIdSafe + '</code></td>' +
|
||||
'<td>' + typeLabel + '</td>' +
|
||||
'<td>' + tenantLabel + '</td>' +
|
||||
'<td>' + job.source_type + '</td>' +
|
||||
'<td>' + escHtml(job.source_type) + '</td>' +
|
||||
'<td>' + statusBadge(job.status) + '</td>' +
|
||||
'<td>' + progress + '</td>' +
|
||||
'<td>' + (job.items_scanned > 0 ? job.items_scanned : '-') + '</td>' +
|
||||
'<td>' + formatDate(job.updated_at) + '</td>' +
|
||||
'<td>' +
|
||||
'<div style="display:flex;gap:0.4rem">' +
|
||||
'<button class="btn btn-outline btn-small" data-job-inspect="' + job.id + '">Inspect</button>' +
|
||||
'<button class="btn btn-outline btn-small" data-job-inspect="' + jobIdSafe + '">Inspect</button>' +
|
||||
(job.status === 'queued' || job.status === 'running'
|
||||
? '<button class="btn btn-outline btn-small" data-job-cancel="' + job.id + '">Cancel</button>'
|
||||
: '<button class="btn btn-outline btn-small" data-job-delete="' + job.id + '">Delete</button>') +
|
||||
? '<button class="btn btn-outline btn-small" data-job-cancel="' + jobIdSafe + '">Cancel</button>'
|
||||
: '<button class="btn btn-outline btn-small" data-job-delete="' + jobIdSafe + '">Delete</button>') +
|
||||
'</div>' +
|
||||
'</td>' +
|
||||
'</tr>'
|
||||
@ -1527,14 +1566,16 @@
|
||||
return hash;
|
||||
}
|
||||
|
||||
function applyRoute(route) {
|
||||
function applyRoute(route, moveFocus) {
|
||||
if (!ROUTE_TITLES[route]) {
|
||||
route = 'dashboard';
|
||||
}
|
||||
state.currentRoute = route;
|
||||
var activePage = null;
|
||||
document.querySelectorAll('.route-page').forEach(function (page) {
|
||||
if (page.getAttribute('data-route-page') === route) {
|
||||
page.removeAttribute('hidden');
|
||||
activePage = page;
|
||||
} else {
|
||||
page.setAttribute('hidden', '');
|
||||
}
|
||||
@ -1549,6 +1590,16 @@
|
||||
if (els.contentTitle) {
|
||||
els.contentTitle.textContent = ROUTE_TITLES[route];
|
||||
}
|
||||
document.title = 'Clearview | ' + ROUTE_TITLES[route];
|
||||
// On user navigation, move focus to the new page's first heading so
|
||||
// screen-reader and keyboard users land in the freshly shown content.
|
||||
if (moveFocus && activePage) {
|
||||
var heading = activePage.querySelector('h1, h2');
|
||||
if (heading) {
|
||||
heading.setAttribute('tabindex', '-1');
|
||||
heading.focus();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function navigateTo(route) {
|
||||
@ -1560,12 +1611,12 @@
|
||||
if (window.location.hash !== hash) {
|
||||
window.location.hash = hash;
|
||||
} else {
|
||||
applyRoute(route);
|
||||
applyRoute(route, true);
|
||||
}
|
||||
}
|
||||
|
||||
window.addEventListener('hashchange', function () {
|
||||
applyRoute(parseRoute());
|
||||
applyRoute(parseRoute(), true);
|
||||
});
|
||||
|
||||
applyRoute(parseRoute());
|
||||
|
||||
@ -73,6 +73,33 @@
|
||||
<span class="kpi" id="statRunning">0</span>
|
||||
<span class="label">Active Jobs</span>
|
||||
</article>
|
||||
<article>
|
||||
<span class="kpi" id="statErrors">0</span>
|
||||
<span class="label">With errors</span>
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="panel">
|
||||
<div class="panel-header split">
|
||||
<h2>Recent jobs</h2>
|
||||
</div>
|
||||
<div class="table-wrap">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Job ID</th>
|
||||
<th>Type</th>
|
||||
<th>Tenant</th>
|
||||
<th>Status</th>
|
||||
<th>Targets</th>
|
||||
<th>Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="dashRecentJobs">
|
||||
<tr><td colspan="6">No jobs yet.</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
@ -55,38 +55,12 @@ body {
|
||||
background: radial-gradient(circle at center, rgba(3, 105, 161, 0.2), rgba(3, 105, 161, 0));
|
||||
}
|
||||
|
||||
.topbar {
|
||||
width: min(1100px, calc(100% - 2rem));
|
||||
margin: 1.1rem auto 0;
|
||||
padding: 0.95rem 1.1rem;
|
||||
border: 1px solid var(--cv-border);
|
||||
border-radius: 18px;
|
||||
background: rgba(255, 255, 255, 0.75);
|
||||
backdrop-filter: blur(8px);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
box-shadow: 0 10px 24px rgba(20, 20, 19, 0.08);
|
||||
}
|
||||
|
||||
.brand-logo {
|
||||
height: 42px;
|
||||
width: auto;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.topbar-actions {
|
||||
display: flex;
|
||||
gap: 0.6rem;
|
||||
}
|
||||
|
||||
.layout {
|
||||
width: min(1100px, calc(100% - 2rem));
|
||||
margin: 1rem auto 2.5rem;
|
||||
display: grid;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.hero,
|
||||
.panel {
|
||||
border-radius: 22px;
|
||||
@ -131,7 +105,7 @@ h2 {
|
||||
.hero-stats {
|
||||
margin-top: 1.3rem;
|
||||
display: grid;
|
||||
grid-template-columns: repeat(3, minmax(0, 1fr));
|
||||
grid-template-columns: repeat(4, minmax(0, 1fr));
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
@ -291,8 +265,9 @@ textarea {
|
||||
input:focus,
|
||||
select:focus,
|
||||
textarea:focus,
|
||||
button:focus {
|
||||
outline: 2px solid rgba(14, 165, 233, 0.38);
|
||||
button:focus,
|
||||
a:focus-visible {
|
||||
outline: 2px solid var(--cv-accent);
|
||||
outline-offset: 2px;
|
||||
}
|
||||
|
||||
@ -533,8 +508,8 @@ strong {
|
||||
}
|
||||
|
||||
.risk.warn {
|
||||
background: rgba(14, 165, 233, 0.15);
|
||||
color: var(--cv-accent-dark);
|
||||
background: rgba(234, 179, 8, 0.18);
|
||||
color: #854d0e;
|
||||
}
|
||||
|
||||
.risk.high {
|
||||
@ -584,12 +559,6 @@ strong {
|
||||
}
|
||||
|
||||
@media (max-width: 930px) {
|
||||
.topbar {
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
gap: 0.8rem;
|
||||
}
|
||||
|
||||
.hero-stats {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
@ -616,11 +585,6 @@ strong {
|
||||
}
|
||||
|
||||
@media (max-width: 640px) {
|
||||
.layout,
|
||||
.topbar {
|
||||
width: calc(100% - 1rem);
|
||||
}
|
||||
|
||||
.hero,
|
||||
.panel {
|
||||
border-radius: 16px;
|
||||
@ -633,14 +597,6 @@ strong {
|
||||
.hero h1 {
|
||||
max-width: none;
|
||||
}
|
||||
|
||||
.topbar-actions {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.topbar-actions .btn {
|
||||
flex: 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
|
||||
321
containers/clearview/src/clearview_app/api_helpers.py
Normal file
321
containers/clearview/src/clearview_app/api_helpers.py
Normal file
@ -0,0 +1,321 @@
|
||||
"""Shared helpers for the API route modules.
|
||||
|
||||
Extracted verbatim from the original monolithic ``main.py`` so the route
|
||||
modules (``api_tenants``, ``api_jobs``) can share credential resolution, job
|
||||
creation, response mapping, and export helpers without circular imports.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from .db import SessionLocal
|
||||
from .default_sites import is_default_site, normalize_site_url
|
||||
from .models import ScanJob, ScanTarget, TenantProfile
|
||||
from .scanners import AuthConfig
|
||||
from .schemas import ScanJobCreateResponse, ScanJobSummary, TenantProfileItem
|
||||
|
||||
|
||||
def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
|
||||
"""
|
||||
Extract (group_name, link_type) from principal values such as:
|
||||
- SharingLinks.<guid>.<LinkType>.<guid>
|
||||
- c:0o.c|federateddirectoryclaimprovider|SharingLinks.<guid>.<LinkType>.<guid>
|
||||
"""
|
||||
if not principal:
|
||||
return None
|
||||
|
||||
text = principal.strip()
|
||||
segments = [s.strip() for s in text.split("|") if s.strip()]
|
||||
|
||||
candidate = ""
|
||||
for segment in reversed(segments):
|
||||
if segment.lower().startswith("sharinglinks."):
|
||||
candidate = segment
|
||||
break
|
||||
if not candidate and text.lower().startswith("sharinglinks."):
|
||||
candidate = text
|
||||
if not candidate:
|
||||
return None
|
||||
|
||||
parts = candidate.split(".")
|
||||
if len(parts) < 3:
|
||||
return None
|
||||
return candidate, parts[2]
|
||||
|
||||
|
||||
_SCAN_TYPE_LABELS = {
|
||||
"sharepoint": "Deviations",
|
||||
"sharepoint_root": "Root",
|
||||
"mailbox": "Mailbox",
|
||||
"entra_groups": "EntraGroups",
|
||||
}
|
||||
|
||||
|
||||
def _build_export_filename(job: ScanJob, job_id: str) -> str:
|
||||
tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
|
||||
safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
|
||||
scan_type = job.scan_type or "sharepoint"
|
||||
type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
|
||||
short_id = job_id.replace("-", "")[-12:]
|
||||
return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
|
||||
|
||||
|
||||
def _build_enumeration_auth(
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> AuthConfig:
    """Assemble the ``AuthConfig`` used by the tenant-wide enumeration helpers.

    When ``profile_id`` is given and the profile exists, its certificate
    fields are copied into the config; otherwise they stay ``None``. A missing
    ``client_secret`` is passed on as an empty string.
    """
    cert_private_key: str | None = None
    cert_thumbprint: str | None = None
    cert_public_pem: str | None = None
    if profile_id:
        with SessionLocal() as db:
            profile = db.get(TenantProfile, profile_id)
            if profile:
                cert_private_key = profile.cert_private_key
                cert_thumbprint = profile.cert_thumbprint
                cert_public_pem = profile.cert_public_pem

    return AuthConfig(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret or "",
        cert_private_key=cert_private_key,
        cert_thumbprint=cert_thumbprint,
        cert_public_pem=cert_public_pem,
    )


def _enumerate_all_entra_groups(
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """Return every group reported by ``scanners.entra.list_all_groups``.

    Raises:
        HTTPException: 400 with the underlying error text when enumeration fails.
    """
    auth = _build_enumeration_auth(tenant_id, client_id, client_secret, profile_id)

    # Imported at call time, as in the original code.
    from .scanners import entra as _entra

    try:
        return _entra.list_all_groups(auth)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc


def _enumerate_all_mailboxes(
    organization: str | None,
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """Return every mailbox reported by ``scanners.mailbox.list_mailboxes``.

    ``organization`` must be present and contain a dot; it is trimmed and
    lower-cased before being handed to the scanner.

    Raises:
        HTTPException: 400 when ``organization`` is missing/invalid, or when
            enumeration fails (the underlying error text is included).
    """
    if not organization or "." not in organization:
        raise HTTPException(
            status_code=400,
            detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
        )

    auth = _build_enumeration_auth(tenant_id, client_id, client_secret, profile_id)

    # Imported at call time, as in the original code.
    from .scanners import mailbox as _mailbox

    try:
        return _mailbox.list_mailboxes(organization=organization.strip().lower(), auth=auth)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
|
||||
|
||||
|
||||
def _resolve_credentials(
|
||||
db,
|
||||
tenant_profile_id: str | None,
|
||||
tenant_id: str | None,
|
||||
client_id: str | None,
|
||||
client_secret: str | None,
|
||||
) -> tuple[str, str, str | None, str | None]:
|
||||
if tenant_profile_id:
|
||||
profile = db.get(TenantProfile, tenant_profile_id)
|
||||
if not profile:
|
||||
raise HTTPException(status_code=404, detail="Tenant profile not found")
|
||||
if not profile.client_secret and not profile.cert_thumbprint:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
|
||||
)
|
||||
return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
|
||||
if tenant_id and client_id and client_secret:
|
||||
return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
|
||||
)
|
||||
|
||||
|
||||
def _normalize_scan_target(raw: str | None, scan_type: str) -> str:
    """Return the canonical form of one raw target, or ``""`` when it is invalid."""
    if scan_type == "mailbox":
        address = (raw or "").strip().lower()
        return address if "@" in address else ""
    if scan_type == "entra_groups":
        return (raw or "").strip()
    return normalize_site_url(raw) or ""


def _create_job_from_targets(
    raw_targets: list[str],
    scan_type: str,
    skip_default_sites: bool,
    source_type: str,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    tenant_profile_id: str | None = None,
) -> ScanJobCreateResponse:
    """Validate targets, persist a scan job with its targets, and describe the result.

    Each raw target is normalised according to ``scan_type`` (mailbox
    addresses are lower-cased and must contain ``@``; group ids are trimmed;
    everything else goes through ``normalize_site_url``). Invalid entries are
    collected, duplicates of an already seen normalised value are dropped
    silently, and for SharePoint scan types default sites are skipped when
    ``skip_default_sites`` is set.

    The job is stored as ``queued`` when at least one target was accepted;
    otherwise it is stored as ``completed`` with a warning message and a
    ``finished_at`` timestamp.

    Returns:
        The created job summary plus the accepted, skipped-default and
        invalid target lists.
    """
    accepted: list[str] = []
    skipped_default_urls: list[str] = []
    invalid: list[str] = []
    seen: set[str] = set()

    filter_defaults = skip_default_sites and scan_type in ("sharepoint", "sharepoint_root")

    for raw in raw_targets:
        normalized = _normalize_scan_target(raw, scan_type)
        if not normalized:
            invalid.append(raw)
            continue

        if normalized in seen:
            continue
        seen.add(normalized)

        if filter_defaults and is_default_site(normalized):
            skipped_default_urls.append(normalized)
            continue

        accepted.append(normalized)

    # Explain an empty job in terms of what was actually being scanned.
    warning_message: str | None = None
    if not accepted:
        if scan_type == "mailbox":
            warning_message = "No scannable mailboxes after validation"
        elif scan_type == "entra_groups":
            warning_message = "No scannable groups after validation"
        else:
            warning_message = "No scannable sites after validation and default-site filtering"

    with SessionLocal() as db:
        now = datetime.now(timezone.utc)
        job = ScanJob(
            id=str(uuid.uuid4()),
            source_type=source_type,
            scan_type=scan_type,
            status="queued" if accepted else "completed",
            skip_default_sites=skip_default_sites,
            tenant_profile_id=tenant_profile_id,
            auth_tenant_id=tenant_id,
            auth_client_id=client_id,
            auth_client_secret=client_secret,
            total_targets=len(accepted),
            skipped_targets=len(skipped_default_urls),
            warning_message=warning_message,
            error_message=None,
            created_at=now,
            updated_at=now,
            finished_at=now if not accepted else None,
        )

        db.add(job)
        db.flush()

        for index, target in enumerate(accepted, start=1):
            db.add(
                ScanTarget(
                    job_id=job.id,
                    site_url=target,
                    source_row=index,
                    status="queued",
                    attempts=0,
                    created_at=now,
                    updated_at=now,
                )
            )

        db.commit()

        # Re-read the job with its tenant profile eagerly loaded so the
        # summary can include the tenant name.
        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
        job = db.execute(stmt).unique().scalar_one()

        return ScanJobCreateResponse(
            job=_to_job_summary(job),
            accepted_urls=accepted,
            skipped_default_urls=skipped_default_urls,
            invalid_urls=invalid,
        )
|
||||
|
||||
|
||||
def _to_job_summary(job: ScanJob) -> ScanJobSummary:
    """Map a ``ScanJob`` row onto the ``ScanJobSummary`` response schema.

    ``scan_type`` defaults to ``"sharepoint"`` when unset, ``tenant_name`` is
    taken from the related tenant profile when there is one, and
    ``scan_activity`` is only exposed while the job is running.
    """
    profile = job.tenant_profile
    tenant_name = profile.name if profile else None
    is_running = job.status == "running"

    fields = {
        "id": job.id,
        "status": job.status,
        "source_type": job.source_type,
        "scan_type": job.scan_type or "sharepoint",
        "skip_default_sites": job.skip_default_sites,
        "tenant_profile_id": job.tenant_profile_id,
        "tenant_name": tenant_name,
        "total_targets": job.total_targets,
        "processed_targets": job.processed_targets,
        "successful_targets": job.successful_targets,
        "failed_targets": job.failed_targets,
        "skipped_targets": job.skipped_targets,
        "items_scanned": job.items_scanned,
        "scan_activity": job.scan_activity if is_running else None,
        "warning_message": job.warning_message,
        "error_message": job.error_message,
        "created_at": job.created_at,
        "updated_at": job.updated_at,
        "started_at": job.started_at,
        "finished_at": job.finished_at,
    }
    return ScanJobSummary(**fields)
|
||||
|
||||
|
||||
def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
    """Map a ``TenantProfile`` row onto the ``TenantProfileItem`` response schema.

    ``has_certificate`` is derived from whether a certificate thumbprint is
    stored. Secret and private-key fields are not copied.
    """
    thumbprint = profile.cert_thumbprint
    fields = {
        "id": profile.id,
        "name": profile.name,
        "tenant_id": profile.tenant_id,
        "primary_domain": profile.primary_domain,
        "client_id": profile.client_id,
        "has_certificate": bool(thumbprint),
        "cert_thumbprint": thumbprint,
        "cert_expires_at": profile.cert_expires_at,
        "created_at": profile.created_at,
        "updated_at": profile.updated_at,
    }
    return TenantProfileItem(**fields)
|
||||
|
||||
|
||||
def _sharing_link_risk_label(principal: str) -> str:
|
||||
if not principal.startswith("SharingLinks."):
|
||||
return ""
|
||||
parts = principal.split(".", 3)
|
||||
link_type = parts[2] if len(parts) >= 3 else ""
|
||||
if link_type.startswith("Anonymous"):
|
||||
return "Critical"
|
||||
if link_type == "Flexible":
|
||||
return "High"
|
||||
if link_type.startswith("Organization"):
|
||||
return "Low"
|
||||
if link_type.startswith("Direct"):
|
||||
return "Low"
|
||||
return "Unknown"
|
||||
645
containers/clearview/src/clearview_app/api_jobs.py
Normal file
645
containers/clearview/src/clearview_app/api_jobs.py
Normal file
@ -0,0 +1,645 @@
|
||||
"""Scan-job routes: create, list, inspect, cancel, delete, resolve, export."""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
|
||||
from fastapi.responses import Response, StreamingResponse
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from .api_helpers import (
|
||||
_build_export_filename,
|
||||
_create_job_from_targets,
|
||||
_enumerate_all_entra_groups,
|
||||
_enumerate_all_mailboxes,
|
||||
_extract_sharing_link_group_and_type,
|
||||
_resolve_credentials,
|
||||
_sharing_link_risk_label,
|
||||
_to_job_summary,
|
||||
)
|
||||
from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
|
||||
from .db import SessionLocal
|
||||
from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
|
||||
from .scanners import AuthConfig, probe
|
||||
from .schemas import (
|
||||
CreateScanJobRequest,
|
||||
PermissionDeviationItem,
|
||||
ProbeResultResponse,
|
||||
ResolveGroupsResponse,
|
||||
ResolveSharingLinksRequest,
|
||||
ResolveSharingLinksResponse,
|
||||
ScanJobCreateResponse,
|
||||
ScanJobDetail,
|
||||
ScanJobSummary,
|
||||
ScanTargetItem,
|
||||
SharingLinkTypesResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
    """Create a scan job from a JSON request.

    Credentials are resolved from the tenant profile or the manual fields in
    the payload. Targets come from the payload lists, or, when one of the
    "scan all" flags is set for Entra groups or mailboxes, from tenant-wide
    enumeration (the job's ``source_type`` is then ``"tenant_all"`` instead of
    ``"manual"``).
    """
    with SessionLocal() as db:
        resolved = _resolve_credentials(
            db=db,
            tenant_profile_id=payload.tenant_profile_id,
            tenant_id=payload.tenant_id,
            client_id=payload.client_id,
            client_secret=payload.client_secret,
        )
    tenant_id, client_id, client_secret, profile_id = resolved

    # Keyword arguments shared by both tenant-wide enumeration helpers.
    enumeration_args = {
        "tenant_id": tenant_id,
        "client_id": client_id,
        "client_secret": client_secret,
        "profile_id": profile_id,
    }

    enumerated = False
    kind = payload.scan_type
    if kind == "entra_groups":
        if payload.scan_all_groups:
            raw_targets = _enumerate_all_entra_groups(**enumeration_args)
            enumerated = True
        else:
            raw_targets = [str(group) for group in payload.group_ids]
    elif kind == "mailbox":
        if payload.scan_all_mailboxes:
            organization = payload.organization
            # Fall back to the profile's primary domain when the request
            # did not name an organization.
            if profile_id and not organization:
                with SessionLocal() as db:
                    profile = db.get(TenantProfile, profile_id)
                    if profile and profile.primary_domain:
                        organization = profile.primary_domain
            raw_targets = _enumerate_all_mailboxes(organization=organization, **enumeration_args)
            enumerated = True
        else:
            raw_targets = [str(address) for address in payload.mailboxes]
    else:
        raw_targets = [str(url) for url in payload.site_urls]

    return _create_job_from_targets(
        raw_targets=raw_targets,
        scan_type=payload.scan_type,
        skip_default_sites=payload.skip_default_sites,
        source_type="tenant_all" if enumerated else "manual",
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
        tenant_profile_id=profile_id,
    )
|
||||
|
||||
|
||||
@router.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
def create_scan_job_from_csv(
    skip_default_sites: bool = True,
    scan_type: str = Form("sharepoint"),
    tenant_profile_id: str | None = Form(None),
    tenant_id: str | None = Form(None),
    client_id: str | None = Form(None),
    client_secret: str | None = Form(None),
    file: UploadFile = File(...),
) -> ScanJobCreateResponse:
    """Create a scan job whose targets are read from an uploaded CSV file.

    The CSV parser is chosen by ``scan_type`` (mailboxes, Entra groups, or
    sites for anything else). When the parser reports invalid rows, a
    ``CSV issues: <count>`` note is appended to the stored job's warning
    message and mirrored into the response.
    """
    with SessionLocal() as db:
        resolved = _resolve_credentials(
            db=db,
            tenant_profile_id=tenant_profile_id,
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        )
    resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = resolved

    content = file.file.read()
    if scan_type == "mailbox":
        parsed = parse_mailboxes_csv(content)
        targets = parsed.mailboxes
    else:
        parse = parse_entra_groups_csv if scan_type == "entra_groups" else parse_sites_csv
        parsed = parse(content)
        targets = parsed.urls

    response = _create_job_from_targets(
        raw_targets=targets,
        scan_type=scan_type,
        skip_default_sites=skip_default_sites,
        source_type="csv",
        tenant_id=resolved_tenant_id,
        client_id=resolved_client_id,
        client_secret=resolved_client_secret,
        tenant_profile_id=profile_id,
    )

    if not parsed.invalid_rows:
        return response

    # Record how many CSV rows were rejected alongside any existing warning.
    csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
    with SessionLocal() as db:
        job = db.get(ScanJob, response.job.id)
        if job:
            existing = job.warning_message
            job.warning_message = f"{existing} | {csv_warning}" if existing else csv_warning
            job.updated_at = datetime.now(timezone.utc)
            db.commit()
            db.refresh(job)
            response.job.warning_message = job.warning_message

    return response
|
||||
|
||||
|
||||
@router.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
def cancel_scan_job(job_id: str) -> ScanJobSummary:
    """Mark a queued or running job as cancelled and return its summary.

    Raises:
        HTTPException: 404 when the job does not exist; 409 when it is neither
            queued nor running.
    """
    # The same query is used to load the job and to re-read it after commit.
    job_query = (
        select(ScanJob)
        .options(joinedload(ScanJob.tenant_profile))
        .where(ScanJob.id == job_id)
    )

    with SessionLocal() as db:
        job = db.execute(job_query).unique().scalar_one_or_none()
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        if job.status != "queued" and job.status != "running":
            raise HTTPException(status_code=409, detail="Job is not queued or running")

        cancelled_at = datetime.now(timezone.utc)
        job.status = "cancelled"
        job.scan_activity = None
        job.updated_at = cancelled_at
        job.finished_at = cancelled_at
        db.commit()
        db.refresh(job)

        refreshed = db.execute(job_query).unique().scalar_one()
        return _to_job_summary(refreshed)
|
||||
|
||||
|
||||
@router.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
def delete_scan_job(job_id: str) -> Response:
    """Delete a finished job and answer with an empty 204 response.

    Raises:
        HTTPException: 404 when the job does not exist; 409 when the job is
            still queued or running.
    """
    active_states = ("queued", "running")

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")

        still_active = job.status in active_states
        if still_active:
            raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")

        db.delete(job)
        db.commit()

    return Response(status_code=204)
|
||||
|
||||
|
||||
@router.get("/api/scan-jobs", response_model=list[ScanJobSummary])
def list_scan_jobs(
    limit: int = 20,
    tenant_profile_id: str | None = None,
    scan_type: str | None = None,
) -> list[ScanJobSummary]:
    """List job summaries, newest first.

    ``limit`` is clamped to the range 1..100. Results can optionally be
    narrowed to one tenant profile and/or one scan type.
    """
    page_size = max(1, min(limit, 100))

    stmt = (
        select(ScanJob)
        .options(joinedload(ScanJob.tenant_profile))
        .order_by(ScanJob.created_at.desc())
        .limit(page_size)
    )
    # Optional filters; empty values leave the query unrestricted.
    for requested, column in (
        (tenant_profile_id, ScanJob.tenant_profile_id),
        (scan_type, ScanJob.scan_type),
    ):
        if requested:
            stmt = stmt.where(column == requested)

    with SessionLocal() as db:
        summaries = []
        for job in db.execute(stmt).unique().scalars():
            summaries.append(_to_job_summary(job))
        return summaries
|
||||
|
||||
|
||||
@router.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
    """Count a job's permission deviations per sharing-link type.

    Principals that are not sharing links are ignored.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    with SessionLocal() as db:
        if not db.get(ScanJob, job_id):
            raise HTTPException(status_code=404, detail="Job not found")

        principal_query = select(PermissionDeviation.principal).where(
            PermissionDeviation.job_id == job_id
        )
        principals = list(db.execute(principal_query).scalars())

    type_counts: dict[str, int] = {}
    for principal in principals:
        parsed = _extract_sharing_link_group_and_type(str(principal or ""))
        if parsed:
            link_type = parsed[1]
            type_counts[link_type] = type_counts.get(link_type, 0) + 1

    return SharingLinkTypesResponse(type_counts=type_counts)
|
||||
|
||||
|
||||
@router.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
    """Resolve sharing-link group members and store them on the job's deviations.

    Deviations whose principal parses as a sharing-link group of one of the
    requested ``payload.link_types`` are grouped by (site_url, group name).
    Each group is resolved once and the comma-separated member list is written
    to ``resolved_members`` of every deviation in that group.

    Returns:
        The number of distinct groups processed and the number of deviation
        rows that were actually written.

    Raises:
        HTTPException: 404 when the job does not exist, 409 while it is
            queued or running.
    """
    # NOTE(review): resolve_groups_endpoint imports this helper from
    # .scanners.sharepoint — confirm .scanner still exports it.
    from .scanner import resolve_sharing_link_members

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")

        cert_private_key: str | None = None
        cert_thumbprint: str | None = None
        cert_public_pem: str | None = None
        if job.tenant_profile_id:
            profile = db.get(TenantProfile, job.tenant_profile_id)
            if profile:
                cert_private_key = profile.cert_private_key
                cert_thumbprint = profile.cert_thumbprint
                cert_public_pem = profile.cert_public_pem

        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=cert_private_key,
            cert_thumbprint=cert_thumbprint,
            cert_public_pem=cert_public_pem,
        )

        all_deviations = list(
            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
        )

        # Group by (site_url, principal) so each unique group is resolved once
        groups: dict[tuple[str, str], list[int]] = {}
        for dev in all_deviations:
            # Coerce the principal the same way get_sharing_link_types does so
            # both endpoints feed the parser identical input.
            parsed = _extract_sharing_link_group_and_type(str(dev.principal or ""))
            if not parsed:
                continue
            group_name, link_type = parsed
            if link_type not in payload.link_types:
                continue
            groups.setdefault((dev.site_url, group_name), []).append(dev.id)

    updated_deviations = 0
    for (site_url, group_name), dev_ids in groups.items():
        members = resolve_sharing_link_members(site_url, group_name, auth)
        resolved_members = ", ".join(members) if members else ""
        # One short-lived session per group: each group's result is committed
        # on its own.
        with SessionLocal() as db:
            for dev_id in dev_ids:
                dev = db.get(PermissionDeviation, dev_id)
                if dev:
                    dev.resolved_members = resolved_members
                    # Count only rows that still exist and were written.
                    updated_deviations += 1
            db.commit()

    return ResolveSharingLinksResponse(
        resolved_groups=len(groups),
        updated_deviations=updated_deviations,
    )
|
||||
|
||||
|
||||
@router.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
    """
    Expand group principals on this job's deviations and write each group's
    member list to permission_deviations.resolved_members. Handles both
    classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups
    assigned directly at root (via Microsoft Graph). Skips email-shape users
    and SharingLinks groups (those have their own resolver).

    Returns:
        Counts of groups resolved, groups skipped (no members returned or the
        resolver raised) and deviation rows actually written.

    Raises:
        HTTPException: 404 when the job does not exist, 409 while it is
            queued or running, 400 for mailbox jobs.
    """
    from .scanners.sharepoint import (
        is_aad_group_principal,
        is_sharepoint_group_principal,
        resolve_aad_group_members,
        resolve_sharing_link_members,
    )

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")
        if (job.scan_type or "sharepoint") == "mailbox":
            raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")

        cert_private_key: str | None = None
        cert_thumbprint: str | None = None
        cert_public_pem: str | None = None
        if job.tenant_profile_id:
            profile = db.get(TenantProfile, job.tenant_profile_id)
            if profile:
                cert_private_key = profile.cert_private_key
                cert_thumbprint = profile.cert_thumbprint
                cert_public_pem = profile.cert_public_pem

        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=cert_private_key,
            cert_thumbprint=cert_thumbprint,
            cert_public_pem=cert_public_pem,
        )

        all_deviations = list(
            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
        )

        # Group deviations by (site_url, principal) so each unique group is resolved once
        groups: dict[tuple[str, str], list[int]] = {}
        for dev in all_deviations:
            if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
                continue
            groups.setdefault((dev.site_url, dev.principal), []).append(dev.id)

    resolved = 0
    skipped = 0
    updated = 0
    for (site_url, group_name), dev_ids in groups.items():
        try:
            if is_aad_group_principal(group_name):
                members = resolve_aad_group_members(group_name, auth)
            else:
                members = resolve_sharing_link_members(site_url, group_name, auth)
        except Exception:  # noqa: BLE001
            # Best effort: a failing group is reported as skipped instead of
            # aborting the whole request.
            members = []

        if not members:
            skipped += 1
            continue

        resolved_text = ", ".join(members)
        with SessionLocal() as db:
            for dev_id in dev_ids:
                dev = db.get(PermissionDeviation, dev_id)
                if dev:
                    dev.resolved_members = resolved_text
                    # Count only rows that still exist and were written.
                    updated += 1
            db.commit()
        resolved += 1

    return ResolveGroupsResponse(
        resolved_groups=resolved,
        skipped_groups=skipped,
        updated_deviations=updated,
    )
|
||||
|
||||
|
||||
@router.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
    """Probe one target of a job and persist the probe outcome on the target.

    The probe itself runs between two separate database sessions: the first
    one collects credentials and the target URL, the second one stores the
    result.

    Raises:
        HTTPException: 404 when the job or target does not exist (or the
            target belongs to another job, or vanished before the result
            could be stored); 409 while the job is queued or running.
    """
    with SessionLocal() as db:
        job = db.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        target = db.get(ScanTarget, target_id)
        if not target or target.job_id != job_id:
            raise HTTPException(status_code=404, detail="Target not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")

        # Certificate material is only available through a linked tenant profile.
        profile = db.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=profile.cert_private_key if profile else None,
            cert_thumbprint=profile.cert_thumbprint if profile else None,
            cert_public_pem=profile.cert_public_pem if profile else None,
        )
        probe_url = target.site_url
        probe_kind = job.scan_type or "sharepoint"

    outcome = probe(probe_kind, probe_url, auth)

    with SessionLocal() as db:
        stored_target = db.get(ScanTarget, target_id)
        if not stored_target:
            raise HTTPException(status_code=404, detail="Target not found")

        probed_at = datetime.now(timezone.utc)
        stored_target.last_probe_at = probed_at
        stored_target.last_probe_ok = outcome.ok
        stored_target.last_probe_message = outcome.message
        stored_target.updated_at = probed_at
        db.commit()
        db.refresh(stored_target)

        return ProbeResultResponse(
            target_id=stored_target.id,
            ok=outcome.ok,
            message=outcome.message,
            last_probe_at=stored_target.last_probe_at,
        )
|
||||
|
||||
|
||||
@router.get("/api/scan-jobs/{job_id}/export")
def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
    """Export a scan job as an .xlsx workbook.

    The workbook has a "Targets" sheet plus one results sheet whose name and
    columns depend on the job's scan type. When ``site_url`` is given, both
    sheets are restricted to that site.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    import openpyxl
    from openpyxl.styles import Font, PatternFill

    def _solid(hex_color):
        # Solid single-colour cell background.
        return PatternFill(start_color=hex_color, end_color=hex_color, fill_type="solid")

    def _bold(hex_color):
        # Bold font in the given colour.
        return Font(bold=True, color=hex_color)

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")

        target_stmt = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
        deviation_stmt = (
            select(PermissionDeviation)
            .where(PermissionDeviation.job_id == job.id)
            .order_by(PermissionDeviation.id.desc())
        )
        if site_url:
            target_stmt = target_stmt.where(ScanTarget.site_url == site_url)
            deviation_stmt = deviation_stmt.where(PermissionDeviation.site_url == site_url)

        targets = list(db.execute(target_stmt).scalars())
        deviations = list(db.execute(deviation_stmt).scalars())

        # Read everything needed from the job while the session is still open.
        scan_type = job.scan_type or "sharepoint"
        filename = _build_export_filename(job, job_id)

    workbook = openpyxl.Workbook()
    header_fill = _solid("1E2A3A")
    header_font = Font(bold=True, color="FFFFFF")

    # Link-risk label -> (fill, font) used to highlight the risk cell.
    risk_styles: dict[str, tuple] = {
        "Critical": (_solid("FDDEDE"), _bold("7B0000")),
        "High": (_solid("FEE8D3"), _bold("7C2D00")),
        "Low": (_solid("D6EEF8"), _bold("0C4A6E")),
        "Unknown": (_solid("F0F0F0"), _bold("555555")),
    }

    def _write_header(sheet, titles):
        sheet.append(titles)
        for cell in sheet[1]:
            cell.font = header_font
            cell.fill = header_fill

    def _fit_columns(sheet):
        # Width = longest rendered value in the column plus padding.
        for column_cells in sheet.columns:
            widest = max(len(str(cell.value or "")) for cell in column_cells)
            sheet.column_dimensions[column_cells[0].column_letter].width = widest + 4

    target_label = {
        "sharepoint": "Site URL",
        "sharepoint_root": "Site URL",
        "mailbox": "Mailbox",
        "entra_groups": "Group",
    }.get(scan_type, "Target")

    # Targets sheet
    targets_sheet = workbook.active
    targets_sheet.title = "Targets"
    _write_header(targets_sheet, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
    for target in targets:
        started = target.started_at.isoformat() if target.started_at else ""
        finished = target.finished_at.isoformat() if target.finished_at else ""
        targets_sheet.append([
            target.site_url,
            target.status,
            target.attempts,
            target.error_message or "",
            started,
            finished,
        ])
    _fit_columns(targets_sheet)

    # Results sheet — name and columns depend on scan type
    if scan_type == "mailbox":
        results_sheet = workbook.create_sheet("Mailbox Permissions")
        _write_header(results_sheet, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
        ordered = sorted(deviations, key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
        for dev in ordered:
            results_sheet.append([
                dev.site_url,
                dev.object_url,
                dev.permission_type or dev.object_type,
                dev.principal,
                dev.role_name,
            ])
    elif scan_type == "entra_groups":
        results_sheet = workbook.create_sheet("Group Memberships")
        _write_header(results_sheet, ["Group", "Group Type", "User", "Role"])
        ordered = sorted(deviations, key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
        for dev in ordered:
            results_sheet.append([
                dev.object_url,
                dev.permission_type or "",
                dev.principal,
                dev.role_name,
            ])
    elif scan_type == "sharepoint_root":
        results_sheet = workbook.create_sheet("Root Permissions")
        _write_header(results_sheet, ["Site URL", "Principal", "Resolved Members", "Role"])
        ordered = sorted(deviations, key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
        for dev in ordered:
            results_sheet.append([
                dev.site_url,
                dev.principal,
                dev.resolved_members or "",
                dev.role_name,
            ])
    else:
        results_sheet = workbook.create_sheet("Deviations")
        _write_header(
            results_sheet,
            ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"],
        )
        ordered = sorted(deviations, key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
        for dev in ordered:
            # Show the object path relative to its site when it lives under it.
            site_prefix = (dev.site_url or "").rstrip("/")
            if site_prefix and dev.object_url.startswith(site_prefix):
                relative_url = dev.object_url[len(site_prefix):]
            else:
                relative_url = dev.object_url
            link_risk = _sharing_link_risk_label(dev.principal)
            results_sheet.append([
                dev.site_url,
                relative_url,
                dev.object_type,
                dev.principal,
                link_risk,
                dev.resolved_members or "",
                dev.role_name,
                dev.delta_type,
            ])
            if link_risk in risk_styles:
                risk_fill, risk_font = risk_styles[link_risk]
                # Column 5 is "Link Risk"; max_row is the row just appended.
                risk_cell = results_sheet.cell(row=results_sheet.max_row, column=5)
                risk_cell.fill = risk_fill
                risk_cell.font = risk_font
    _fit_columns(results_sheet)

    buf = io.BytesIO()
    workbook.save(buf)
    buf.seek(0)

    return StreamingResponse(
        buf,
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
||||
|
||||
|
||||
@router.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
    """Return one scan job with its targets and permission deviations.

    With ``site_url`` both lists are restricted to that site; without it the
    deviation list is capped at 1000 rows.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    target_fields = (
        "id",
        "site_url",
        "status",
        "attempts",
        "error_message",
        "started_at",
        "finished_at",
        "last_probe_at",
        "last_probe_ok",
        "last_probe_message",
    )
    deviation_fields = (
        "id",
        "site_url",
        "object_url",
        "object_type",
        "principal",
        "role_name",
        "delta_type",
        "permission_type",
        "resolved_members",
        "created_at",
    )

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")

        target_stmt = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
        deviation_stmt = (
            select(PermissionDeviation)
            .where(PermissionDeviation.job_id == job.id)
            .order_by(PermissionDeviation.site_url.asc(), PermissionDeviation.object_url.asc(), PermissionDeviation.id.asc())
        )
        if site_url:
            target_stmt = target_stmt.where(ScanTarget.site_url == site_url)
            deviation_stmt = deviation_stmt.where(PermissionDeviation.site_url == site_url)
        else:
            deviation_stmt = deviation_stmt.limit(1000)

        target_rows = list(db.execute(target_stmt).scalars())
        deviation_rows = list(db.execute(deviation_stmt).scalars())

        summary_fields = _to_job_summary(job).model_dump()
        target_items = [
            ScanTargetItem(**{field: getattr(row, field) for field in target_fields})
            for row in target_rows
        ]
        deviation_items = [
            PermissionDeviationItem(**{field: getattr(row, field) for field in deviation_fields})
            for row in deviation_rows
        ]
        return ScanJobDetail(**summary_fields, targets=target_items, deviations=deviation_items)
|
||||
76
containers/clearview/src/clearview_app/api_onboarding.py
Normal file
76
containers/clearview/src/clearview_app/api_onboarding.py
Normal file
@ -0,0 +1,76 @@
|
||||
"""Microsoft onboarding routes (admin-consent connect + scan-app creation)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi.responses import RedirectResponse
|
||||
|
||||
from .onboarding import (
|
||||
OnboardingError,
|
||||
consume_callback_state,
|
||||
create_connect_url,
|
||||
create_scan_app_for_tenant,
|
||||
)
|
||||
from .schemas import (
|
||||
ConnectMicrosoftResponse,
|
||||
CreateScanAppRequest,
|
||||
CreateScanAppResponse,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
    """Create the scan app registration for a tenant and return its details.

    Raises:
        HTTPException: 400 when `create_scan_app_for_tenant` raises
            OnboardingError, 500 for any other exception.
    """
    try:
        created = create_scan_app_for_tenant(
            tenant_id=payload.tenant_id,
            display_name=payload.display_name,
        )
    except OnboardingError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {exc}") from exc

    # The response carries these attributes of the result one-to-one.
    copied_fields = (
        "tenant_id",
        "client_id",
        "client_secret",
        "app_object_id",
        "service_principal_id",
        "display_name",
    )
    return CreateScanAppResponse(**{field: getattr(created, field) for field in copied_fields})
|
||||
|
||||
|
||||
@router.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
    """Return the URL produced by `create_connect_url`.

    Raises:
        HTTPException: 400 when `create_connect_url` raises OnboardingError.
    """
    try:
        connect_url = create_connect_url()
    except OnboardingError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return ConnectMicrosoftResponse(connect_url=connect_url)
|
||||
|
||||
|
||||
@router.get("/api/onboarding/microsoft/callback")
def onboarding_microsoft_callback(
    tenant: str | None = None,
    state: str | None = None,
    error: str | None = None,
    error_description: str | None = None,
) -> RedirectResponse:
    """Handle the Microsoft callback and redirect to the UI with a status.

    Redirects to ``/`` with ``onboarding_status=error`` plus an
    ``onboarding_message`` when the provider reported an error, the state is
    missing or rejected by `consume_callback_state`, or no tenant was
    supplied. Otherwise redirects with ``onboarding_status=connected`` and
    the tenant id.

    All query values are URL-encoded: they arrive as request parameters and
    may contain ``&``, ``#``, ``=`` or non-ASCII characters that would
    otherwise inject or truncate parameters in the redirect target.
    """
    from urllib.parse import urlencode

    def _redirect(**params: str) -> RedirectResponse:
        # urlencode uses quote_plus, so spaces still become "+".
        return RedirectResponse(url=f"/?{urlencode(params)}")

    if error:
        return _redirect(onboarding_status="error", onboarding_message=error_description or error)

    if not state or not consume_callback_state(state):
        return _redirect(onboarding_status="error", onboarding_message="invalid_or_expired_state")

    if not tenant:
        return _redirect(onboarding_status="error", onboarding_message="missing_tenant")

    return _redirect(onboarding_status="connected", tenant_id=tenant)
|
||||
|
||||
|
||||
@router.get("/api/onboarding/status")
def onboarding_status() -> dict[str, bool]:
    """Report whether all three onboarding settings are configured."""
    from . import config

    required_settings = (
        config.ONBOARDING_CLIENT_ID,
        config.ONBOARDING_CLIENT_SECRET,
        config.ONBOARDING_REDIRECT_URI,
    )
    return {"automated_available": all(required_settings)}
|
||||
86
containers/clearview/src/clearview_app/api_tenants.py
Normal file
86
containers/clearview/src/clearview_app/api_tenants.py
Normal file
@ -0,0 +1,86 @@
|
||||
"""Tenant profile + certificate routes."""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy import select, text
|
||||
|
||||
from .api_helpers import _to_tenant_item
|
||||
from .cert import generate_tenant_certificate
|
||||
from .db import SessionLocal
|
||||
from .models import TenantProfile
|
||||
from .schemas import (
|
||||
CreateTenantProfileRequest,
|
||||
TenantCertificateResponse,
|
||||
TenantProfileItem,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/tenants", response_model=list[TenantProfileItem])
def list_tenants() -> list[TenantProfileItem]:
    """Return all tenant profiles, oldest first."""
    oldest_first = select(TenantProfile).order_by(TenantProfile.created_at.asc())
    with SessionLocal() as db:
        rows = list(db.execute(oldest_first).scalars())
        return [_to_tenant_item(row) for row in rows]
|
||||
|
||||
|
||||
@router.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
    """Create a tenant profile from the request payload.

    String fields are stripped of surrounding whitespace; the primary domain
    is additionally lower-cased. Empty optional fields are stored as None.
    """
    with SessionLocal() as db:
        created_at = datetime.now(timezone.utc)

        primary_domain = None
        if payload.primary_domain:
            primary_domain = payload.primary_domain.strip().lower()

        client_secret = None
        if payload.client_secret:
            client_secret = payload.client_secret.strip()

        new_profile = TenantProfile(
            id=str(uuid.uuid4()),
            name=payload.name.strip(),
            tenant_id=payload.tenant_id.strip(),
            primary_domain=primary_domain,
            client_id=payload.client_id.strip(),
            client_secret=client_secret,
            created_at=created_at,
            updated_at=created_at,
        )
        db.add(new_profile)
        db.commit()
        db.refresh(new_profile)
        return _to_tenant_item(new_profile)
|
||||
|
||||
|
||||
@router.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
def generate_certificate(profile_id: str) -> TenantCertificateResponse:
    """Generate a certificate for a tenant profile and store it on the profile.

    The private key is stored but not returned; the response carries only
    the thumbprint, expiry and public certificate.

    Raises:
        HTTPException: 404 when the tenant profile does not exist.
    """
    with SessionLocal() as db:
        tenant_profile = db.get(TenantProfile, profile_id)
        if not tenant_profile:
            raise HTTPException(status_code=404, detail="Tenant profile not found")

        generated = generate_tenant_certificate()

        tenant_profile.cert_private_key = generated.private_key_pem
        tenant_profile.cert_public_pem = generated.public_cert_pem
        tenant_profile.cert_thumbprint = generated.thumbprint
        tenant_profile.cert_expires_at = generated.expires_at
        tenant_profile.updated_at = datetime.now(timezone.utc)
        db.commit()

        response_fields = {
            "thumbprint": generated.thumbprint,
            "expires_at": generated.expires_at,
            "public_cert_pem": generated.public_cert_pem,
        }
        return TenantCertificateResponse(**response_fields)
|
||||
|
||||
|
||||
@router.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
def delete_tenant(profile_id: str) -> Response:
    """Delete a tenant profile after detaching its scan jobs.

    Raises:
        HTTPException: 404 when the tenant profile does not exist.
    """
    detach_jobs = text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid")

    with SessionLocal() as db:
        doomed = db.get(TenantProfile, profile_id)
        if not doomed:
            raise HTTPException(status_code=404, detail="Tenant profile not found")

        # Jobs survive the profile: clear their reference before the delete.
        db.execute(detach_jobs, {"pid": profile_id})
        db.delete(doomed)
        db.commit()

    return Response(status_code=204)
|
||||
@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from cryptography import x509
|
||||
from cryptography.hazmat.primitives import hashes, serialization
|
||||
@ -30,7 +30,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
|
||||
subject = x509.Name([
|
||||
x509.NameAttribute(NameOID.COMMON_NAME, "Clearview Scan App"),
|
||||
])
|
||||
expires_at = datetime.utcnow() + timedelta(days=365 * valid_years)
|
||||
expires_at = datetime.now(timezone.utc) + timedelta(days=365 * valid_years)
|
||||
|
||||
cert = (
|
||||
x509.CertificateBuilder()
|
||||
@ -38,7 +38,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
|
||||
.issuer_name(subject)
|
||||
.public_key(private_key.public_key())
|
||||
.serial_number(x509.random_serial_number())
|
||||
.not_valid_before(datetime.utcnow())
|
||||
.not_valid_before(datetime.now(timezone.utc))
|
||||
.not_valid_after(expires_at)
|
||||
.sign(private_key, hashes.SHA256())
|
||||
)
|
||||
|
||||
53
containers/clearview/src/clearview_app/db_migrate.py
Normal file
53
containers/clearview/src/clearview_app/db_migrate.py
Normal file
@ -0,0 +1,53 @@
|
||||
"""Database migration bootstrap.
|
||||
|
||||
Replaces the previous ``Base.metadata.create_all`` + ``_ensure_schema_columns``
|
||||
startup path with Alembic. The bootstrap is idempotent and handles three cases:
|
||||
|
||||
* **Fresh database** (no tables): run ``upgrade head`` to create the schema and
|
||||
record the Alembic version.
|
||||
* **Existing pre-Alembic database** (tables present, no ``alembic_version``):
|
||||
``stamp head`` — adopt the baseline without re-creating existing tables.
|
||||
* **Already under Alembic**: run ``upgrade head`` to apply any new revisions.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from alembic import command
|
||||
from alembic.config import Config
|
||||
from sqlalchemy import inspect
|
||||
|
||||
from .db import engine
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_MIGRATIONS_DIR = Path(__file__).resolve().parent / "migrations"
|
||||
# A table that exists in every pre-Alembic Clearview database; its presence
|
||||
# (without alembic_version) marks a database that predates Alembic adoption.
|
||||
_SENTINEL_TABLE = "scan_jobs"
|
||||
|
||||
|
||||
def _alembic_config() -> Config:
    """Build a file-less Alembic Config pointing at the packaged migrations."""
    script_location = str(_MIGRATIONS_DIR)
    alembic_cfg = Config()
    alembic_cfg.set_main_option("script_location", script_location)
    return alembic_cfg
|
||||
|
||||
|
||||
_BASELINE_REVISION = "0001_baseline"
|
||||
|
||||
|
||||
def run_migrations() -> None:
    """Bring the database schema up to date (see module docstring).

    A database that has the sentinel table but no ``alembic_version`` table
    is stamped at the baseline revision first. ``upgrade head`` then runs in
    every case.
    """
    cfg = _alembic_config()
    existing_tables = set(inspect(engine).get_table_names())

    under_alembic = "alembic_version" in existing_tables
    has_app_schema = _SENTINEL_TABLE in existing_tables

    if has_app_schema and not under_alembic:
        # The pre-Alembic schema already matches the baseline, so record that
        # revision instead of re-creating tables; the upgrade below applies
        # any later migrations (e.g. the timestamptz conversion in 0002).
        log.info("Existing pre-Alembic schema detected; stamping baseline %s.", _BASELINE_REVISION)
        command.stamp(cfg, _BASELINE_REVISION)

    log.info("Applying Alembic migrations (upgrade head).")
    command.upgrade(cfg, "head")
|
||||
File diff suppressed because it is too large
Load Diff
58
containers/clearview/src/clearview_app/migrations/env.py
Normal file
58
containers/clearview/src/clearview_app/migrations/env.py
Normal file
@ -0,0 +1,58 @@
|
||||
"""Alembic environment for Clearview.
|
||||
|
||||
Reuses the application's SQLAlchemy engine (already configured with the
|
||||
normalized DATABASE_URL and pool_pre_ping) so migrations run against exactly
|
||||
the same database the app uses. Logging config from alembic.ini is applied
|
||||
only when Alembic is invoked through the CLI; programmatic invocation from
|
||||
``clearview_app.db_migrate`` passes a Config without a file.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
|
||||
from clearview_app.config import DATABASE_URL
|
||||
from clearview_app.db import _normalize_database_url, engine as app_engine
|
||||
from clearview_app.models import Base
|
||||
|
||||
config = context.config
|
||||
|
||||
if config.config_file_name is not None:
|
||||
try:
|
||||
fileConfig(config.config_file_name)
|
||||
except Exception: # noqa: BLE001 - logging config is best-effort
|
||||
pass
|
||||
|
||||
target_metadata = Base.metadata
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Emit migration SQL without a live DB connection.

    Alembic is configured with the normalized DATABASE_URL instead of an
    engine, and bound parameters are rendered inline as literals.
    """
    offline_options = {
        "url": _normalize_database_url(DATABASE_URL),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "compare_type": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations on a connection taken from the application engine."""
    with app_engine.connect() as live_connection:
        online_options = {
            "connection": live_connection,
            "target_metadata": target_metadata,
            "compare_type": True,
        }
        context.configure(**online_options)

        with context.begin_transaction():
            context.run_migrations()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
@ -0,0 +1,26 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = ${repr(up_revision)}
|
||||
down_revision = ${repr(down_revision)}
|
||||
branch_labels = ${repr(branch_labels)}
|
||||
depends_on = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
@ -0,0 +1,31 @@
|
||||
"""baseline schema
|
||||
|
||||
Captures the full Clearview schema as defined by the SQLAlchemy models at the
|
||||
time Alembic was adopted. Creating it via ``Base.metadata.create_all`` keeps the
|
||||
baseline guaranteed-identical to the models (the same DDL the app emitted before
|
||||
Alembic). Existing databases are ``stamp``-ed to this revision rather than
|
||||
re-running ``upgrade`` (see ``clearview_app.db_migrate``).
|
||||
|
||||
Revision ID: 0001_baseline
|
||||
Revises:
|
||||
Create Date: 2026-05-26
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from alembic import op
|
||||
|
||||
from clearview_app.models import Base
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0001_baseline"
|
||||
down_revision = None
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create every table declared on ``Base.metadata`` on the migration bind."""
    migration_bind = op.get_bind()
    Base.metadata.create_all(bind=migration_bind)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop every table declared on ``Base.metadata`` on the migration bind."""
    migration_bind = op.get_bind()
    Base.metadata.drop_all(bind=migration_bind)
|
||||
@ -0,0 +1,63 @@
|
||||
"""convert timestamp columns to timestamptz
|
||||
|
||||
The app now uses timezone-aware UTC datetimes (DateTime(timezone=True)).
|
||||
Existing databases store naive ``timestamp without time zone`` values that were
|
||||
written as UTC, so we reinterpret them as UTC while converting. The conversion
|
||||
is guarded per column on the current type, so it is a no-op on databases whose
|
||||
columns are already ``timestamptz`` (e.g. a fresh DB created from the updated
|
||||
baseline models).
|
||||
|
||||
Revision ID: 0002_timestamptz
|
||||
Revises: 0001_baseline
|
||||
Create Date: 2026-05-26
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0002_timestamptz"
|
||||
down_revision = "0001_baseline"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
# Table -> datetime columns (names come from our own models, never user input).
|
||||
_COLUMNS: dict[str, tuple[str, ...]] = {
|
||||
"tenant_profiles": ("cert_expires_at", "created_at", "updated_at"),
|
||||
"scan_jobs": ("created_at", "updated_at", "started_at", "finished_at", "heartbeat_at"),
|
||||
"scan_targets": ("last_probe_at", "created_at", "updated_at", "started_at", "finished_at"),
|
||||
"permission_deviations": ("created_at",),
|
||||
}
|
||||
|
||||
|
||||
def _column_type(bind, table: str, column: str) -> str | None:
    """Look up a column's ``data_type`` in ``information_schema.columns``.

    Returns the first matching value, or None when no row matches.
    NOTE(review): the lookup does not filter on table_schema — confirm the
    table names are unique across schemas in the target database.
    """
    lookup = sa.text(
        "SELECT data_type FROM information_schema.columns "
        "WHERE table_name = :t AND column_name = :c"
    )
    params = {"t": table, "c": column}
    return bind.execute(lookup, params).scalar()
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Convert naive timestamp columns to timestamptz, reading them as UTC.

    Columns that are not currently ``timestamp without time zone`` are left
    untouched, so re-running on an already converted database is a no-op.
    """
    bind = op.get_bind()
    pending = ((table, column) for table, columns in _COLUMNS.items() for column in columns)
    for table, column in pending:
        if _column_type(bind, table, column) != "timestamp without time zone":
            continue
        op.execute(
            f'ALTER TABLE {table} ALTER COLUMN {column} '
            f"TYPE timestamptz USING {column} AT TIME ZONE 'UTC'"
        )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert the ``timestamptz`` columns listed in ``_COLUMNS`` to naive timestamps.

    Only columns that are currently ``timestamp with time zone`` are altered;
    their values are rendered in UTC while the zone information is dropped.
    """
    connection = op.get_bind()
    for table, names in _COLUMNS.items():
        for column in names:
            is_aware = (
                _column_type(connection, table, column) == "timestamp with time zone"
            )
            if is_aware:
                ddl = (
                    f'ALTER TABLE {table} ALTER COLUMN {column} '
                    f"TYPE timestamp USING {column} AT TIME ZONE 'UTC'"
                )
                op.execute(ddl)
|
||||
@ -1,11 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
"""Timezone-aware UTC now, used as the default for timestamp columns."""
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
@ -22,9 +27,9 @@ class TenantProfile(Base):
|
||||
cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
cert_public_pem: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
jobs: Mapped[list["ScanJob"]] = relationship(back_populates="tenant_profile")
|
||||
|
||||
@ -56,11 +61,11 @@ class ScanJob(Base):
|
||||
warning_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
tenant_profile: Mapped["TenantProfile | None"] = relationship(back_populates="jobs")
|
||||
targets: Mapped[list["ScanTarget"]] = relationship(back_populates="job", cascade="all,delete-orphan")
|
||||
@ -79,14 +84,14 @@ class ScanTarget(Base):
|
||||
attempts: Mapped[int] = mapped_column(Integer, default=0)
|
||||
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
last_probe_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
last_probe_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
last_probe_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
|
||||
last_probe_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
job: Mapped[ScanJob] = relationship(back_populates="targets")
|
||||
deviations: Mapped[list["PermissionDeviation"]] = relationship(back_populates="target", cascade="all,delete-orphan")
|
||||
@ -108,7 +113,7 @@ class PermissionDeviation(Base):
|
||||
permission_type: Mapped[str | None] = mapped_column(String(32), nullable=True)
|
||||
resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
|
||||
|
||||
job: Mapped[ScanJob] = relationship(back_populates="deviations")
|
||||
target: Mapped[ScanTarget] = relationship(back_populates="deviations")
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from urllib.parse import urlparse
|
||||
@ -32,7 +33,13 @@ class PermissionEntry:
|
||||
role_name: str
|
||||
|
||||
|
||||
_TOKEN_CACHE: dict[str, str] = {}
|
||||
# Cache maps cache_key -> (access_token, expires_at_epoch). Guarded by
|
||||
# _TOKEN_LOCK because the worker acquires tokens from multiple threads.
|
||||
_TOKEN_CACHE: dict[str, tuple[str, float]] = {}
|
||||
_TOKEN_LOCK = threading.Lock()
|
||||
# Reuse one MSAL app per (tenant, client, auth_method) so MSAL's own token
|
||||
# cache works and refreshes app tokens automatically.
|
||||
_MSAL_APPS: dict[str, "msal.ConfidentialClientApplication"] = {}
|
||||
|
||||
|
||||
def scan_site_for_deviations(
|
||||
@ -612,18 +619,20 @@ def _probe_hint(error: str, stage: str) -> str:
|
||||
return error[:220]
|
||||
|
||||
|
||||
def _get_token_for_host(host: str, auth: AuthConfig) -> str:
|
||||
auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
|
||||
cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
|
||||
cached = _TOKEN_CACHE.get(cache_key)
|
||||
if cached:
|
||||
return cached
|
||||
def _get_msal_app(auth: AuthConfig, auth_method: str) -> "msal.ConfidentialClientApplication":
|
||||
"""Return a cached ConfidentialClientApplication for these credentials.
|
||||
|
||||
Reusing the app object lets MSAL's built-in token cache serve and refresh
|
||||
app-only tokens instead of re-authenticating on every call.
|
||||
"""
|
||||
app_key = f"{auth.tenant_id}|{auth.client_id}|{auth_method}"
|
||||
app = _MSAL_APPS.get(app_key)
|
||||
if app is not None:
|
||||
return app
|
||||
|
||||
scope = f"https://{host}/.default"
|
||||
authority = f"https://login.microsoftonline.com/{auth.tenant_id}"
|
||||
|
||||
if auth_method == "cert":
|
||||
client_credential = {
|
||||
client_credential: dict[str, str | None] | str | None = {
|
||||
"thumbprint": auth.cert_thumbprint,
|
||||
"private_key": auth.cert_private_key,
|
||||
}
|
||||
@ -635,16 +644,34 @@ def _get_token_for_host(host: str, auth: AuthConfig) -> str:
|
||||
authority=authority,
|
||||
client_credential=client_credential,
|
||||
)
|
||||
result = app.acquire_token_for_client(scopes=[scope])
|
||||
_MSAL_APPS[app_key] = app
|
||||
return app
|
||||
|
||||
if "access_token" not in result:
|
||||
error = result.get("error", "unknown")
|
||||
description = result.get("error_description", "")
|
||||
raise RuntimeError(f"Token request failed ({error}): {description[:300]}")
|
||||
|
||||
token = str(result["access_token"])
|
||||
_TOKEN_CACHE[cache_key] = token
|
||||
return token
|
||||
def _get_token_for_host(host: str, auth: AuthConfig) -> str:
    """Return an app-only access token for ``https://{host}/.default``.

    Tokens are cached per (host, tenant, client, auth method) together with
    their expiry time. A cached token is reused until 60 seconds before it
    expires; after that a new one is requested through the shared MSAL app.

    Both the cache read and the cache write hold ``_TOKEN_LOCK``, so the cache
    is never touched unguarded. The token request itself runs outside the lock
    so that one slow request does not block other threads' cache lookups.

    Args:
        host: Host name the token is scoped to.
        auth: Credentials; certificate auth is used when both a thumbprint
            and a private key are present, otherwise the client secret.

    Returns:
        The access token string.

    Raises:
        RuntimeError: If the token response contains no ``access_token``.
    """
    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"

    with _TOKEN_LOCK:
        cached = _TOKEN_CACHE.get(cache_key)
        if cached is not None and time.time() < cached[1]:
            return cached[0]

    scope = f"https://{host}/.default"
    app = _get_msal_app(auth, auth_method)
    result = app.acquire_token_for_client(scopes=[scope])

    if "access_token" not in result:
        error = result.get("error", "unknown")
        # Tolerate a present-but-null description so the real error is not
        # masked by a TypeError from slicing None.
        description = result.get("error_description") or ""
        raise RuntimeError(f"Token request failed ({error}): {description[:300]}")

    token = str(result["access_token"])
    # expires_in is seconds-from-now; refresh 60s early to avoid edge expiry.
    expires_in = int(result.get("expires_in", 3600))
    expires_at = time.time() + max(expires_in - 60, 0)
    with _TOKEN_LOCK:
        _TOKEN_CACHE[cache_key] = (token, expires_at)
    return token
|
||||
|
||||
|
||||
def _iter_paged(url: str, headers: dict[str, str]):
|
||||
|
||||
@ -1,9 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
# Valid scan types, mirrored by the frontend scan-type dropdowns. Used to
|
||||
# validate incoming job requests (FastAPI returns 422 on anything else).
|
||||
ScanType = Literal["sharepoint", "sharepoint_root", "mailbox", "entra_groups"]
|
||||
|
||||
|
||||
class CreateTenantProfileRequest(BaseModel):
|
||||
name: str
|
||||
@ -33,7 +38,7 @@ class TenantCertificateResponse(BaseModel):
|
||||
|
||||
|
||||
class CreateScanJobRequest(BaseModel):
|
||||
scan_type: str = "sharepoint"
|
||||
scan_type: ScanType = "sharepoint"
|
||||
site_urls: list[HttpUrl] = Field(default_factory=list)
|
||||
mailboxes: list[str] = Field(default_factory=list)
|
||||
scan_all_mailboxes: bool = False
|
||||
|
||||
@ -7,7 +7,7 @@ history, so operators can see exactly which image build is running.
|
||||
from __future__ import annotations
|
||||
|
||||
VERSION = "v0.1.0"
|
||||
BUILD = 1
|
||||
BUILD = 2
|
||||
|
||||
|
||||
def display_version() -> str:
|
||||
|
||||
@ -4,7 +4,7 @@ import logging
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
@ -47,17 +47,21 @@ class ScanWorker:
|
||||
|
||||
def _process_next_job(self) -> bool:
|
||||
with SessionLocal() as db:
|
||||
# Atomic claim: lock the chosen queued row and skip rows already
|
||||
# locked by another worker, so multiple workers/replicas never grab
|
||||
# the same job. The status flip is committed in this transaction.
|
||||
job = db.execute(
|
||||
select(ScanJob)
|
||||
.where(ScanJob.status == "queued")
|
||||
.order_by(ScanJob.created_at.asc())
|
||||
.limit(1)
|
||||
.with_for_update(skip_locked=True)
|
||||
).scalar_one_or_none()
|
||||
|
||||
if job is None:
|
||||
return False
|
||||
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(timezone.utc)
|
||||
job.status = "running"
|
||||
job.started_at = now
|
||||
job.heartbeat_at = now
|
||||
@ -96,7 +100,7 @@ class ScanWorker:
|
||||
job = db.get(ScanJob, job_id)
|
||||
if not job:
|
||||
return
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(timezone.utc)
|
||||
job.heartbeat_at = now
|
||||
job.updated_at = now
|
||||
job.finished_at = now
|
||||
@ -113,7 +117,7 @@ class ScanWorker:
|
||||
if not job or not target:
|
||||
return
|
||||
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(timezone.utc)
|
||||
target.status = "running"
|
||||
target.started_at = now
|
||||
target.updated_at = now
|
||||
@ -128,7 +132,7 @@ class ScanWorker:
|
||||
target = db.get(ScanTarget, target_id)
|
||||
if not job or not target:
|
||||
return
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(timezone.utc)
|
||||
target.status = "failed"
|
||||
target.attempts = 1
|
||||
target.error_message = f"Preflight: {probe.message}"
|
||||
@ -173,7 +177,7 @@ class ScanWorker:
|
||||
)
|
||||
)
|
||||
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(timezone.utc)
|
||||
target.status = "completed"
|
||||
target.attempts = attempt
|
||||
target.error_message = None
|
||||
@ -203,7 +207,7 @@ class ScanWorker:
|
||||
if not job or not target:
|
||||
return
|
||||
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(timezone.utc)
|
||||
target.status = "failed"
|
||||
target.attempts = max_attempts
|
||||
target.error_message = last_error
|
||||
@ -252,7 +256,7 @@ class ScanWorker:
|
||||
with SessionLocal() as db:
|
||||
target = db.get(ScanTarget, target_id)
|
||||
if target:
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(timezone.utc)
|
||||
target.last_probe_at = now
|
||||
target.last_probe_ok = result.ok
|
||||
target.last_probe_message = result.message
|
||||
@ -298,8 +302,8 @@ class ScanWorker:
|
||||
job.scan_activity = activity
|
||||
if items > 0:
|
||||
job.items_scanned += items
|
||||
job.heartbeat_at = datetime.utcnow()
|
||||
job.updated_at = datetime.utcnow()
|
||||
job.heartbeat_at = datetime.now(timezone.utc)
|
||||
job.updated_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
@ -2,6 +2,42 @@
|
||||
|
||||
This file documents changes on the develop branch of this project.
|
||||
|
||||
## 2026-05-26 — UI/UX: dead CSS removal, a11y, distinct risk colours, richer dashboard
|
||||
|
||||
### Added
|
||||
- **Dashboard enrichment** — a fourth KPI card **With errors** (`#statErrors`, counts jobs that are `completed_with_errors` or have `failed_targets > 0`) and a **Recent jobs** panel (`#dashRecentJobs`, last 5 jobs, each row clickable to jump to its details). Populated from the existing `/api/scan-jobs` list in `refreshJobs()` via a new `renderDashRecent()`; all interpolated fields run through `escHtml()`.
|
||||
|
||||
### Changed
|
||||
- **Removed dead CSS** — the pre-sidebar `.topbar`, `.topbar-actions`, and `.layout` rules (and their now-orphaned references inside the 930px/640px media queries) were deleted; the layout has used `.app-shell`/`.sidebar`/`.content` since the sidebar refactor.
|
||||
- **Accessibility** — focus outline strengthened from `rgba(14,165,233,0.38)` to a solid `var(--cv-accent)` (meets WCAG non-text 3:1) and now also covers `a:focus-visible`. On route changes (`applyRoute`), focus now moves to the new page's first heading (`h1/h2`, `tabindex=-1`) and `document.title` updates, so screen-reader/keyboard users land in the freshly shown content.
|
||||
- **Distinct risk colours** — the `risk.warn` badge changed from accent-blue (indistinguishable from `info`/`low`) to amber (`#854d0e` on `rgba(234,179,8,.18)`), giving a real low→high colour gradient.
|
||||
- **Consistent XSS escaping** — `job.id` and `job.source_type` in the Scan Jobs table are now passed through `escHtml()` (previously interpolated raw), matching the rest of the table.
|
||||
|
||||
## 2026-05-26 — Split monolithic main.py into route modules
|
||||
|
||||
### Changed
|
||||
- **`main.py` reduced from 1152 to 64 lines** — now a composition root that only wires the FastAPI app, scan-worker lifecycle, `/healthz`, `/api/version`, the `/` index + static mount, and `include_router` for the new route modules. All endpoint logic moved out verbatim (behaviour-preserving).
|
||||
- **New route modules** (flat modules at package level so existing single-dot relative imports stay unchanged — lower risk than a `routers/` subpackage): `api_tenants.py` (tenant profiles + certificate), `api_jobs.py` (all scan-job routes incl. CSV import, cancel/delete, resolve-sharing-links, resolve-groups, test-connection, Excel export, detail), `api_onboarding.py` (Microsoft connect/callback/scan-app). Shared helpers (`_resolve_credentials`, `_create_job_from_targets`, `_enumerate_all_*`, `_to_job_summary`, `_to_tenant_item`, `_build_export_filename`, `_sharing_link_risk_label`, `_extract_sharing_link_group_and_type`) extracted to `api_helpers.py`.
|
||||
- **Verified behaviour-preserving** — captured the OpenAPI route set before/after; both expose the identical 22 endpoints (`diff` empty). Built the image, booted against a fresh DB: `/healthz`, `/api/version`, `/api/tenants`, `/api/scan-jobs` all respond, invalid `scan_type` still returns 422, no startup errors.
|
||||
|
||||
## 2026-05-26 — Correctness P1: token cache, atomic job claim, timezone-aware datetimes, scan_type validation
|
||||
|
||||
### Changed
|
||||
- **Token cache now has TTL + thread lock + MSAL app reuse** (`scanners/sharepoint.py`) — `_TOKEN_CACHE` previously stored access tokens as plain strings forever, so long scans started failing with 401s once the ~1h token expired. It now stores `(token, expires_at)` and refreshes 60s before expiry, guarded by a new `_TOKEN_LOCK` (the worker fetches tokens from multiple threads). New `_get_msal_app()` caches one `ConfidentialClientApplication` per `(tenant, client, auth_method)` so MSAL's own token cache is reused instead of building a fresh app on every call.
|
||||
- **Atomic job claim** (`worker.py`) — the queued-job selection now uses `.with_for_update(skip_locked=True)` (`SELECT … FOR UPDATE SKIP LOCKED`), so multiple worker threads/replicas can never claim the same job. Behaviour is unchanged for the current single worker but is now replica-safe.
|
||||
- **Timezone-aware datetimes everywhere** — replaced all 24 `datetime.utcnow()` (naive, deprecated) with `datetime.now(timezone.utc)` across `models.py`, `worker.py`, `main.py`, and `cert.py`. SQLAlchemy datetime columns are now `DateTime(timezone=True)`; model defaults use a new `_utcnow()` helper. New Alembic migration `0002_timestamptz` converts existing `timestamp without time zone` columns to `timestamptz` (reinterpreting stored values as UTC), guarded per-column so it is a no-op on databases already timestamptz. **Behaviour note:** API datetimes now carry a UTC offset, so the frontend renders them correctly in local time (previously stored UTC was shown as if local).
|
||||
- **`scan_type` request validation** (`schemas.py`) — `CreateScanJobRequest.scan_type` is now `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` instead of free `str`; invalid values return HTTP 422. The response model keeps `str` so legacy rows never trigger a serialization error. Verified: `scan_type=bogus` → 422, valid type passes schema validation.
|
||||
|
||||
## 2026-05-26 — Alembic migrations replace startup `create_all` + raw ALTERs
|
||||
|
||||
### Added
|
||||
- **Alembic introduced (`alembic==1.14.0`)** — schema is now version-controlled instead of being patched at every startup. New `clearview_app/migrations/` package (`env.py` reuses the app's SQLAlchemy engine and `Base.metadata`; `versions/0001_baseline.py` baseline) and dev-only `containers/clearview/alembic.ini` for manual CLI use. The app builds the Alembic `Config` programmatically, so `alembic.ini` is not shipped in the image.
|
||||
- **Baseline migration `0001_baseline`** — creates the full current schema via `Base.metadata.create_all`, guaranteed identical to the models (the same DDL the app emitted before). Future schema changes become explicit Alembic revisions.
|
||||
- **Startup bootstrap `clearview_app/db_migrate.run_migrations()`** — idempotent, three cases: fresh DB → `upgrade head`; existing pre-Alembic DB (tables present, no `alembic_version`) → `stamp head` (adopt baseline without re-creating); already under Alembic → `upgrade head`. Verified end-to-end against throwaway databases (fresh upgrade, existing-DB stamp, re-run no-op) and a local image boot test (`/healthz` OK, schema + `alembic_version=0001_baseline`).
|
||||
|
||||
### Changed
|
||||
- **`main.py` startup** — `on_startup()` now calls `run_migrations()` instead of `Base.metadata.create_all(bind=engine)` + `_ensure_schema_columns()`. The 18-statement raw `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` block (`_ensure_schema_columns`) is removed; unused `Base`/`engine` imports dropped. The existing dev/prod database is adopted automatically (stamped to baseline) on first start of the new build — no manual migration step required.
|
||||
|
||||
## 2026-05-26 — Build/version number in the UI (Dropkeep-style)
|
||||
|
||||
### Added
|
||||
|
||||
Loading…
Reference in New Issue
Block a user