Release (merge dev)

This commit is contained in:
Ivo Oskamp 2026-05-28 16:43:49 +02:00
commit a51694289c
66 changed files with 9901 additions and 1559 deletions

28
build.sh Executable file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail

# Clearview build entry point.
#
# Runs the project-specific version step for the requested mode and then
# replaces itself with the shared build-and-push.sh script, forwarding the
# original arguments unchanged. Version handling lives here so the shared
# script stays project-agnostic.
#
# Usage:
#   ./build.sh t   # increment explicit dev/test build segment, then push :dev
#   ./build.sh r   # validate release version state, then run release build

# Always operate from the directory this script lives in, so the relative
# ./scripts/... and ./build-and-push.sh paths resolve regardless of the
# caller's working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$script_dir"

# Default to an empty string so `set -u` does not abort before the usage
# message can be printed.
build_mode="${1:-}"

if [[ "$build_mode" == "t" ]]; then
  ./scripts/bump-dev-build.py
elif [[ "$build_mode" == "r" ]]; then
  ./scripts/check-release-version.py
else
  echo "usage: ./build.sh {t|r}" >&2
  exit 2
fi

# Hand over to the shared script; exec means its exit status becomes ours.
exec ./build-and-push.sh "$@"

View File

@ -1,11 +1,33 @@
FROM python:3.12-slim FROM python:3.12-slim-bookworm
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app/src ENV PYTHONPATH=/app/src
# Suppress PowerShell telemetry inside the container
ENV POWERSHELL_TELEMETRY_OPTOUT=1
ENV DOTNET_CLI_TELEMETRY_OPTOUT=1
WORKDIR /app WORKDIR /app
# ---------------------------------------------------------------------------
# PowerShell 7 + ExchangeOnlineManagement module
# Required for Exchange Online mailbox permission scanning.
# ---------------------------------------------------------------------------
# Everything below runs in a single layer so that the temporary download,
# the curl package and the apt package lists never persist in the image:
#   1. install ca-certificates + curl (curl is only needed for step 2)
#   2. download and register the Microsoft package repository for Debian 12
#   3. install PowerShell from that repository
#   4. install the ExchangeOnlineManagement module for all users
#   5. purge curl, drop now-unneeded dependencies, remove the apt lists
# `set -e` aborts the layer on the first failing command.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends ca-certificates curl; \
    curl -fsSL -o /tmp/packages-microsoft-prod.deb \
        https://packages.microsoft.com/config/debian/12/packages-microsoft-prod.deb; \
    dpkg -i /tmp/packages-microsoft-prod.deb; \
    rm /tmp/packages-microsoft-prod.deb; \
    apt-get update; \
    apt-get install -y --no-install-recommends powershell; \
    pwsh -NoProfile -NonInteractive -Command \
        "Set-PSRepository -Name PSGallery -InstallationPolicy Trusted; Install-Module -Name ExchangeOnlineManagement -Scope AllUsers -Force -AllowClobber"; \
    apt-get purge -y curl; \
    apt-get autoremove -y; \
    rm -rf /var/lib/apt/lists/*
COPY requirements.txt ./requirements.txt COPY requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt

View File

@ -0,0 +1,45 @@
# Alembic config for manual CLI use during development, e.g.:
# cd containers/clearview && DATABASE_URL=postgresql://... PYTHONPATH=src alembic revision -m "msg"
#
# The application itself does NOT read this file: clearview_app.db_migrate builds
# an Alembic Config programmatically and env.py takes the database URL from
# DATABASE_URL via clearview_app.config. sqlalchemy.url is therefore left blank.
# Core Alembic settings: location of the migration scripts and the directory
# prepended to sys.path (paths are relative to the directory alembic runs from).
[alembic]
script_location = src/clearview_app/migrations
prepend_sys_path = src
# Intentionally blank: per the header above, the database URL comes from
# DATABASE_URL rather than from this file.
sqlalchemy.url =
# ---------------------------------------------------------------------------
# Logging. The sections below follow the Python logging fileConfig layout:
# a list of logger/handler/formatter keys, then one section per key.
# ---------------------------------------------------------------------------
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
# Root logger: WARNING and above, written to the console handler.
[logger_root]
level = WARNING
handlers = console
qualname =
# SQLAlchemy engine logger: WARNING and above; no handler of its own.
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
# Alembic logger: INFO and above; no handler of its own.
[logger_alembic]
level = INFO
handlers =
qualname = alembic
# Console handler: writes to stderr; NOTSET means the handler itself does not
# filter by level.
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
# Line format: level name padded/truncated to 5 characters, logger name, message.
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@ -1,9 +1,13 @@
fastapi==0.115.0 fastapi==0.115.0
uvicorn[standard]==0.30.6 uvicorn[standard]==0.30.6
sqlalchemy==2.0.36 sqlalchemy==2.0.36
alembic==1.14.0
psycopg[binary]==3.2.3 psycopg[binary]==3.2.3
python-multipart==0.0.12 python-multipart==0.0.12
requests==2.32.3 requests==2.32.3
cryptography==44.0.2 cryptography==44.0.2
msal==1.32.0 msal==1.32.0
openpyxl==3.1.5 openpyxl==3.1.5
argon2-cffi==23.1.0
pytest==8.3.3
httpx==0.27.2

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
<svg width="286" height="72" viewBox="0 0 286 72" fill="none" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="logoTitleDark logoDescDark">
<!-- Accessible name and description, referenced by aria-labelledby on the root element. -->
<title id="logoTitleDark">Clearview</title>
<desc id="logoDescDark">Clearview logo for dark backgrounds</desc>
<!-- Icon mark, shifted down 2 units. Each shape is drawn twice: a translucent fill, then an outline. -->
<g transform="translate(0 2)">
<!-- Outer ellipse (68 x 40) centred at (34, 34). -->
<ellipse cx="34" cy="34" rx="34" ry="20" fill="#0EA5E9" fill-opacity="0.20"/>
<ellipse cx="34" cy="34" rx="34" ry="20" stroke="#38BDF8" stroke-width="2.4"/>
<!-- Inner circle, radius 12, same centre. -->
<circle cx="34" cy="34" r="12" fill="#0EA5E9" fill-opacity="0.30"/>
<circle cx="34" cy="34" r="12" stroke="#38BDF8" stroke-width="2"/>
<!-- Small solid dot above a rounded bar, centred inside the inner circle. -->
<circle cx="34" cy="31" r="4" fill="#38BDF8"/>
<rect x="32" y="34" width="4" height="8" rx="2" fill="#38BDF8"/>
<!-- Semi-transparent arc above the ellipse. -->
<path d="M8 22C16 14 25 10 34 10C43 10 52 14 60 22" stroke="#38BDF8" stroke-opacity="0.55" stroke-width="2"/>
</g>
<!-- Wordmark: "Clear" in the accent blue, "view" in near-white for dark backgrounds. -->
<text x="80" y="44" font-size="36" font-weight="600" font-family="'Space Grotesk', 'Avenir Next', 'Segoe UI', sans-serif">
<tspan fill="#38BDF8">Clear</tspan><tspan fill="#F4F7FB">view</tspan>
</text>
</svg>

After

Width:  |  Height:  |  Size: 1.0 KiB

View File

@ -0,0 +1,22 @@
(function (global) {
  /**
   * Turn a fetch Response into a plain `{ ok, status, data }` result.
   *
   * The body is parsed as JSON on a best-effort basis: when it is empty or
   * not valid JSON, `data` is left as `null` instead of throwing, so callers
   * can still branch on `ok` / `status`.
   *
   * @param {Response} response - Response returned by `fetch`.
   * @returns {Promise<{ok: boolean, status: number, data: *}>}
   */
  const toResult = async (response) => {
    let payload = null;
    try {
      payload = await response.json();
    } catch (_) {
      // Deliberately ignored: a missing or non-JSON body is reported as data: null.
    }
    return { ok: response.ok, status: response.status, data: payload };
  };

  /**
   * POST `body` as JSON to `url`, sending same-origin credentials.
   *
   * @param {string} url - Request URL.
   * @param {*} body - Value serialised with JSON.stringify.
   * @returns {Promise<{ok: boolean, status: number, data: *}>}
   */
  const postJson = async (url, body) => {
    const request = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      credentials: 'same-origin',
      body: JSON.stringify(body),
    };
    return toResult(await fetch(url, request));
  };

  /**
   * GET `url`, sending same-origin credentials.
   *
   * @param {string} url - Request URL.
   * @returns {Promise<{ok: boolean, status: number, data: *}>}
   */
  const getJson = async (url) =>
    toResult(await fetch(url, { credentials: 'same-origin' }));

  // Expose the helpers on the supplied global object.
  global.ClearviewAuth = { postJson, getJson };
})(window);

View File

@ -3,331 +3,597 @@
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">
<title>Clearview | SharePoint Permission Deviations</title> <title>Clearview | Permission Deviations</title>
<meta name="description" content="Clearview scans SharePoint sites and reports only permission deviations from root level."> <meta name="description" content="Clearview scans Microsoft 365 SharePoint sites and Exchange Online mailboxes for permission deviations.">
<link rel="icon" href="assets/favicon.svg" type="image/svg+xml"> <link rel="icon" href="assets/favicon.svg" type="image/svg+xml">
<link rel="preconnect" href="https://fonts.googleapis.com"> <link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=IBM+Plex+Sans:wght@400;500;600&display=swap" rel="stylesheet"> <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=IBM+Plex+Sans:wght@400;500;600&display=swap" rel="stylesheet">
<link rel="stylesheet" href="styles.css"> <link rel="stylesheet" href="styles.css">
</head> </head>
<body> <body class="app-shell">
<div class="bg-orb orb-one" aria-hidden="true"></div> <div class="bg-orb orb-one" aria-hidden="true"></div>
<div class="bg-orb orb-two" aria-hidden="true"></div> <div class="bg-orb orb-two" aria-hidden="true"></div>
<header class="topbar slide-in"> <aside class="sidebar">
<a href="#" class="brand" aria-label="Clearview home"> <div class="sidebar-brand">
<img src="assets/clearview-logo.svg" alt="Clearview logo" class="brand-logo"> <img src="assets/clearview-logo-dark.svg" alt="Clearview" class="brand-logo">
</a>
<div class="topbar-actions">
<button id="refreshJobsBtn" class="btn btn-outline" type="button">Refresh</button>
</div> </div>
</header> <nav class="sidebar-nav">
<a href="#/dashboard" class="nav-link" data-route="dashboard">Dashboard</a>
<a href="#/jobs" class="nav-link" data-route="jobs">Scan Jobs</a>
<main class="layout"> <div class="nav-section">SharePoint</div>
<section class="hero fade-up" style="--delay: 0.05s"> <a href="#/scan/sharepoint" class="nav-link" data-route="scan-sharepoint">New SP Scan</a>
<p class="eyebrow">Root Permission Drift Detection</p>
<h1>Monitor SharePoint permissions across all your customers</h1> <div class="nav-section">Mailboxes</div>
<p class="lede"> <a href="#/scan/mailbox" class="nav-link" data-route="scan-mailbox">New Mailbox Scan</a>
Clearview scans down to folder and file level and reports only rights that deviate from the
root permissions of each site. <div class="nav-section">Entra</div>
</p> <a href="#/scan/entra" class="nav-link" data-route="scan-entra">New Entra Scan</a>
<div class="hero-stats" id="heroStats">
<article> <div class="nav-spacer"></div>
<span class="kpi" id="statTenants">0</span> <a href="#/tenants" class="nav-link" data-route="tenants">Tenants</a>
<span class="label">Tenants</span> <a href="#/settings" class="nav-link" data-route="settings">Settings</a>
</article> </nav>
<article> <div class="sidebar-foot">
<span class="kpi" id="statJobs">0</span> <span class="sidebar-version" id="appVersion" title="Running Clearview build"></span>
<span class="label">Jobs</span> </div>
</article> </aside>
<article>
<span class="kpi" id="statRunning">0</span> <main class="content">
<span class="label">Active Jobs</span> <header class="content-topbar">
</article> <div class="content-title" id="contentTitle">Dashboard</div>
<div class="content-actions">
<button id="refreshJobsBtn" class="btn btn-outline" type="button">Refresh</button>
<div class="header-user" id="userBadge"></div>
</div>
</header>
<!-- =================================================================== -->
<!-- Route: Dashboard -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="dashboard">
<div class="hero fade-up">
<p class="eyebrow">Permission Drift Detection</p>
<h1>Monitor Microsoft 365 permissions across all customers</h1>
<p class="lede">
Scan SharePoint sites for deviations from root permissions, and Exchange Online
mailboxes for delegated access (Full Access, Send As, Send on Behalf, folder delegations).
</p>
<div class="hero-stats">
<article>
<span class="kpi" id="statTenants">0</span>
<span class="label">Tenants</span>
</article>
<article>
<span class="kpi" id="statJobs">0</span>
<span class="label">Jobs</span>
</article>
<article>
<span class="kpi" id="statRunning">0</span>
<span class="label">Active Jobs</span>
</article>
<article>
<span class="kpi" id="statErrors">0</span>
<span class="label">With errors</span>
</article>
</div>
</div>
<div class="panel">
<div class="panel-header split">
<h2>Recent jobs</h2>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Job ID</th>
<th>Type</th>
<th>Tenant</th>
<th>Status</th>
<th>Targets</th>
<th>Updated</th>
</tr>
</thead>
<tbody id="dashRecentJobs">
<tr><td colspan="6">No jobs yet.</td></tr>
</tbody>
</table>
</div>
</div> </div>
</section> </section>
<!-- ------------------------------------------------------------------ --> <!-- =================================================================== -->
<!-- Tenants panel --> <!-- Route: Tenants -->
<!-- ------------------------------------------------------------------ --> <!-- =================================================================== -->
<section class="panel fade-up" style="--delay: 0.11s"> <section class="route-page" data-route-page="tenants" hidden>
<div class="panel-header split"> <div class="panel">
<h2>Tenants</h2> <div class="panel-header split">
<button id="addTenantBtn" class="btn btn-outline" type="button">Add Tenant</button> <h2>Tenants</h2>
</div> <button id="addTenantBtn" class="btn btn-outline" type="button">Add Tenant</button>
</div>
<!-- Add / Edit tenant form (hidden by default) --> <div id="addTenantForm" class="scan-form" hidden>
<div id="addTenantForm" class="scan-form" hidden> <h3>New Tenant</h3>
<h3>New Tenant</h3>
<!-- Automated onboarding --> <div id="tenantSetupAutomated" class="setup-note" hidden>
<div id="tenantSetupAutomated" class="setup-note" hidden> <h3>Azure App Setup (automated)</h3>
<h3>Azure App Setup (automated)</h3> <p>Connect to the customer's Microsoft tenant, then create a dedicated scan app automatically.</p>
<p>Connect to the customer's Microsoft tenant, then create a dedicated scan app automatically.</p> <ul>
<ul> <li>Click <strong>Connect Microsoft</strong> and approve admin consent.</li>
<li>Click <strong>Connect Microsoft</strong> and approve admin consent for the customer tenant</li> <li>Created scan app receives SharePoint <code>Sites.FullControl.All</code> with admin consent.</li>
<li>Created scan app receives SharePoint application permission: <code>Sites.FullControl.All</code></li> <li>For mailbox scanning, the <strong>Exchange.ManageAsApp</strong> permission and <strong>Exchange Administrator</strong> Entra role must be added manually after creation — see the <em>Enable mailbox scanning</em> section below.</li>
</ul> </ul>
<form id="onboardingForm" class="onboarding-form" action="#" method="post"> <form id="onboardingForm" class="onboarding-form" action="#" method="post">
<div class="onboarding-grid"> <div class="onboarding-grid">
<div class="onboarding-wide"> <div class="onboarding-wide">
<button id="connectMicrosoftBtn" class="btn btn-outline" type="button">Connect Microsoft</button> <button id="connectMicrosoftBtn" class="btn btn-outline" type="button">Connect Microsoft</button>
</div>
<label class="onboarding-wide">
Connected Tenant ID
<input id="connectedTenantId" type="text" placeholder="Connect first to populate tenant id">
</label>
<label class="onboarding-wide">
New Scan App Display Name
<input id="scanAppDisplayName" type="text" value="Clearview Scan App">
</label>
</div> </div>
<label class="onboarding-wide"> <button class="btn btn-outline" type="submit">Create Scan App Automatically</button>
Connected Tenant ID </form>
<input id="connectedTenantId" type="text" placeholder="Connect first to populate tenant id"> </div>
</label>
<label class="onboarding-wide"> <div id="tenantSetupManual" class="setup-note" hidden>
New Scan App Display Name <h3>Azure App Setup (manual)</h3>
<input id="scanAppDisplayName" type="text" value="Clearview Scan App"> <p>Create a dedicated Azure app registration in the customer's tenant.</p>
</label> <ol class="setup-steps">
</div> <li>Open <strong>Azure Portal</strong> → <strong>Entra ID → App registrations → New registration</strong>.</li>
<button class="btn btn-outline" type="submit">Create Scan App Automatically</button> <li>Pick a name (e.g. <em>Clearview Scan App</em>), select <strong>Single tenant</strong>, click <strong>Register</strong>.</li>
<li>Copy <strong>Directory (tenant) ID</strong> and <strong>Application (client) ID</strong>.</li>
<li>For SharePoint: <strong>API permissions → Add a permission → SharePoint → Application permissions</strong>, select <code>Sites.FullControl.All</code>, then click <strong>Grant admin consent</strong>.</li>
<li>For group resolution (recommended): also add <strong>Microsoft Graph → Application permissions → <code>Group.Read.All</code></strong> and grant admin consent. This lets Clearview expand Microsoft 365 / Azure AD security groups to their members and owners during the <em>Resolve groups</em> action. Without it, M365 group entries are kept as a single line.</li>
<li>The primary domain is the tenant's default Microsoft 365 domain — typically <code>&lt;tenantname&gt;.onmicrosoft.com</code>. Find it in <strong>Microsoft 365 admin center → Settings → Domains</strong> (the <em>Default</em> entry).</li>
</ol>
</div>
<div id="tenantSetupMailbox" class="setup-note">
<h3>Enable mailbox scanning (Exchange Online)</h3>
<p>Mailbox scanning needs additional permissions on the scan app, on top of the SharePoint setup. Skip this section if the tenant only needs SharePoint scans.</p>
<ol class="setup-steps">
<li><strong>Add the API permission.</strong> Azure Portal → <strong>Entra ID → App registrations → [your scan app] → API permissions → Add a permission → APIs my organization uses</strong>. Search for <em>Office 365 Exchange Online</em>, choose <strong>Application permissions</strong> and tick <code>Exchange.ManageAsApp</code>. Click <strong>Add permissions</strong>.</li>
<li><strong>Grant admin consent.</strong> Still on the API permissions page, click <strong>Grant admin consent for &lt;tenant&gt;</strong>. Verify the status column shows <em>Granted for &lt;tenant&gt;</em>.</li>
<li><strong>Assign the Exchange Administrator role.</strong> Entra ID → <strong>Roles and administrators</strong> → search <em>Exchange Administrator</em> → click the role → <strong>Add assignments</strong> → search the scan app by name (you'll need to switch the picker to include <em>Service principals / Apps</em>) → select it and confirm. This role grants the app the right to read mailbox permissions; it cannot be granted via Microsoft Graph and must be done in the portal.</li>
<li><strong>Generate a certificate.</strong> Save the tenant first (this section's form), then use the <strong>Certificate</strong> button in the Tenants table to generate a self-signed RSA-2048 key. The public PEM appears in a panel — click <strong>Download .cer</strong>.</li>
<li><strong>Upload the certificate to Azure.</strong> Back in the scan app, go to <strong>Certificates &amp; secrets → Certificates → Upload certificate</strong>, pick the downloaded <code>.cer</code> file, and confirm. Azure shows the SHA-1 thumbprint — it must match the one shown in the Tenants table.</li>
<li><strong>Fill in the Primary Domain field</strong> on the tenant form (e.g. <code>contoso.onmicrosoft.com</code>). Clearview uses this for <code>Connect-ExchangeOnline -Organization</code> and to auto-fill the Mailbox scan form.</li>
<li><strong>Test the connection.</strong> Run a <em>Scan all mailboxes</em> job for this tenant; preflight on the first target validates that authentication works end-to-end.</li>
</ol>
<p class="setup-hint">Exchange Online does <strong>not</strong> support client-secret app-only authentication. Mailbox scans require a certificate. The same certificate is reused for SharePoint scans, so generating it once is enough.</p>
</div>
<div class="auth-grid">
<label class="onboarding-wide">
Tenant Name (label for your reference)
<input id="newTenantName" type="text" placeholder="Contoso">
</label>
<label>
Tenant ID
<input id="newTenantTenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label>
Primary Domain <span style="font-weight:400;font-size:0.82rem">(used by mailbox scanning, e.g. contoso.onmicrosoft.com)</span>
<input id="newTenantPrimaryDomain" type="text" placeholder="contoso.onmicrosoft.com">
</label>
<label>
Client ID
<input id="newTenantClientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret <span style="font-weight:400;font-size:0.82rem">(optional — not needed when using a certificate; not supported for mailbox scans)</span>
<input id="newTenantClientSecret" type="password" placeholder="Leave empty if you will generate a certificate">
</label>
</div>
<div class="form-actions">
<button id="saveTenantBtn" class="btn btn-solid" type="button">Save Tenant</button>
<button id="cancelTenantBtn" class="btn btn-outline" type="button">Cancel</button>
</div>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Name</th>
<th>Tenant ID</th>
<th>Client ID</th>
<th>Auth</th>
<th>Added</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="tenantsTableBody">
<tr><td colspan="6">No tenants configured yet.</td></tr>
</tbody>
</table>
</div>
<div id="tenantFeedback" class="feedback" aria-live="polite"></div>
<div id="certBlock" class="cert-block" hidden>
<h3>Public Certificate</h3>
<p>Upload this certificate in <strong>Azure Portal → App registrations → [your app] → Certificates &amp; secrets → Certificates → Upload certificate</strong>.</p>
<textarea id="certPem" class="cert-pem" rows="10" readonly></textarea>
<div class="form-actions">
<button id="downloadCertBtn" class="btn btn-solid" type="button">Download .cer</button>
<button id="copyCertBtn" class="btn btn-outline" type="button">Copy to clipboard</button>
<button id="closeCertBtn" class="btn btn-outline" type="button">Close</button>
</div>
</div>
</div>
</section>
<!-- =================================================================== -->
<!-- Route: Scan SharePoint -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="scan-sharepoint" hidden>
<div class="panel">
<div class="panel-header split">
<h2>New SharePoint Scan</h2>
<span class="badge">SharePoint</span>
</div>
<div class="scan-form auth-block">
<h3>Scan mode</h3>
<label>
What to collect
<select id="sharepointScanMode">
<option value="sharepoint">Deviations from root (libraries, folders, files)</option>
<option value="sharepoint_root">Root permissions only (site-level role assignments)</option>
</select>
</label>
<p class="setup-hint">
<strong>Deviations from root</strong> traverses every document library and reports only permissions that
differ from the site root baseline. <strong>Root permissions only</strong> lists the role assignments
on the site root itself — much faster, useful for an inventory of who has site-level access.
</p>
</div>
<div class="scan-form auth-block">
<h3>Tenant</h3>
<label>
Select Tenant Profile
<select id="scanTenantSelect" data-shared-tenant-select>
<option value="">-- Select a tenant --</option>
<option value="__manual__">Manual credentials...</option>
</select>
</label>
</div>
<div id="manualCredentialsBlock" class="scan-form auth-block" hidden>
<h3>Microsoft App Credentials</h3>
<div class="auth-grid">
<label>
Tenant ID
<input id="tenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label>
Client ID
<input id="clientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret
<input id="clientSecret" type="password" placeholder="Client secret">
</label>
</div>
</div>
<div class="form-grid">
<form id="manualScanForm" class="scan-form" action="#" method="post">
<h3>Manual URLs</h3>
<label>
Site URLs (one per line)
<textarea id="manualUrls" rows="6" placeholder="https://contoso.sharepoint.com/sites/finance&#10;https://contoso.sharepoint.com/sites/hr"></textarea>
</label>
<label class="checkline">
<input id="manualSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue manual scan</button>
</form>
<form id="csvScanForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import</h3>
<label>
Microsoft Sites export (CSV)
<input id="csvFile" type="file" accept=".csv,text/csv">
</label>
<label class="checkline">
<input id="csvSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form> </form>
</div> </div>
<!-- Manual onboarding --> <div id="submitFeedback" class="feedback" aria-live="polite"></div>
<div id="tenantSetupManual" class="setup-note" hidden> </div>
<h3>Azure App Setup (manual)</h3> </section>
<p>Create a dedicated Azure app registration in the customer's tenant and grant it SharePoint access.</p>
<ol class="setup-steps"> <!-- =================================================================== -->
<li>Open <strong>Azure Portal</strong> and go to <strong>Entra ID &rarr; App registrations &rarr; New registration</strong>.</li> <!-- Route: Scan Mailbox -->
<li>Fill in a name (e.g. <em>Clearview Scan App</em>), select <strong>Single tenant</strong>, click <strong>Register</strong>.</li> <!-- =================================================================== -->
<li>Copy the <strong>Directory (tenant) ID</strong> and <strong>Application (client) ID</strong> from the Overview page.</li> <section class="route-page" data-route-page="scan-mailbox" hidden>
<li>Go to <strong>API permissions &rarr; Add &rarr; SharePoint &rarr; Application permissions</strong>, add <code>Sites.FullControl.All</code>.</li> <div class="panel">
<li>Click <strong>Grant admin consent</strong>.</li> <div class="panel-header split">
<li>Go to <strong>Certificates &amp; secrets &rarr; New client secret</strong>, copy the <strong>Value</strong> immediately.</li> <h2>New Mailbox Scan</h2>
</ol> <span class="badge">Exchange Online</span>
</div> </div>
<!-- Tenant fields --> <div class="scan-form auth-block">
<div class="auth-grid"> <h3>Tenant</h3>
<label class="onboarding-wide">
Tenant Name (label for your reference)
<input id="newTenantName" type="text" placeholder="Contoso">
</label>
<label> <label>
Tenant ID Select Tenant Profile
<input id="newTenantTenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000"> <select id="mailboxScanTenantSelect" data-shared-tenant-select>
</label> <option value="">-- Select a tenant --</option>
<label> </select>
Client ID
<input id="newTenantClientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret <span style="font-weight:400;font-size:0.82rem">(optional — not needed when using a certificate)</span>
<input id="newTenantClientSecret" type="password" placeholder="Leave empty if you will generate a certificate">
</label> </label>
<p class="setup-hint">
Mailbox scanning requires a certificate on the tenant profile and the
<code>Exchange.ManageAsApp</code> permission with the Exchange Administrator role.
Client-secret authentication is not supported for Exchange Online.
</p>
</div> </div>
<div class="form-actions"> <div class="form-grid">
<button id="saveTenantBtn" class="btn btn-solid" type="button">Save Tenant</button> <form id="manualMailboxForm" class="scan-form" action="#" method="post">
<button id="cancelTenantBtn" class="btn btn-outline" type="button">Cancel</button> <h3>Manual UPNs</h3>
<label>
User Principal Names (one per line)
<textarea id="manualMailboxes" rows="6" placeholder="alice@contoso.com&#10;bob@contoso.com"></textarea>
</label>
<button class="btn btn-solid" type="submit">Queue mailbox scan</button>
</form>
<form id="csvMailboxForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import</h3>
<label>
CSV with <code>UserPrincipalName</code> / <code>Email</code> column
<input id="csvMailboxFile" type="file" accept=".csv,text/csv">
</label>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form>
<form id="allMailboxesForm" class="scan-form" action="#" method="post">
<h3>All mailboxes in tenant</h3>
<label>
Organization (primary tenant domain)
<input id="allMailboxesOrg" type="text" placeholder="contoso.onmicrosoft.com">
</label>
<p class="setup-hint">
Clearview enumerates every mailbox in the tenant via <code>Get-EXOMailbox -ResultSize Unlimited</code>
and queues one target per mailbox. Can take 10–60 seconds for large tenants.
</p>
<button class="btn btn-solid" type="submit">Queue scan for all mailboxes</button>
</form>
</div>
<div id="mailboxSubmitFeedback" class="feedback" aria-live="polite"></div>
</div>
</section>
<!-- =================================================================== -->
<!-- Route: Scan Entra Groups -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="scan-entra" hidden>
<div class="panel">
<div class="panel-header split">
<h2>New Entra Group Scan</h2>
<span class="badge">Microsoft Graph</span>
</div>
<div class="scan-form auth-block">
<h3>Tenant</h3>
<label>
Select Tenant Profile
<select id="entraScanTenantSelect">
<option value="">-- Select a tenant --</option>
</select>
</label>
<p class="setup-hint">
Entra group scans use the <strong>Microsoft Graph</strong> API. The scan app needs the
Application permission <code>Group.Read.All</code> with admin consent. Authentication
uses the same tenant certificate as SharePoint and Mailbox scans.
</p>
</div>
<div class="form-grid">
<form id="manualEntraForm" class="scan-form" action="#" method="post">
<h3>Manual Object IDs</h3>
<label>
Group identifiers (one per line — Object ID, mail address, or display name)
<textarea id="manualEntraIds" rows="6" placeholder="00000000-0000-0000-0000-000000000000&#10;Pharmacology@contoso.onmicrosoft.com"></textarea>
</label>
<button class="btn btn-solid" type="submit">Queue Entra scan</button>
</form>
<form id="csvEntraForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import (Entra export)</h3>
<label>
CSV with <code>Object ID</code> column (Entra "Groups" export)
<input id="csvEntraFile" type="file" accept=".csv,text/csv">
</label>
<p class="setup-hint">
Export from Entra portal → Groups → All groups → Download. Clearview reads the
<code>Object ID</code> / <code>id</code> column; other columns are ignored.
</p>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form>
<form id="allEntraForm" class="scan-form" action="#" method="post">
<h3>All groups in tenant</h3>
<p class="setup-hint">
Enumerates every group in the tenant (any type) via Microsoft Graph and queues one
target per group. Can take 30–120 seconds for large tenants.
</p>
<button class="btn btn-solid" type="submit">Queue scan for all groups</button>
</form>
</div>
<div id="entraSubmitFeedback" class="feedback" aria-live="polite"></div>
</div>
</section>
<!-- =================================================================== -->
<!-- Route: Jobs (list + selected job details) -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="jobs" hidden>
<div class="panel">
<div class="panel-header split">
<h2>Scan Jobs</h2>
<div class="panel-header-right">
<select id="jobTypeFilter" class="filter-select">
<option value="">All types</option>
<option value="sharepoint">SharePoint deviations</option>
<option value="sharepoint_root">SharePoint root</option>
<option value="mailbox">Mailbox</option>
<option value="entra_groups">Entra groups</option>
</select>
<select id="jobTenantFilter" class="filter-select">
<option value="">All tenants</option>
</select>
<span id="jobAutoRefresh" class="badge">Auto refresh: on</span>
</div>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Job ID</th>
<th>Type</th>
<th>Tenant</th>
<th>Source</th>
<th>Status</th>
<th>Targets</th>
<th>Items</th>
<th>Updated</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="jobsTableBody">
<tr><td colspan="9">No jobs yet.</td></tr>
</tbody>
</table>
</div> </div>
</div> </div>
<!-- Tenants table --> <div class="panel">
<div class="table-wrap"> <div class="panel-header split">
<table> <h2>Selected Job Details</h2>
<thead> <div class="panel-header-right">
<tr> <select id="jobSiteFilter" class="filter-select">
<th>Name</th> <option value="">All targets</option>
<th>Tenant ID</th> </select>
<th>Client ID</th> <button id="exportJobBtn" class="btn btn-outline" type="button" hidden>Export Excel</button>
<th>Auth</th> <span id="selectedJobId" class="badge">No selection</span>
<th>Added</th> </div>
<th>Actions</th> </div>
</tr>
</thead>
<tbody id="tenantsTableBody">
<tr><td colspan="6">No tenants configured yet.</td></tr>
</tbody>
</table>
</div>
<div id="tenantFeedback" class="feedback" aria-live="polite"></div> <div id="jobSummary" class="job-summary">Select a job to inspect targets and deviations.</div>
<div id="jobActivity" class="job-activity" hidden></div>
<!-- Certificate display block (shown after generation) --> <h3 class="subheading" id="targetsHeading">Targets</h3>
<div id="certBlock" class="cert-block" hidden> <div class="table-wrap compact-wrap">
<h3>Public Certificate</h3> <table>
<p>Upload this certificate in <strong>Azure Portal &rarr; App registrations &rarr; [your app] &rarr; Certificates &amp; secrets &rarr; Certificates &rarr; Upload certificate</strong>.</p> <thead id="targetsTableHead">
<textarea id="certPem" class="cert-pem" rows="10" readonly></textarea> <tr>
<div class="form-actions"> <th>URL</th>
<button id="downloadCertBtn" class="btn btn-solid" type="button">Download .cer</button> <th>Status</th>
<button id="copyCertBtn" class="btn btn-outline" type="button">Copy to clipboard</button> <th>Attempts</th>
<button id="closeCertBtn" class="btn btn-outline" type="button">Close</button> <th>Error</th>
<th>Connection test</th>
<th></th>
</tr>
</thead>
<tbody id="targetsTableBody">
<tr><td colspan="6">No job selected.</td></tr>
</tbody>
</table>
</div>
<div id="sharingLinksResolveBlock" hidden>
<h3 class="subheading">Resolve Sharing Links</h3>
<p class="resolve-hint">Fetch the actual recipients for the selected link types. Anonymous links have no resolvable members.</p>
<div id="sharingLinksTypes" class="sharing-link-types"></div>
<div class="form-actions" style="margin-top:0.6rem">
<button id="resolveSharingLinksBtn" class="btn btn-outline" type="button">Resolve</button>
</div>
<div id="resolveFeedback" class="feedback" aria-live="polite"></div>
</div>
<div id="resolveGroupsBlock" hidden>
<h3 class="subheading">Resolve SharePoint Groups</h3>
<p class="resolve-hint">
Expand SharePoint groups (Owners / Members / Visitors / custom site groups) to the underlying
user list. When a member is itself a Microsoft 365 / Azure AD group, Clearview recursively
expands it via Microsoft Graph (members + owners, depth 3) — requires
<code>Group.Read.All</code> on Microsoft Graph for that tenant. Without that permission the
M365 group lines stay collapsed. Members are written to the deviation rows and Excel export.
</p>
<div class="form-actions" style="margin-top:0.6rem">
<button id="resolveGroupsBtn" class="btn btn-outline" type="button">Resolve groups</button>
</div>
<div id="resolveGroupsFeedback" class="feedback" aria-live="polite"></div>
</div>
<h3 class="subheading">Permission Deviations</h3>
<div class="table-wrap deviations-wrap">
<table>
<thead id="deviationsTableHead">
<tr>
<th>Site</th>
<th>Object</th>
<th>Type</th>
<th>Principal</th>
<th>Role</th>
<th>Delta</th>
</tr>
</thead>
<tbody id="deviationsTableBody">
<tr><td colspan="6">No deviation data yet.</td></tr>
</tbody>
</table>
</div> </div>
</div> </div>
</section> </section>
<!-- ------------------------------------------------------------------ --> <!-- =================================================================== -->
<!-- Start New Scan panel --> <!-- Route: Settings (placeholder) -->
<!-- ------------------------------------------------------------------ --> <!-- =================================================================== -->
<section class="panel fade-up" style="--delay: 0.17s"> <section class="route-page" data-route-page="settings" hidden>
<div class="panel-header split"> <div class="panel">
<h2>Start New Scan</h2> <div class="panel-header split">
<span class="badge">Async job queue</span> <h2>Settings</h2>
</div>
<!-- Tenant selector -->
<div class="scan-form auth-block">
<h3>Tenant</h3>
<label>
Select Tenant Profile
<select id="scanTenantSelect">
<option value="">-- Select a tenant --</option>
<option value="__manual__">Manual credentials...</option>
</select>
</label>
</div>
<!-- Manual credentials (only shown when __manual__ selected) -->
<div id="manualCredentialsBlock" class="scan-form auth-block" hidden>
<h3>Microsoft App Credentials</h3>
<div class="auth-grid">
<label>
Tenant ID
<input id="tenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label>
Client ID
<input id="clientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret
<input id="clientSecret" type="password" placeholder="Client secret">
</label>
</div> </div>
</div> <nav class="settings-tabs" role="tablist">
<a href="#/settings/general" class="settings-tab" data-settings-tab="general" role="tab">General</a>
<a href="#/settings/users" class="settings-tab" data-settings-tab="users" data-admin-only role="tab">Users</a>
<a href="#/settings/audit" class="settings-tab" data-settings-tab="audit" data-admin-only role="tab">Audit log</a>
</nav>
<div class="form-grid"> <div class="settings-pane" data-settings-pane="general">
<form id="manualScanForm" class="scan-form" action="#" method="post"> <p class="setup-hint">Runtime configuration is currently controlled via environment variables in <code>stack/.env</code>. See the <strong>TECHNICAL.md</strong> document for the full list (timeouts, retries, scan caps, onboarding).</p>
<h3>Manual URLs</h3>
<label>
Site URLs (one per line)
<textarea id="manualUrls" rows="6" placeholder="https://contoso.sharepoint.com/sites/finance&#10;https://contoso.sharepoint.com/sites/hr"></textarea>
</label>
<label class="checkline">
<input id="manualSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue manual scan</button>
</form>
<form id="csvScanForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import</h3>
<label>
Microsoft Sites export (CSV)
<input id="csvFile" type="file" accept=".csv,text/csv">
</label>
<label class="checkline">
<input id="csvSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form>
</div>
<div id="submitFeedback" class="feedback" aria-live="polite"></div>
</section>
<!-- ------------------------------------------------------------------ -->
<!-- Scan Jobs panel -->
<!-- ------------------------------------------------------------------ -->
<section class="panel fade-up" style="--delay: 0.23s">
<div class="panel-header split">
<h2>Scan Jobs</h2>
<div class="panel-header-right">
<select id="jobTenantFilter" class="filter-select">
<option value="">All tenants</option>
</select>
<span id="jobAutoRefresh" class="badge">Auto refresh: on</span>
</div> </div>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Job ID</th>
<th>Tenant</th>
<th>Source</th>
<th>Status</th>
<th>Targets</th>
<th>Items</th>
<th>Updated</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="jobsTableBody">
<tr><td colspan="8">No jobs yet.</td></tr>
</tbody>
</table>
</div>
</section>
<!-- ------------------------------------------------------------------ --> <div class="settings-pane" data-settings-pane="users" hidden>
<!-- Selected Job Details panel --> <div class="users-toolbar"><button id="newUserBtn" class="btn btn-primary" type="button">New user</button></div>
<!-- ------------------------------------------------------------------ --> <div id="usersTable"></div>
<section class="panel fade-up" style="--delay: 0.29s">
<div class="panel-header split">
<h2>Selected Job Details</h2>
<div class="panel-header-right">
<select id="jobSiteFilter" class="filter-select">
<option value="">All sites</option>
</select>
<button id="exportJobBtn" class="btn btn-outline" type="button" hidden>Export Excel</button>
<span id="selectedJobId" class="badge">No selection</span>
</div> </div>
</div>
<div id="jobSummary" class="job-summary">Select a job to inspect targets and deviations.</div> <div class="settings-pane" data-settings-pane="audit" hidden>
<div id="jobActivity" class="job-activity" hidden></div> <div id="auditTable"></div>
<h3 class="subheading">Targets</h3>
<div class="table-wrap compact-wrap">
<table>
<thead>
<tr>
<th>URL</th>
<th>Status</th>
<th>Attempts</th>
<th>Error</th>
</tr>
</thead>
<tbody id="targetsTableBody">
<tr><td colspan="4">No job selected.</td></tr>
</tbody>
</table>
</div>
<div id="sharingLinksResolveBlock" hidden>
<h3 class="subheading">Resolve Sharing Links</h3>
<p class="resolve-hint">Fetch the actual recipients for the selected link types. Anonymous links have no resolvable members.</p>
<div id="sharingLinksTypes" class="sharing-link-types"></div>
<div class="form-actions" style="margin-top:0.6rem">
<button id="resolveSharingLinksBtn" class="btn btn-outline" type="button">Resolve</button>
</div> </div>
<div id="resolveFeedback" class="feedback" aria-live="polite"></div>
</div>
<h3 class="subheading">Permission Deviations</h3>
<div class="table-wrap deviations-wrap">
<table>
<thead>
<tr>
<th>Site</th>
<th>Object</th>
<th>Type</th>
<th>Principal</th>
<th>Role</th>
<th>Delta</th>
</tr>
</thead>
<tbody id="deviationsTableBody">
<tr><td colspan="6">No deviation data yet.</td></tr>
</tbody>
</table>
</div> </div>
</section> </section>
</main> </main>

View File

@ -0,0 +1,49 @@
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Clearview — Sign in</title>
  <link rel="stylesheet" href="/styles.css" />
</head>
<body class="auth-page">
  <main class="auth-card">
    <h1>Clearview</h1>
    <p class="auth-sub">Sign in to continue</p>
    <!-- method="post" is a safety net: if the script below fails to load, a
         native submit must never put the password into the URL (the default
         form method is GET). -->
    <form id="loginForm" method="post">
      <label>Username<input name="username" autocomplete="username" required autofocus /></label>
      <label>Password<input name="password" type="password" autocomplete="current-password" required /></label>
      <label class="auth-remember"><input name="remember" type="checkbox" /> Remember me for 30 days</label>
      <button type="submit">Sign in</button>
      <p id="loginError" class="auth-error" hidden></p>
    </form>
  </main>
  <script src="/auth.js"></script>
  <script>
    (function () {
      const form = document.getElementById('loginForm');
      const err = document.getElementById('loginError');

      function showError(message) {
        err.textContent = message;
        err.hidden = false;
      }

      // Register the submit handler synchronously, BEFORE any network call.
      // Previously it was attached only after the setup probe resolved, so an
      // early submit (autofill + Enter) or a failed probe fell through to the
      // browser's native form submission.
      form.addEventListener('submit', async (ev) => {
        ev.preventDefault();
        err.hidden = true;
        const fd = new FormData(form);
        let res;
        try {
          res = await ClearviewAuth.postJson('/api/auth/login', {
            username: fd.get('username'),
            password: fd.get('password'),
            remember: fd.get('remember') === 'on',
          });
        } catch (e) {
          // Request never produced a response (e.g. network failure).
          showError('Sign-in failed');
          return;
        }
        if (res && res.ok) {
          window.location.replace('/');
        } else {
          showError((res && res.data && res.data.detail) || 'Sign-in failed');
        }
      });

      // First-run probe: send the visitor to the setup page when no account
      // exists yet. A failed probe simply leaves the login form in place.
      (async function () {
        try {
          const setup = await ClearviewAuth.getJson('/api/auth/setup-required');
          if (setup && setup.ok && setup.data && setup.data.setup_required) {
            window.location.replace('/setup.html');
          }
        } catch (e) {
          /* probe failed: stay on the login page */
        }
      })();
    })();
  </script>
</body>
</html>

View File

@ -0,0 +1,47 @@
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Clearview — First-time setup</title>
  <link rel="stylesheet" href="/styles.css" />
</head>
<body class="auth-page">
  <main class="auth-card">
    <h1>Welcome to Clearview</h1>
    <p class="auth-sub">Create the first administrator account.</p>
    <!-- method="post" is a safety net: if the script below fails to load, a
         native submit must never put the new password into the URL (the
         default form method is GET). -->
    <form id="setupForm" method="post">
      <label>Username<input name="username" autocomplete="username" required autofocus /></label>
      <label>Password (≥12 chars, letter + digit)<input name="password" type="password" autocomplete="new-password" required minlength="12" /></label>
      <button type="submit">Create administrator</button>
      <p id="setupError" class="auth-error" hidden></p>
    </form>
  </main>
  <script src="/auth.js"></script>
  <script>
    (function () {
      const form = document.getElementById('setupForm');
      const err = document.getElementById('setupError');

      function showError(message) {
        err.textContent = message;
        err.hidden = false;
      }

      // Register the submit handler synchronously, BEFORE the probe below.
      // Previously it was attached only after the probe resolved, so an early
      // submit fell through to the browser's native form submission.
      form.addEventListener('submit', async (ev) => {
        ev.preventDefault();
        err.hidden = true;
        const fd = new FormData(form);
        let res;
        try {
          res = await ClearviewAuth.postJson('/api/auth/setup', {
            username: fd.get('username'),
            password: fd.get('password'),
          });
        } catch (e) {
          // Request never produced a response (e.g. network failure).
          showError('Setup failed');
          return;
        }
        if (res && res.ok) {
          window.location.replace('/');
        } else {
          showError((res && res.data && res.data.detail) || 'Setup failed');
        }
      });

      // Only stay on this page while first-time setup is still required; any
      // other outcome (already set up, probe error) goes to the login page.
      (async function () {
        let probe = null;
        try {
          probe = await ClearviewAuth.getJson('/api/auth/setup-required');
        } catch (e) {
          probe = null;
        }
        if (!probe || !probe.ok || !probe.data || !probe.data.setup_required) {
          window.location.replace('/login.html');
        }
      })();
    })();
  </script>
</body>
</html>

View File

@ -55,38 +55,12 @@ body {
background: radial-gradient(circle at center, rgba(3, 105, 161, 0.2), rgba(3, 105, 161, 0)); background: radial-gradient(circle at center, rgba(3, 105, 161, 0.2), rgba(3, 105, 161, 0));
} }
.topbar {
width: min(1100px, calc(100% - 2rem));
margin: 1.1rem auto 0;
padding: 0.95rem 1.1rem;
border: 1px solid var(--cv-border);
border-radius: 18px;
background: rgba(255, 255, 255, 0.75);
backdrop-filter: blur(8px);
display: flex;
align-items: center;
justify-content: space-between;
box-shadow: 0 10px 24px rgba(20, 20, 19, 0.08);
}
.brand-logo { .brand-logo {
height: 42px; height: 42px;
width: auto; width: auto;
display: block; display: block;
} }
.topbar-actions {
display: flex;
gap: 0.6rem;
}
.layout {
width: min(1100px, calc(100% - 2rem));
margin: 1rem auto 2.5rem;
display: grid;
gap: 1rem;
}
.hero, .hero,
.panel { .panel {
border-radius: 22px; border-radius: 22px;
@ -131,7 +105,7 @@ h2 {
.hero-stats { .hero-stats {
margin-top: 1.3rem; margin-top: 1.3rem;
display: grid; display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr)); grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 0.75rem; gap: 0.75rem;
} }
@ -291,8 +265,9 @@ textarea {
input:focus, input:focus,
select:focus, select:focus,
textarea:focus, textarea:focus,
button:focus { button:focus,
outline: 2px solid rgba(14, 165, 233, 0.38); a:focus-visible {
outline: 2px solid var(--cv-accent);
outline-offset: 2px; outline-offset: 2px;
} }
@ -533,8 +508,8 @@ strong {
} }
.risk.warn { .risk.warn {
background: rgba(14, 165, 233, 0.15); background: rgba(234, 179, 8, 0.18);
color: var(--cv-accent-dark); color: #854d0e;
} }
.risk.high { .risk.high {
@ -584,12 +559,6 @@ strong {
} }
@media (max-width: 930px) { @media (max-width: 930px) {
.topbar {
flex-direction: column;
align-items: flex-start;
gap: 0.8rem;
}
.hero-stats { .hero-stats {
grid-template-columns: 1fr; grid-template-columns: 1fr;
} }
@ -616,11 +585,6 @@ strong {
} }
@media (max-width: 640px) { @media (max-width: 640px) {
.layout,
.topbar {
width: calc(100% - 1rem);
}
.hero, .hero,
.panel { .panel {
border-radius: 16px; border-radius: 16px;
@ -633,12 +597,256 @@ strong {
.hero h1 { .hero h1 {
max-width: none; max-width: none;
} }
}
.topbar-actions { /* ===========================================================================
width: 100%; Sidebar layout (added in mailbox-scanning refactor)
=========================================================================== */
.app-shell {
display: grid;
grid-template-columns: 220px 1fr;
min-height: 100vh;
}
.sidebar {
background: linear-gradient(180deg, #0f1d33 0%, #0b1424 100%);
color: #e6edf7;
display: flex;
flex-direction: column;
padding: 0;
position: sticky;
top: 0;
height: 100vh;
border-right: 1px solid rgba(255, 255, 255, 0.06);
}
.sidebar-brand {
height: 64px;
display: flex;
align-items: center;
padding: 0 1rem;
border-bottom: 1px solid rgba(255, 255, 255, 0.08);
}
.sidebar-brand .brand-logo {
height: 36px;
filter: brightness(1.05) saturate(1.1);
}
.sidebar-nav {
flex: 1;
display: flex;
flex-direction: column;
gap: 0.15rem;
padding: 0.75rem 0.5rem;
overflow-y: auto;
}
.sidebar-nav .nav-link {
display: block;
padding: 0.5rem 0.75rem;
border-radius: 8px;
color: rgba(230, 237, 247, 0.85);
text-decoration: none;
font-size: 0.9rem;
font-weight: 500;
transition: background-color 0.12s ease, color 0.12s ease;
}
.sidebar-nav .nav-link:hover {
background: rgba(255, 255, 255, 0.06);
color: #ffffff;
}
.sidebar-nav .nav-link.active {
background: rgba(14, 165, 233, 0.18);
color: #ffffff;
box-shadow: inset 2px 0 0 var(--cv-accent);
}
.sidebar-nav .nav-section {
padding: 0.65rem 0.75rem 0.25rem;
font-size: 0.7rem;
font-weight: 700;
letter-spacing: 0.08em;
text-transform: uppercase;
color: rgba(230, 237, 247, 0.45);
}
.sidebar-nav .nav-spacer {
flex: 1;
min-height: 1rem;
}
.sidebar-foot {
padding: 0.75rem 1rem;
border-top: 1px solid rgba(255, 255, 255, 0.08);
font-size: 0.78rem;
color: rgba(230, 237, 247, 0.55);
}
.content {
display: flex;
flex-direction: column;
min-width: 0;
padding: 0 1.25rem 2rem;
}
.content-topbar {
display: flex;
align-items: center;
justify-content: space-between;
height: 64px;
border-bottom: 1px solid var(--cv-border);
margin-bottom: 1rem;
}
.content-title {
font-family: "Space Grotesk", sans-serif;
font-size: 1.15rem;
font-weight: 600;
}
.content-actions {
display: flex;
gap: 0.5rem;
}
.route-page {
display: flex;
flex-direction: column;
gap: 1rem;
}
.route-page[hidden] {
display: none !important;
}
.setup-hint {
font-size: 0.85rem;
color: var(--cv-text-secondary);
margin: 0.5rem 0 0;
}
@media (max-width: 900px) {
.app-shell {
grid-template-columns: 1fr;
} }
.sidebar {
.topbar-actions .btn { position: static;
flex: 1; height: auto;
flex-direction: row;
flex-wrap: wrap;
}
.sidebar-nav {
flex-direction: row;
flex-wrap: wrap;
overflow-x: auto;
}
.sidebar-nav .nav-section,
.sidebar-nav .nav-spacer {
display: none;
}
.sidebar-foot {
display: none;
} }
} }
/* Hide the SPA until the auth gate resolves. This prevents the unauthenticated
   dashboard from flashing before the redirect kicks in. Login/setup pages don't
   load app.js, so they are unaffected. */
html[data-auth-pending] body { visibility: hidden; }
/* === Auth (login / setup) pages and header badge ============================== */
.auth-page {
display: flex; align-items: center; justify-content: center;
min-height: 100vh; margin: 0;
color: var(--cv-text-primary);
background: linear-gradient(165deg, var(--cv-page), #eef7fb 55%, #f6f4ee 100%);
font-family: inherit;
}
.auth-card {
width: 380px; max-width: 92vw; padding: 32px;
background: linear-gradient(180deg, rgba(255,255,255,0.92), rgba(255,255,255,0.82));
border: 1px solid var(--cv-border);
border-radius: 16px;
box-shadow: var(--cv-shadow);
}
.auth-card h1 { margin: 0 0 4px; font-size: 22px; color: var(--cv-text-primary); }
.auth-sub { margin: 0 0 20px; color: var(--cv-text-secondary); }
.auth-card form { display: flex; flex-direction: column; gap: 14px; }
.auth-card label { display: flex; flex-direction: column; gap: 6px; font-size: 13px; color: var(--cv-text-secondary); }
.auth-card input[type=text],
.auth-card input[type=password],
.auth-card input:not([type]) {
padding: 10px 12px;
background: var(--cv-white);
border: 1px solid var(--cv-border);
border-radius: 8px;
color: var(--cv-text-primary);
font: inherit;
}
.auth-card input:focus { outline: 2px solid var(--cv-accent); outline-offset: 1px; }
.auth-card .auth-remember { flex-direction: row; align-items: center; gap: 8px; color: var(--cv-text-secondary); }
.auth-card button[type=submit],
.auth-card button:not([id$=Cancel]) {
padding: 11px;
background: linear-gradient(135deg, var(--cv-accent), var(--cv-accent-dark));
border: 0; border-radius: 8px;
color: var(--cv-white); font-weight: 600; cursor: pointer;
}
.auth-card button:hover { filter: brightness(1.05); }
.auth-error {
background: rgba(220, 38, 38, 0.10); color: #b91c1c;
padding: 8px 10px; border-radius: 6px; font-size: 13px;
border: 1px solid rgba(220, 38, 38, 0.25);
}
.user-badge {
display: inline-flex; align-items: center; gap: 8px;
padding: 4px 10px; border: 1px solid var(--cv-border);
border-radius: 999px; font-size: 12px;
background: var(--cv-white); color: var(--cv-text-primary);
}
.user-badge button { background: transparent; border: 0; color: var(--cv-accent-dark); cursor: pointer; padding: 0; }
.user-badge button:hover { text-decoration: underline; }
/* Users / audit admin view */
.modal-back { position: fixed; inset: 0; background: rgba(20,20,19,0.45); display: flex; align-items: center; justify-content: center; z-index: 1000; }
.modal {
background: var(--cv-white); color: var(--cv-text-primary);
padding: 22px; border-radius: 12px; min-width: 340px;
border: 1px solid var(--cv-border);
box-shadow: var(--cv-shadow);
display: flex; flex-direction: column; gap: 12px;
}
.modal label { display: flex; flex-direction: column; gap: 6px; font-size: 13px; color: var(--cv-text-secondary); }
.modal input { padding: 8px 10px; background: var(--cv-white); border: 1px solid var(--cv-border); border-radius: 6px; color: var(--cv-text-primary); font: inherit; }
.modal select { padding: 8px 10px; border: 1px solid var(--cv-border); border-radius: 6px; background: var(--cv-white); color: var(--cv-text-primary); font: inherit; }
.modal-actions { display: flex; justify-content: flex-end; gap: 8px; }
.users-view table,
.settings-pane table { width: 100%; border-collapse: collapse; }
.users-view th, .users-view td,
.settings-pane th, .settings-pane td { padding: 8px 10px; border-bottom: 1px solid var(--cv-border); text-align: left; color: var(--cv-text-primary); }
.users-toolbar { margin: 12px 0; }
/* Settings sub-tabs */
.settings-tabs {
display: flex; gap: 4px; margin: 6px 0 18px;
border-bottom: 1px solid var(--cv-border);
}
.settings-tab {
padding: 8px 14px;
color: var(--cv-text-secondary);
text-decoration: none;
border-bottom: 2px solid transparent;
font-size: 14px;
font-weight: 500;
transition: color 120ms ease, border-color 120ms ease;
}
.settings-tab:hover { color: var(--cv-text-primary); }
.settings-tab.active {
color: var(--cv-accent-dark);
border-bottom-color: var(--cv-accent);
}
.settings-pane { padding-top: 4px; }

View File

@ -0,0 +1,321 @@
"""Shared helpers for the API route modules.
Extracted verbatim from the original monolithic ``main.py`` so the route
modules (``api_tenants``, ``api_jobs``) can share credential resolution, job
creation, response mapping, and export helpers without circular imports.
"""
from __future__ import annotations
import re
import uuid
from datetime import datetime, timezone
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from .db import SessionLocal
from .default_sites import is_default_site, normalize_site_url
from .models import ScanJob, ScanTarget, TenantProfile
from .scanners import AuthConfig
from .schemas import ScanJobCreateResponse, ScanJobSummary, TenantProfileItem
def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
"""
Extract (group_name, link_type) from principal values such as:
- SharingLinks.<guid>.<LinkType>.<guid>
- c:0o.c|federateddirectoryclaimprovider|SharingLinks.<guid>.<LinkType>.<guid>
"""
if not principal:
return None
text = principal.strip()
segments = [s.strip() for s in text.split("|") if s.strip()]
candidate = ""
for segment in reversed(segments):
if segment.lower().startswith("sharinglinks."):
candidate = segment
break
if not candidate and text.lower().startswith("sharinglinks."):
candidate = text
if not candidate:
return None
parts = candidate.split(".")
if len(parts) < 3:
return None
return candidate, parts[2]
_SCAN_TYPE_LABELS = {
"sharepoint": "Deviations",
"sharepoint_root": "Root",
"mailbox": "Mailbox",
"entra_groups": "EntraGroups",
}
def _build_export_filename(job: ScanJob, job_id: str) -> str:
tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
scan_type = job.scan_type or "sharepoint"
type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
short_id = job_id.replace("-", "")[-12:]
return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
def _build_auth_config(
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> AuthConfig:
    """Assemble the ``AuthConfig`` shared by the tenant-wide enumerators.

    When ``profile_id`` is given and the profile exists, its certificate
    private key, thumbprint and public PEM are copied into the config;
    otherwise those three fields stay ``None``. A missing client secret is
    passed on as an empty string.
    """
    cert_private_key: str | None = None
    cert_thumbprint: str | None = None
    cert_public_pem: str | None = None
    if profile_id:
        with SessionLocal() as db:
            profile = db.get(TenantProfile, profile_id)
            if profile:
                cert_private_key = profile.cert_private_key
                cert_thumbprint = profile.cert_thumbprint
                cert_public_pem = profile.cert_public_pem
    return AuthConfig(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret or "",
        cert_private_key=cert_private_key,
        cert_thumbprint=cert_thumbprint,
        cert_public_pem=cert_public_pem,
    )


def _enumerate_all_entra_groups(
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """List every Entra group of the tenant via ``scanners.entra.list_all_groups``.

    Raises:
        HTTPException: 400 with the underlying error text when the
            enumeration call raises.
    """
    auth = _build_auth_config(tenant_id, client_id, client_secret, profile_id)
    # Imported lazily, as in the original code, so the scanner module is only
    # loaded when a tenant-wide enumeration is actually requested.
    from .scanners import entra as _entra

    try:
        return _entra.list_all_groups(auth)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc


def _enumerate_all_mailboxes(
    organization: str | None,
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """List every mailbox of the tenant via ``scanners.mailbox.list_mailboxes``.

    ``organization`` must be present and contain a dot (e.g.
    ``contoso.onmicrosoft.com``); it is stripped and lower-cased before being
    handed to the scanner. The check runs before any database access.

    Raises:
        HTTPException: 400 when ``organization`` is missing or has no dot, or
            when the enumeration call raises.
    """
    if not organization or "." not in organization:
        raise HTTPException(
            status_code=400,
            detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
        )

    auth = _build_auth_config(tenant_id, client_id, client_secret, profile_id)
    # Imported lazily, as in the original code (see _enumerate_all_entra_groups).
    from .scanners import mailbox as _mailbox

    try:
        return _mailbox.list_mailboxes(organization=organization.strip().lower(), auth=auth)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
def _resolve_credentials(
db,
tenant_profile_id: str | None,
tenant_id: str | None,
client_id: str | None,
client_secret: str | None,
) -> tuple[str, str, str | None, str | None]:
if tenant_profile_id:
profile = db.get(TenantProfile, tenant_profile_id)
if not profile:
raise HTTPException(status_code=404, detail="Tenant profile not found")
if not profile.client_secret and not profile.cert_thumbprint:
raise HTTPException(
status_code=400,
detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
)
return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
if tenant_id and client_id and client_secret:
return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
raise HTTPException(
status_code=400,
detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
)
def _create_job_from_targets(
    raw_targets: list[str],
    scan_type: str,
    skip_default_sites: bool,
    source_type: str,
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    tenant_profile_id: str | None = None,
) -> ScanJobCreateResponse:
    """Validate targets, persist a ``ScanJob`` with its ``ScanTarget`` rows, and report.

    Normalisation depends on ``scan_type``:
      - ``mailbox``: stripped and lower-cased; must contain ``@``.
      - ``entra_groups``: stripped; must be non-empty.
      - anything else: passed through ``normalize_site_url``.

    Duplicates (after normalisation) are dropped silently. For ``sharepoint``
    and ``sharepoint_root`` scans, default sites are set aside when
    ``skip_default_sites`` is true. A job with no accepted target is stored as
    already ``completed`` with a warning message instead of being queued.

    Args:
        client_secret: May be ``None`` for certificate-only tenant profiles.

    Returns:
        The created job summary plus the accepted, default-skipped and invalid
        target lists.
    """
    accepted: list[str] = []
    skipped_default_urls: list[str] = []
    invalid: list[str] = []
    seen: set[str] = set()

    for raw in raw_targets:
        if scan_type == "mailbox":
            normalized = (raw or "").strip().lower()
            if not normalized or "@" not in normalized:
                invalid.append(raw)
                continue
        elif scan_type == "entra_groups":
            normalized = (raw or "").strip()
            if not normalized:
                invalid.append(raw)
                continue
        else:
            normalized = normalize_site_url(raw) or ""
            if not normalized:
                invalid.append(raw)
                continue

        if normalized in seen:
            continue
        seen.add(normalized)

        if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
            skipped_default_urls.append(normalized)
            continue

        accepted.append(normalized)

    with SessionLocal() as db:
        now = datetime.now(timezone.utc)
        job = ScanJob(
            id=str(uuid.uuid4()),
            source_type=source_type,
            scan_type=scan_type,
            status="queued" if accepted else "completed",
            skip_default_sites=skip_default_sites,
            tenant_profile_id=tenant_profile_id,
            auth_tenant_id=tenant_id,
            auth_client_id=client_id,
            auth_client_secret=client_secret,
            total_targets=len(accepted),
            skipped_targets=len(skipped_default_urls),
            warning_message=None,
            error_message=None,
            created_at=now,
            updated_at=now,
            finished_at=now if not accepted else None,
        )
        if not accepted:
            if scan_type == "mailbox":
                job.warning_message = "No scannable mailboxes after validation"
            elif scan_type == "entra_groups":
                # Group scans never apply default-site filtering, so the
                # SharePoint wording used previously was misleading here.
                job.warning_message = "No scannable groups after validation"
            else:
                job.warning_message = "No scannable sites after validation and default-site filtering"

        db.add(job)
        db.flush()

        for index, target in enumerate(accepted, start=1):
            db.add(
                ScanTarget(
                    job_id=job.id,
                    site_url=target,
                    source_row=index,
                    status="queued",
                    attempts=0,
                    created_at=now,
                    updated_at=now,
                )
            )

        db.commit()

        # Re-read the job with its tenant profile eagerly loaded so the
        # summary can include the tenant name.
        stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
        job = db.execute(stmt).unique().scalar_one()

        return ScanJobCreateResponse(
            job=_to_job_summary(job),
            accepted_urls=accepted,
            skipped_default_urls=skipped_default_urls,
            invalid_urls=invalid,
        )
def _to_job_summary(job: ScanJob) -> ScanJobSummary:
    """Convert a ``ScanJob`` row into its ``ScanJobSummary`` response model.

    Most fields are copied unchanged. Three are derived:
      - ``scan_type`` defaults to ``"sharepoint"`` when unset on the row;
      - ``tenant_name`` comes from the linked tenant profile, if any;
      - ``scan_activity`` is only reported while the job status is ``running``.
    """
    copied_fields = (
        "id",
        "status",
        "source_type",
        "skip_default_sites",
        "tenant_profile_id",
        "total_targets",
        "processed_targets",
        "successful_targets",
        "failed_targets",
        "skipped_targets",
        "items_scanned",
        "warning_message",
        "error_message",
        "created_at",
        "updated_at",
        "started_at",
        "finished_at",
    )
    values = {field: getattr(job, field) for field in copied_fields}

    profile = job.tenant_profile
    values["scan_type"] = job.scan_type or "sharepoint"
    values["tenant_name"] = profile.name if profile else None
    values["scan_activity"] = job.scan_activity if job.status == "running" else None

    return ScanJobSummary(**values)
def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
    """Convert a ``TenantProfile`` row into its ``TenantProfileItem`` response model.

    Only the fields listed below are copied, so the client secret and the
    certificate private key are not part of the result. ``has_certificate``
    is true when a thumbprint is stored on the profile.
    """
    copied_fields = (
        "id",
        "name",
        "tenant_id",
        "primary_domain",
        "client_id",
        "cert_thumbprint",
        "cert_expires_at",
        "created_at",
        "updated_at",
    )
    values = {field: getattr(profile, field) for field in copied_fields}
    values["has_certificate"] = bool(profile.cert_thumbprint)
    return TenantProfileItem(**values)
def _sharing_link_risk_label(principal: str) -> str:
if not principal.startswith("SharingLinks."):
return ""
parts = principal.split(".", 3)
link_type = parts[2] if len(parts) >= 3 else ""
if link_type.startswith("Anonymous"):
return "Critical"
if link_type == "Flexible":
return "High"
if link_type.startswith("Organization"):
return "Low"
if link_type.startswith("Direct"):
return "Low"
return "Unknown"

View File

@ -0,0 +1,645 @@
"""Scan-job routes: create, list, inspect, cancel, delete, resolve, export."""
from __future__ import annotations
import io
from datetime import datetime, timezone
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
from fastapi.responses import Response, StreamingResponse
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from .api_helpers import (
_build_export_filename,
_create_job_from_targets,
_enumerate_all_entra_groups,
_enumerate_all_mailboxes,
_extract_sharing_link_group_and_type,
_resolve_credentials,
_sharing_link_risk_label,
_to_job_summary,
)
from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
from .db import SessionLocal
from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
from .scanners import AuthConfig, probe
from .schemas import (
CreateScanJobRequest,
PermissionDeviationItem,
ProbeResultResponse,
ResolveGroupsResponse,
ResolveSharingLinksRequest,
ResolveSharingLinksResponse,
ScanJobCreateResponse,
ScanJobDetail,
ScanJobSummary,
ScanTargetItem,
SharingLinkTypesResponse,
)
router = APIRouter()
@router.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
    """Create a scan job from a JSON request.

    Credentials are resolved first (stored tenant profile or manual values).
    The target list then depends on ``payload.scan_type``:
      - ``entra_groups``: every group of the tenant when ``scan_all_groups``
        is set, otherwise ``payload.group_ids``;
      - ``mailbox``: every mailbox when ``scan_all_mailboxes`` is set (the
        organization falls back to the profile's primary domain when the
        request leaves it empty), otherwise ``payload.mailboxes``;
      - anything else: ``payload.site_urls``.

    Tenant-wide enumerations are recorded with source type ``tenant_all``;
    everything else is ``manual``.
    """
    with SessionLocal() as session:
        tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
            db=session,
            tenant_profile_id=payload.tenant_profile_id,
            tenant_id=payload.tenant_id,
            client_id=payload.client_id,
            client_secret=payload.client_secret,
        )

    kind = payload.scan_type
    origin = "manual"

    if kind == "entra_groups" and payload.scan_all_groups:
        targets = _enumerate_all_entra_groups(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
            profile_id=profile_id,
        )
        origin = "tenant_all"
    elif kind == "entra_groups":
        targets = [str(group_id) for group_id in payload.group_ids]
    elif kind == "mailbox" and payload.scan_all_mailboxes:
        org = payload.organization
        if profile_id and not org:
            # No organization in the request: use the profile's primary domain.
            with SessionLocal() as session:
                stored = session.get(TenantProfile, profile_id)
                if stored and stored.primary_domain:
                    org = stored.primary_domain
        targets = _enumerate_all_mailboxes(
            organization=org,
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
            profile_id=profile_id,
        )
        origin = "tenant_all"
    elif kind == "mailbox":
        targets = [str(address) for address in payload.mailboxes]
    else:
        targets = [str(url) for url in payload.site_urls]

    return _create_job_from_targets(
        raw_targets=targets,
        scan_type=kind,
        skip_default_sites=payload.skip_default_sites,
        source_type=origin,
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
        tenant_profile_id=profile_id,
    )
@router.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
def create_scan_job_from_csv(
    skip_default_sites: bool = True,
    scan_type: str = Form("sharepoint"),
    tenant_profile_id: str | None = Form(None),
    tenant_id: str | None = Form(None),
    client_id: str | None = Form(None),
    client_secret: str | None = Form(None),
    file: UploadFile = File(...),
) -> ScanJobCreateResponse:
    """Create a scan job whose targets come from an uploaded CSV file.

    The CSV parser is chosen by ``scan_type`` (mailbox, Entra groups, or
    sites for anything else). When the parser reports invalid rows, a
    ``"CSV issues: <count>"`` note is appended to the stored job's warning
    message and mirrored onto the returned response.
    """
    with SessionLocal() as session:
        credentials = _resolve_credentials(
            db=session,
            tenant_profile_id=tenant_profile_id,
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        )
    resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = credentials

    raw_bytes = file.file.read()
    is_mailbox = scan_type == "mailbox"
    if is_mailbox:
        parsed = parse_mailboxes_csv(raw_bytes)
    elif scan_type == "entra_groups":
        parsed = parse_entra_groups_csv(raw_bytes)
    else:
        parsed = parse_sites_csv(raw_bytes)
    # Only the mailbox parser exposes `.mailboxes`; the other two expose `.urls`.
    targets = parsed.mailboxes if is_mailbox else parsed.urls

    response = _create_job_from_targets(
        raw_targets=targets,
        scan_type=scan_type,
        skip_default_sites=skip_default_sites,
        source_type="csv",
        tenant_id=resolved_tenant_id,
        client_id=resolved_client_id,
        client_secret=resolved_client_secret,
        tenant_profile_id=profile_id,
    )

    if not parsed.invalid_rows:
        return response

    note = f"CSV issues: {len(parsed.invalid_rows)}"
    with SessionLocal() as session:
        stored_job = session.get(ScanJob, response.job.id)
        if stored_job:
            existing = stored_job.warning_message
            stored_job.warning_message = f"{existing} | {note}" if existing else note
            stored_job.updated_at = datetime.now(timezone.utc)
            session.commit()
            session.refresh(stored_job)
            # Keep the response in sync with what was persisted.
            response.job.warning_message = stored_job.warning_message
    return response
@router.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
def cancel_scan_job(job_id: str) -> ScanJobSummary:
    """Mark a queued or running job as cancelled and return its summary.

    Raises:
        HTTPException: 404 when the job does not exist, 409 when it is
            neither queued nor running.
    """
    job_with_profile = (
        select(ScanJob)
        .options(joinedload(ScanJob.tenant_profile))
        .where(ScanJob.id == job_id)
    )
    with SessionLocal() as session:
        job = session.execute(job_with_profile).unique().scalar_one_or_none()
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        if job.status not in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is not queued or running")

        stamp = datetime.now(timezone.utc)
        job.status = "cancelled"
        job.updated_at = stamp
        job.finished_at = stamp
        job.scan_activity = None
        session.commit()
        session.refresh(job)

        # Re-run the eager-loading query so the tenant profile is populated again after the commit.
        job = session.execute(job_with_profile).unique().scalar_one()
        return _to_job_summary(job)
@router.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
def delete_scan_job(job_id: str) -> Response:
    """Delete a job that is no longer queued or running.

    Raises:
        HTTPException: 404 when the job does not exist, 409 when it is still
            queued or running.
    """
    active_states = ("queued", "running")
    with SessionLocal() as session:
        doomed = session.get(ScanJob, job_id)
        if not doomed:
            raise HTTPException(status_code=404, detail="Job not found")
        if doomed.status in active_states:
            raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")
        session.delete(doomed)
        session.commit()
    return Response(status_code=204)
@router.get("/api/scan-jobs", response_model=list[ScanJobSummary])
def list_scan_jobs(
    limit: int = 20,
    tenant_profile_id: str | None = None,
    scan_type: str | None = None,
) -> list[ScanJobSummary]:
    """List the most recently created jobs, newest first.

    Args:
        limit: Maximum number of jobs; clamped to the range 1..100.
        tenant_profile_id: When given, only jobs bound to this profile.
        scan_type: When given, only jobs of this scan type.
    """
    page_size = min(max(limit, 1), 100)
    query = select(ScanJob).options(joinedload(ScanJob.tenant_profile))
    if tenant_profile_id:
        query = query.where(ScanJob.tenant_profile_id == tenant_profile_id)
    if scan_type:
        query = query.where(ScanJob.scan_type == scan_type)
    query = query.order_by(ScanJob.created_at.desc()).limit(page_size)

    with SessionLocal() as session:
        rows = list(session.execute(query).unique().scalars())
        return [_to_job_summary(row) for row in rows]
@router.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
    """Count a job's deviations per sharing-link type.

    Principals that ``_extract_sharing_link_group_and_type`` does not
    recognise are ignored.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    with SessionLocal() as session:
        if not session.get(ScanJob, job_id):
            raise HTTPException(status_code=404, detail="Job not found")
        principal_query = select(PermissionDeviation.principal).where(PermissionDeviation.job_id == job_id)
        principals = list(session.execute(principal_query).scalars())

    tally: dict[str, int] = {}
    for raw_principal in principals:
        recognised = _extract_sharing_link_group_and_type(str(raw_principal or ""))
        if recognised:
            _, link_type = recognised
            tally[link_type] = tally.get(link_type, 0) + 1
    return SharingLinkTypesResponse(type_counts=tally)
@router.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
    """Resolve the members behind sharing-link principals of a finished job.

    Deviations whose principal parses as a sharing-link group of one of the
    requested ``payload.link_types`` are grouped per (site URL, group name).
    Each group is resolved once and the comma-joined member list is written
    to ``resolved_members`` on every deviation of that group.

    Raises:
        HTTPException: 404 when the job does not exist, 409 when it is still
            queued or running.
    """
    # NOTE(review): the resolve-groups endpoint imports this helper from
    # `.scanners.sharepoint`; confirm that `.scanner` is still a valid module.
    from .scanner import resolve_sharing_link_members

    with SessionLocal() as session:
        job = session.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")

        # Certificate material lives on the tenant profile, when the job has one.
        profile = session.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=profile.cert_private_key if profile else None,
            cert_thumbprint=profile.cert_thumbprint if profile else None,
            cert_public_pem=profile.cert_public_pem if profile else None,
        )

        rows = list(
            session.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
        )

        # One entry per unique (site, group) so every group is resolved only once.
        pending: dict[tuple[str, str], list[int]] = {}
        for row in rows:
            recognised = _extract_sharing_link_group_and_type(row.principal)
            if not recognised:
                continue
            group_name, link_type = recognised
            if link_type in payload.link_types:
                pending.setdefault((row.site_url, group_name), []).append(row.id)

    touched = 0
    for (site_url, group_name), row_ids in pending.items():
        members = resolve_sharing_link_members(site_url, group_name, auth)
        joined = ", ".join(members) if members else ""
        with SessionLocal() as session:
            for row_id in row_ids:
                row = session.get(PermissionDeviation, row_id)
                if row:
                    row.resolved_members = joined
            session.commit()
        touched += len(row_ids)

    return ResolveSharingLinksResponse(
        resolved_groups=len(pending),
        updated_deviations=touched,
    )
@router.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
    """
    Expand the group principals found on this job's deviations and store each
    group's member list in permission_deviations.resolved_members.

    Principals recognised as SharePoint groups are resolved per site;
    principals recognised as Entra/AAD groups are resolved through
    resolve_aad_group_members. Anything else is left untouched. A group whose
    resolution fails or yields no members is counted as skipped.

    Raises HTTPException 404 for an unknown job, 409 while the job is queued
    or running, and 400 for mailbox jobs.
    """
    from .scanners.sharepoint import (
        is_aad_group_principal,
        is_sharepoint_group_principal,
        resolve_aad_group_members,
        resolve_sharing_link_members,
    )

    def _is_group(principal) -> bool:
        return is_sharepoint_group_principal(principal) or is_aad_group_principal(principal)

    with SessionLocal() as session:
        job = session.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")
        if (job.scan_type or "sharepoint") == "mailbox":
            raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")

        # Certificate material lives on the tenant profile, when the job has one.
        profile = session.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=profile.cert_private_key if profile else None,
            cert_thumbprint=profile.cert_thumbprint if profile else None,
            cert_public_pem=profile.cert_public_pem if profile else None,
        )

        rows = list(
            session.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
        )

        # One entry per unique (site, principal) so every group is resolved only once.
        pending: dict[tuple[str, str], list[int]] = {}
        for row in rows:
            if _is_group(row.principal):
                pending.setdefault((row.site_url, row.principal), []).append(row.id)

    resolved_count = 0
    skipped_count = 0
    updated_count = 0
    for (site_url, group_name), row_ids in pending.items():
        try:
            members = (
                resolve_aad_group_members(group_name, auth)
                if is_aad_group_principal(group_name)
                else resolve_sharing_link_members(site_url, group_name, auth)
            )
        except Exception:  # noqa: BLE001
            # Best effort: a failing group is reported as skipped instead of aborting the run.
            members = []

        if not members:
            skipped_count += 1
            continue

        joined = ", ".join(members)
        with SessionLocal() as session:
            for row_id in row_ids:
                row = session.get(PermissionDeviation, row_id)
                if row:
                    row.resolved_members = joined
            session.commit()
        resolved_count += 1
        updated_count += len(row_ids)

    return ResolveGroupsResponse(
        resolved_groups=resolved_count,
        skipped_groups=skipped_count,
        updated_deviations=updated_count,
    )
@router.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
    """Probe one target of a finished job and persist the outcome on the target.

    The probe runs with the credentials stored on the job, plus the
    certificate of the job's tenant profile when one is attached.

    Raises:
        HTTPException: 404 when the job or target is missing (or the target
            belongs to another job), 409 while the job is queued or running.
    """
    with SessionLocal() as session:
        job = session.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        target = session.get(ScanTarget, target_id)
        if not target or target.job_id != job_id:
            raise HTTPException(status_code=404, detail="Target not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")

        profile = session.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=profile.cert_private_key if profile else None,
            cert_thumbprint=profile.cert_thumbprint if profile else None,
            cert_public_pem=profile.cert_public_pem if profile else None,
        )
        # Copy plain values out so the probe can run without the session.
        probe_url = target.site_url
        probe_kind = job.scan_type or "sharepoint"

    outcome = probe(probe_kind, probe_url, auth)

    with SessionLocal() as session:
        target = session.get(ScanTarget, target_id)
        if not target:
            raise HTTPException(status_code=404, detail="Target not found")
        stamp = datetime.now(timezone.utc)
        target.last_probe_at = stamp
        target.last_probe_ok = outcome.ok
        target.last_probe_message = outcome.message
        target.updated_at = stamp
        session.commit()
        session.refresh(target)
        return ProbeResultResponse(
            target_id=target.id,
            ok=outcome.ok,
            message=outcome.message,
            last_probe_at=target.last_probe_at,
        )
@router.get("/api/scan-jobs/{job_id}/export")
def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
    """Export a job's targets and findings as an ``.xlsx`` workbook.

    The workbook has a "Targets" sheet plus one results sheet whose name and
    columns depend on the job's scan type (mailbox, Entra groups, SharePoint
    root, or regular SharePoint deviations).

    Args:
        job_id: Identifier of the job to export.
        site_url: When given, only targets and findings for this site/target
            are exported.

    Returns:
        A streaming response carrying the workbook as an attachment.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    import openpyxl
    from openpyxl.styles import Font, PatternFill

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")

        targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
        if site_url:
            targets_q = targets_q.where(ScanTarget.site_url == site_url)
        targets = list(db.execute(targets_q).scalars())

        deviations_q = (
            select(PermissionDeviation)
            .where(PermissionDeviation.job_id == job.id)
            .order_by(PermissionDeviation.id.desc())
        )
        if site_url:
            deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
        deviations = list(db.execute(deviations_q).scalars())

        # Read everything that depends on the job while its session is still open.
        scan_type = job.scan_type or "sharepoint"
        filename = _build_export_filename(job, job_id)

    wb = openpyxl.Workbook()
    header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
    header_font_white = Font(bold=True, color="FFFFFF")
    # Fill/font pairs for the "Link Risk" cell, keyed by the risk label.
    _risk_styles: dict[str, tuple] = {
        "Critical": (
            PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
            Font(bold=True, color="7B0000"),
        ),
        "High": (
            PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
            Font(bold=True, color="7C2D00"),
        ),
        "Low": (
            PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
            Font(bold=True, color="0C4A6E"),
        ),
        "Unknown": (
            PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
            Font(bold=True, color="555555"),
        ),
    }

    def _style_header(ws, headers):
        """Append the header row and apply the shared header styling to it."""
        ws.append(headers)
        for cell in ws[1]:
            cell.font = header_font_white
            cell.fill = header_fill

    def _autosize_columns(ws):
        """Size each column to its longest cell text plus a small margin."""
        for col in ws.columns:
            ws.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4

    target_label = {
        "sharepoint": "Site URL",
        "sharepoint_root": "Site URL",
        "mailbox": "Mailbox",
        "entra_groups": "Group",
    }.get(scan_type, "Target")

    # Targets sheet
    ws_targets = wb.active
    ws_targets.title = "Targets"
    _style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
    for t in targets:
        ws_targets.append([
            t.site_url,
            t.status,
            t.attempts,
            t.error_message or "",
            t.started_at.isoformat() if t.started_at else "",
            t.finished_at.isoformat() if t.finished_at else "",
        ])
    _autosize_columns(ws_targets)

    # Results sheet: name and columns depend on the scan type
    if scan_type == "mailbox":
        ws_dev = wb.create_sheet("Mailbox Permissions")
        _style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
        deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
        for d in deviations:
            ws_dev.append([
                d.site_url,
                d.object_url,
                d.permission_type or d.object_type,
                d.principal,
                d.role_name,
            ])
    elif scan_type == "entra_groups":
        ws_dev = wb.create_sheet("Group Memberships")
        _style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
        deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
        for d in deviations:
            ws_dev.append([
                d.object_url,
                d.permission_type or "",
                d.principal,
                d.role_name,
            ])
    elif scan_type == "sharepoint_root":
        ws_dev = wb.create_sheet("Root Permissions")
        _style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
        deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
        for d in deviations:
            ws_dev.append([
                d.site_url,
                d.principal,
                d.resolved_members or "",
                d.role_name,
            ])
    else:
        ws_dev = wb.create_sheet("Deviations")
        _style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
        deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
        for d in deviations:
            base = (d.site_url or "").rstrip("/")
            # Tolerate a missing object URL, just as the sort key above does,
            # instead of failing the whole export on `.startswith`.
            obj_url = d.object_url or ""
            # Show the object path relative to its site when it lies under the site URL.
            obj_rel = obj_url[len(base):] if base and obj_url.startswith(base) else obj_url
            link_risk = _sharing_link_risk_label(d.principal)
            ws_dev.append([
                d.site_url,
                obj_rel,
                d.object_type,
                d.principal,
                link_risk,
                d.resolved_members or "",
                d.role_name,
                d.delta_type,
            ])
            if link_risk in _risk_styles:
                risk_fill, risk_font = _risk_styles[link_risk]
                # Column 5 is the "Link Risk" column of the row just appended.
                risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
                risk_cell.fill = risk_fill
                risk_cell.font = risk_font
    _autosize_columns(ws_dev)

    buf = io.BytesIO()
    wb.save(buf)
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
@router.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
    """Return a job summary together with its targets and deviations.

    Args:
        job_id: Identifier of the job.
        site_url: When given, restricts targets and deviations to this site.
            Without it, at most 1000 deviations are returned.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    with SessionLocal() as session:
        job = session.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")

        target_query = select(ScanTarget).where(ScanTarget.job_id == job.id)
        deviation_query = select(PermissionDeviation).where(PermissionDeviation.job_id == job.id)
        if site_url:
            target_query = target_query.where(ScanTarget.site_url == site_url)
            deviation_query = deviation_query.where(PermissionDeviation.site_url == site_url)
        else:
            # Unfiltered requests are capped to keep the payload bounded.
            deviation_query = deviation_query.limit(1000)
        target_query = target_query.order_by(ScanTarget.id.asc())
        deviation_query = deviation_query.order_by(
            PermissionDeviation.site_url.asc(),
            PermissionDeviation.object_url.asc(),
            PermissionDeviation.id.asc(),
        )

        target_items = [
            ScanTargetItem(
                id=row.id,
                site_url=row.site_url,
                status=row.status,
                attempts=row.attempts,
                error_message=row.error_message,
                started_at=row.started_at,
                finished_at=row.finished_at,
                last_probe_at=row.last_probe_at,
                last_probe_ok=row.last_probe_ok,
                last_probe_message=row.last_probe_message,
            )
            for row in session.execute(target_query).scalars()
        ]
        deviation_items = [
            PermissionDeviationItem(
                id=row.id,
                site_url=row.site_url,
                object_url=row.object_url,
                object_type=row.object_type,
                principal=row.principal,
                role_name=row.role_name,
                delta_type=row.delta_type,
                permission_type=row.permission_type,
                resolved_members=row.resolved_members,
                created_at=row.created_at,
            )
            for row in session.execute(deviation_query).scalars()
        ]
        summary_fields = _to_job_summary(job).model_dump()

    return ScanJobDetail(
        **summary_fields,
        targets=target_items,
        deviations=deviation_items,
    )

View File

@ -0,0 +1,76 @@
"""Microsoft onboarding routes (admin-consent connect + scan-app creation)."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
from fastapi.responses import RedirectResponse
from .onboarding import (
OnboardingError,
consume_callback_state,
create_connect_url,
create_scan_app_for_tenant,
)
from .schemas import (
ConnectMicrosoftResponse,
CreateScanAppRequest,
CreateScanAppResponse,
)
# Router instance on which the onboarding endpoints below are registered.
router = APIRouter()
@router.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
    """Create the scan application for a tenant and return its identifiers.

    Raises:
        HTTPException: 400 when ``create_scan_app_for_tenant`` raises
            ``OnboardingError``; 500 for any other exception.
    """
    try:
        created = create_scan_app_for_tenant(
            tenant_id=payload.tenant_id,
            display_name=payload.display_name,
        )
    except OnboardingError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {exc}") from exc

    # The response mirrors these attributes of the onboarding result one-to-one.
    mirrored = (
        "tenant_id",
        "client_id",
        "client_secret",
        "app_object_id",
        "service_principal_id",
        "display_name",
    )
    return CreateScanAppResponse(**{field: getattr(created, field) for field in mirrored})
@router.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
    """Return the URL produced by ``create_connect_url``.

    Raises:
        HTTPException: 400 when building the URL raises ``OnboardingError``.
    """
    try:
        url = create_connect_url()
        return ConnectMicrosoftResponse(connect_url=url)
    except OnboardingError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
@router.get("/api/onboarding/microsoft/callback")
def onboarding_microsoft_callback(
    tenant: str | None = None,
    state: str | None = None,
    error: str | None = None,
    error_description: str | None = None,
) -> RedirectResponse:
    """Handle the onboarding callback and redirect back to the UI.

    The outcome is reported through query parameters on ``/``:

    * ``onboarding_status=error`` with ``onboarding_message`` when the
      request carries an error, the state is missing or rejected by
      ``consume_callback_state``, or no tenant was supplied.
    * ``onboarding_status=connected`` with ``tenant_id`` on success.

    All parameter values come from the incoming request and are therefore
    URL-encoded before being placed in the redirect target, so characters
    such as ``&``, ``#`` or ``=`` cannot break the query string or inject
    additional parameters.
    """
    from urllib.parse import urlencode

    def _redirect(**params: str) -> RedirectResponse:
        # urlencode quotes every value and renders spaces as "+".
        return RedirectResponse(url=f"/?{urlencode(params)}")

    if error:
        return _redirect(onboarding_status="error", onboarding_message=error_description or error)
    if not state or not consume_callback_state(state):
        return _redirect(onboarding_status="error", onboarding_message="invalid_or_expired_state")
    if not tenant:
        return _redirect(onboarding_status="error", onboarding_message="missing_tenant")
    return _redirect(onboarding_status="connected", tenant_id=tenant)
@router.get("/api/onboarding/status")
def onboarding_status() -> dict[str, bool]:
    """Report whether automated onboarding is configured.

    It counts as available only when the onboarding client id, client secret
    and redirect URI in ``config`` are all set to truthy values.
    """
    from . import config

    required_settings = (
        config.ONBOARDING_CLIENT_ID,
        config.ONBOARDING_CLIENT_SECRET,
        config.ONBOARDING_REDIRECT_URI,
    )
    return {"automated_available": all(bool(value) for value in required_settings)}

View File

@ -0,0 +1,86 @@
"""Tenant profile + certificate routes."""
from __future__ import annotations
import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
from sqlalchemy import select, text
from .api_helpers import _to_tenant_item
from .cert import generate_tenant_certificate
from .db import SessionLocal
from .models import TenantProfile
from .schemas import (
CreateTenantProfileRequest,
TenantCertificateResponse,
TenantProfileItem,
)
# Router instance on which the tenant-profile endpoints below are registered.
router = APIRouter()
@router.get("/api/tenants", response_model=list[TenantProfileItem])
def list_tenants() -> list[TenantProfileItem]:
    """Return all tenant profiles, oldest first."""
    oldest_first = select(TenantProfile).order_by(TenantProfile.created_at.asc())
    with SessionLocal() as session:
        rows = list(session.execute(oldest_first).scalars())
        return [_to_tenant_item(row) for row in rows]
@router.post("/api/tenants", response_model=TenantProfileItem, status_code=201)
def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
    """Store a new tenant profile and return it.

    Text fields are stripped of surrounding whitespace; the primary domain is
    additionally lower-cased. An empty primary domain or client secret is
    stored as ``None``.
    """
    domain = payload.primary_domain.strip().lower() if payload.primary_domain else None
    secret = payload.client_secret.strip() if payload.client_secret else None

    with SessionLocal() as session:
        stamp = datetime.now(timezone.utc)
        record = TenantProfile(
            id=str(uuid.uuid4()),
            name=payload.name.strip(),
            tenant_id=payload.tenant_id.strip(),
            primary_domain=domain,
            client_id=payload.client_id.strip(),
            client_secret=secret,
            created_at=stamp,
            updated_at=stamp,
        )
        session.add(record)
        session.commit()
        session.refresh(record)
        return _to_tenant_item(record)
@router.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse)
def generate_certificate(profile_id: str) -> TenantCertificateResponse:
    """Generate a certificate for a tenant profile and store it on the profile.

    The private key, public certificate, thumbprint and expiry are saved on
    the profile; only the public parts are returned to the caller.

    Raises:
        HTTPException: 404 when the tenant profile does not exist.
    """
    with SessionLocal() as session:
        tenant = session.get(TenantProfile, profile_id)
        if not tenant:
            raise HTTPException(status_code=404, detail="Tenant profile not found")

        cert = generate_tenant_certificate()
        tenant.cert_private_key = cert.private_key_pem
        tenant.cert_public_pem = cert.public_cert_pem
        tenant.cert_thumbprint = cert.thumbprint
        tenant.cert_expires_at = cert.expires_at
        tenant.updated_at = datetime.now(timezone.utc)
        session.commit()

    return TenantCertificateResponse(
        thumbprint=cert.thumbprint,
        expires_at=cert.expires_at,
        public_cert_pem=cert.public_cert_pem,
    )
@router.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
def delete_tenant(profile_id: str) -> Response:
    """Delete a tenant profile, detaching any scan jobs that reference it.

    Raises:
        HTTPException: 404 when the tenant profile does not exist.
    """
    detach_jobs = text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid")
    with SessionLocal() as session:
        tenant = session.get(TenantProfile, profile_id)
        if not tenant:
            raise HTTPException(status_code=404, detail="Tenant profile not found")
        # Jobs are kept; they only lose their link to this profile.
        session.execute(detach_jobs, {"pid": profile_id})
        session.delete(tenant)
        session.commit()
    return Response(status_code=204)

View File

@ -0,0 +1 @@
"""Authentication, session, and user-management subsystem."""

View File

@ -0,0 +1,20 @@
"""Single-entry helper for writing rows to the auth audit log."""
from __future__ import annotations
from typing import Any
from sqlalchemy.orm import Session
from .models import AuthAudit
def record_event(
    db: Session,
    *,
    event: str,
    user_id: int | None = None,
    ip: str | None = None,
    detail: dict[str, Any] | None = None,
) -> None:
    """Stage one AuthAudit row on ``db``; the caller is responsible for committing.

    Args:
        db: Session the row is added to.
        event: Name of the audited event.
        user_id: Id of the user concerned, if known.
        ip: Client address associated with the event, if known.
        detail: Optional extra data stored alongside the event.
    """
    entry = AuthAudit(event=event, user_id=user_id, ip=ip, detail=detail)
    db.add(entry)

View File

@ -0,0 +1,52 @@
"""FastAPI dependencies that gate API endpoints behind a session."""
from __future__ import annotations
from typing import Annotated
from fastapi import Cookie, Depends, HTTPException, Request, status
from sqlalchemy.orm import Session
from ..config import COOKIE_NAME # noqa: F401
from ..db import SessionLocal
from . import sessions as S
from .models import User, UserSession
# Alias for the User model; presumably meant for annotating values returned by
# the auth dependencies below (no use is visible in this module; confirm).
AuthedUser = User
def get_db():
    """Yield a database session and close it once the consumer is done with it."""
    with SessionLocal() as db:
        yield db
def _load_session(db: Session, sid: str | None) -> tuple[User, UserSession]:
    """Return the active user and session for a session id, or raise 401.

    The session is looked up through ``S.lookup_and_refresh`` and the
    transaction is committed once a valid, active user has been found.

    Raises:
        HTTPException: 401 when there is no such session, or its user is
            missing or inactive.
    """
    session = S.lookup_and_refresh(db, sid)
    user = db.get(User, session.user_id) if session is not None else None
    if user is None or not user.is_active:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Not authenticated")
    db.commit()
    return user, session
def require_user(
    db: Annotated[Session, Depends(get_db)],
    clearview_session: Annotated[str | None, Cookie(alias=COOKIE_NAME)] = None,
) -> User:
    """Dependency that returns the authenticated user for the current request.

    The session id is read from the cookie named by ``COOKIE_NAME``, the
    setting imported at the top of this module, so it is not tied to this
    parameter's name.

    Raises:
        HTTPException: 401 when the cookie is missing or does not map to an
            active user's session.
    """
    user, _ = _load_session(db, clearview_session)
    return user
def require_admin(
    db: Annotated[Session, Depends(get_db)],
    clearview_session: Annotated[str | None, Cookie(alias=COOKIE_NAME)] = None,
) -> User:
    """Dependency that returns the authenticated user, who must be an admin.

    The session id is read from the cookie named by ``COOKIE_NAME``, the
    setting imported at the top of this module, so it is not tied to this
    parameter's name.

    Raises:
        HTTPException: 401 when the cookie is missing or does not map to an
            active user's session; 403 when the user's role is not
            ``"admin"``.
    """
    user, _ = _load_session(db, clearview_session)
    if user.role != "admin":
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin required")
    return user

View File

@ -0,0 +1,87 @@
"""SQLAlchemy models for authentication, sessions, and audit log.
A dedicated ``Base`` is used so these tables can be created independently
of the existing scan/tenant models in tests; in production they coexist
in the same database under Alembic.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, JSON, String, Text, TypeDecorator
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
class UTCDateTime(TypeDecorator):
    """Column type whose values are always timezone-aware UTC datetimes.

    Some backends (SQLite, used in tests) drop tzinfo on a round trip even
    with ``DateTime(timezone=True)``. Values are therefore normalised both
    when bound and when loaded: naive datetimes are taken to be UTC, aware
    ones are converted to UTC.
    """

    impl = DateTime(timezone=True)
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Normalise an outgoing value to aware UTC; ``None`` passes through."""
        if value is None:
            return None
        aware = value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
        return aware.astimezone(timezone.utc)

    def process_result_value(self, value, dialect):
        """Normalise a loaded value to aware UTC; ``None`` passes through."""
        if value is None:
            return None
        return value.replace(tzinfo=timezone.utc) if value.tzinfo is None else value.astimezone(timezone.utc)
class Base(DeclarativeBase):
    """Declarative base shared by the auth, session and audit models in this module."""

    pass
class User(Base):
    """Account row stored in the ``users`` table."""

    __tablename__ = "users"
    # Auto-incrementing surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Unique, indexed login name (at most 128 characters).
    username: Mapped[str] = mapped_column(String(128), unique=True, nullable=False, index=True)
    # Encoded password hash used to verify logins.
    password_hash: Mapped[str] = mapped_column(Text, nullable=False)
    # Role name (at most 16 characters); "admin" is the value the admin check compares against.
    role: Mapped[str] = mapped_column(String(16), nullable=False)
    # Inactive users are rejected at login and by the session dependencies.
    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    # Set to the current UTC time when the row is created.
    created_at: Mapped[datetime] = mapped_column(UTCDateTime(), default=_utcnow, nullable=False)
    # Initialised like created_at; no onupdate hook is declared here, so later
    # changes only move it if the writer assigns it explicitly.
    updated_at: Mapped[datetime] = mapped_column(UTCDateTime(), default=_utcnow, nullable=False)
class UserSession(Base):
    """Login session row stored in the ``user_sessions`` table."""

    __tablename__ = "user_sessions"
    # Session identifier (at most 64 characters); also the primary key.
    id: Mapped[str] = mapped_column(String(64), primary_key=True)
    # Owning user; sessions are removed with the user (ON DELETE CASCADE).
    user_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    # Set to the current UTC time when the row is created.
    created_at: Mapped[datetime] = mapped_column(UTCDateTime(), default=_utcnow, nullable=False)
    # Expiry timestamp; has no default, so the creator must supply it. Indexed.
    expires_at: Mapped[datetime] = mapped_column(UTCDateTime(), nullable=False, index=True)
    # Defaults to the creation time; presumably bumped on each use (confirm in the sessions module).
    last_seen_at: Mapped[datetime] = mapped_column(UTCDateTime(), default=_utcnow, nullable=False)
    # Client address recorded for the session, if any.
    ip: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # User-Agent header recorded for the session, if any.
    user_agent: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Whether the session was created with the "remember" option.
    remember: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
class AuthAudit(Base):
    """Audit-log row stored in the ``auth_audit`` table."""

    __tablename__ = "auth_audit"
    # Auto-incrementing surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Event timestamp; defaults to the current UTC time. Indexed.
    ts: Mapped[datetime] = mapped_column(UTCDateTime(), default=_utcnow, nullable=False, index=True)
    # User concerned, if known; set to NULL when that user is deleted.
    user_id: Mapped[int | None] = mapped_column(
        Integer, ForeignKey("users.id", ondelete="SET NULL"), nullable=True
    )
    # Event name (at most 32 characters). Indexed.
    event: Mapped[str] = mapped_column(String(32), nullable=False, index=True)
    # Client address associated with the event, if any.
    ip: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # Optional JSON payload with event-specific details.
    detail: Mapped[dict[str, Any] | None] = mapped_column(JSON, nullable=True)

View File

@ -0,0 +1,139 @@
"""Routes for login, logout, identity, and initial setup."""
from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
from sqlalchemy import func, select
from sqlalchemy.orm import Session
from ..config import COOKIE_NAME, COOKIE_SAMESITE, COOKIE_SECURE
from . import sessions as S
from .audit import record_event
from .dependencies import get_db, require_user
from .models import User, UserSession
from .schemas import LoginRequest, MeResponse, SetupRequest, SetupRequiredResponse
from .security import (
PasswordPolicyError,
hash_password,
validate_password,
verify_password,
)
# Router instance on which the auth endpoints below are registered.
router = APIRouter()
def _ip(request: Request) -> str | None:
    """Return the host of the request's client, or ``None`` when no client is set."""
    client = request.client
    if client:
        return client.host
    return None
def _set_cookie(response: Response, sid: str, *, remember: bool) -> None:
    """Attach the session cookie to ``response``.

    The cookie lives for 30 days when ``remember`` is true and for 8 hours
    otherwise. It is HTTP-only; the Secure and SameSite attributes come from
    configuration.
    """
    lifetime_hours = 30 * 24 if remember else 8
    cookie_options = {
        "key": COOKIE_NAME,
        "value": sid,
        "max_age": lifetime_hours * 3600,
        "httponly": True,
        "secure": COOKIE_SECURE,
        "samesite": COOKIE_SAMESITE,
        "path": "/",
    }
    response.set_cookie(**cookie_options)
def _clear_cookie(response: Response) -> None:
    """Instruct the client to drop the session cookie set on path ``/``."""
    cookie_path = "/"
    response.delete_cookie(key=COOKIE_NAME, path=cookie_path)
def _users_count(db: Session) -> int:
    """Return the number of rows in the users table."""
    count_query = select(func.count(User.id))
    total = db.execute(count_query).scalar_one()
    return total
@router.get("/api/auth/setup-required", response_model=SetupRequiredResponse)
def setup_required(db: Annotated[Session, Depends(get_db)]) -> SetupRequiredResponse:
    """Tell the client whether initial setup is still pending (no users exist yet)."""
    no_users_yet = _users_count(db) == 0
    return SetupRequiredResponse(setup_required=no_users_yet)
@router.post("/api/auth/setup", response_model=MeResponse)
def setup(
    payload: SetupRequest,
    request: Request,
    response: Response,
    db: Annotated[Session, Depends(get_db)],
) -> MeResponse:
    """Create the first admin account and sign it in.

    Only allowed while the users table is empty. On success a non-remembered
    session is created, a ``setup`` audit event is recorded and the session
    cookie is set on the response.

    Raises:
        HTTPException: 409 when a user already exists; 400 when the password
            violates the password policy.
    """
    if _users_count(db) > 0:
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Setup already completed")
    try:
        validate_password(payload.password)
    except PasswordPolicyError as exc:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc

    admin = User(
        username=payload.username,
        password_hash=hash_password(payload.password),
        role="admin",
        is_active=True,
    )
    db.add(admin)
    # Flush so the new row gets its primary key before the session is created.
    db.flush()

    client_ip = _ip(request)
    sid, _ = S.create_session(
        db,
        user_id=admin.id,
        remember=False,
        ip=client_ip,
        user_agent=request.headers.get("user-agent"),
    )
    record_event(db, event="setup", user_id=admin.id, ip=client_ip, detail={"username": admin.username})
    db.commit()

    _set_cookie(response, sid, remember=False)
    return MeResponse(username=admin.username, role=admin.role)  # type: ignore[arg-type]
@router.post("/api/auth/login", response_model=MeResponse)
def login(
    payload: LoginRequest,
    request: Request,
    response: Response,
    db: Annotated[Session, Depends(get_db)],
) -> MeResponse:
    """Authenticate a user and start a session.

    A failed attempt is written to the audit log (with the attempted
    username) before the request is rejected. A successful login creates a
    session, records ``login_ok``, purges expired sessions and sets the
    session cookie.

    Raises:
        HTTPException: 401 when the user is unknown or inactive, or the
            password does not match.
    """
    client_ip = _ip(request)
    user = db.execute(select(User).where(User.username == payload.username)).scalar_one_or_none()

    credentials_ok = (
        user is not None
        and user.is_active
        and verify_password(payload.password, user.password_hash)
    )
    if not credentials_ok:
        record_event(
            db,
            event="login_fail",
            user_id=user.id if user else None,
            ip=client_ip,
            detail={"username": payload.username},
        )
        db.commit()
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials")

    sid, _ = S.create_session(
        db,
        user_id=user.id,
        remember=payload.remember,
        ip=client_ip,
        user_agent=request.headers.get("user-agent"),
    )
    record_event(db, event="login_ok", user_id=user.id, ip=client_ip, detail=None)
    S.purge_expired(db)
    db.commit()

    _set_cookie(response, sid, remember=payload.remember)
    return MeResponse(username=user.username, role=user.role)  # type: ignore[arg-type]
@router.post("/api/auth/logout")
def logout(
    request: Request,
    response: Response,
    db: Annotated[Session, Depends(get_db)],
) -> dict[str, bool]:
    """End the current session, if any, and clear the session cookie.

    A ``logout`` audit event is always recorded; it carries the user id only
    when the cookie referred to an existing session. The endpoint always
    answers ``{"ok": True}``.
    """
    sid = request.cookies.get(COOKIE_NAME)
    owner_id: int | None = None
    if sid:
        current = db.get(UserSession, sid)
        if current is not None:
            owner_id = current.user_id
        S.revoke(db, sid)

    record_event(db, event="logout", user_id=owner_id, ip=_ip(request), detail=None)
    db.commit()
    _clear_cookie(response)
    return {"ok": True}
@router.get("/api/auth/me", response_model=MeResponse)
def me(user: Annotated[User, Depends(require_user)]) -> MeResponse:
    """Return the username and role of the authenticated user."""
    identity = MeResponse(username=user.username, role=user.role)  # type: ignore[arg-type]
    return identity

View File

@ -0,0 +1,59 @@
"""Pydantic schemas for the auth and users routers."""
from __future__ import annotations
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
class LoginRequest(BaseModel):
    """Request body for ``POST /api/auth/login``."""

    # Both credentials must be non-empty; upper bounds cap the accepted size.
    username: str = Field(min_length=1, max_length=128)
    password: str = Field(min_length=1, max_length=1024)
    # Forwarded to session creation, where it selects the longer "remember" TTL.
    remember: bool = False
class SetupRequest(BaseModel):
    """Username/password pair for the setup flow.

    NOTE(review): presumably the body of the initial-setup endpoint that
    creates the first account; the consumer is not visible here, confirm.
    """

    username: str = Field(min_length=1, max_length=128)
    password: str = Field(min_length=1, max_length=1024)
class MeResponse(BaseModel):
    """Identity of a signed-in user, as returned by the login and ``/api/auth/me`` endpoints."""

    username: str
    # Restricted to the two roles the application knows about.
    role: Literal["admin", "user"]
class SetupRequiredResponse(BaseModel):
    """Single-flag response carrying ``setup_required``.

    NOTE(review): presumably tells the client whether the initial setup still
    has to be completed; the producing endpoint is not visible here, confirm.
    """

    setup_required: bool
class UserItem(BaseModel):
    """User record as exposed by the ``/api/users`` endpoints (no password hash)."""

    id: int
    username: str
    role: Literal["admin", "user"]
    is_active: bool
    created_at: datetime
class CreateUserRequest(BaseModel):
    """Request body for ``POST /api/users``."""

    username: str = Field(min_length=1, max_length=128)
    # Only basic size limits are enforced here; the password policy itself is
    # checked by the endpoint via ``validate_password``.
    password: str = Field(min_length=1, max_length=1024)
    # New accounts are regular users unless the caller asks for "admin".
    role: Literal["admin", "user"] = "user"
class UpdateUserRequest(BaseModel):
    """Request body for ``PATCH /api/users/{user_id}``.

    Both fields are optional; ``None`` means "leave this attribute unchanged".
    """

    role: Literal["admin", "user"] | None = None
    is_active: bool | None = None
class ResetPasswordRequest(BaseModel):
    """Request body for ``POST /api/users/{user_id}/reset-password``."""

    # Size limits only; the endpoint applies the password policy separately.
    password: str = Field(min_length=1, max_length=1024)
class AuditItem(BaseModel):
    """One audit-log entry as returned by ``GET /api/audit``."""

    id: int
    ts: datetime
    # May be None, e.g. a failed login for an unknown username.
    user_id: int | None
    event: str
    ip: str | None
    # Free-form, event-specific payload (or None when the event has none).
    detail: dict | None

View File

@ -0,0 +1,44 @@
"""Password hashing, password-policy validation, and session-id generation."""
from __future__ import annotations
import uuid
from argon2 import PasswordHasher
from argon2.exceptions import InvalidHashError, VerificationError, VerifyMismatchError
class PasswordPolicyError(ValueError):
    """Raised when a candidate password does not meet the policy.

    The exception message is a human-readable sentence naming the rule that
    was violated.
    """
_hasher = PasswordHasher()
MIN_LENGTH = 12
def validate_password(pw: str) -> None:
    """Check ``pw`` against the password policy.

    The policy is: at least ``MIN_LENGTH`` characters, at least one letter and
    at least one digit.  Rules are checked in that order and the first one
    that fails is reported.

    Raises:
        PasswordPolicyError: with a message describing the violated rule.
    """
    if len(pw) < MIN_LENGTH:
        raise PasswordPolicyError(f"Password must be at least {MIN_LENGTH} characters.")

    # (character test, message when no character passes it)
    required_classes = (
        (str.isalpha, "Password must contain at least one letter."),
        (str.isdigit, "Password must contain at least one digit."),
    )
    for belongs_to_class, complaint in required_classes:
        if not any(map(belongs_to_class, pw)):
            raise PasswordPolicyError(complaint)
def hash_password(pw: str) -> str:
    """Return the encoded hash of ``pw`` produced by the module-level hasher."""
    encoded = _hasher.hash(pw)
    return encoded
def verify_password(pw: str, encoded: str) -> bool:
    """Check a candidate password against a stored encoded hash.

    Args:
        pw: The plaintext password supplied by the caller.
        encoded: The stored hash, as produced by ``hash_password``.

    Returns:
        True when ``pw`` matches ``encoded``; False for a mismatch, a
        malformed hash, or any other failure.  This function never raises.
    """
    try:
        return _hasher.verify(encoded, pw)
    except (VerifyMismatchError, InvalidHashError, VerificationError):
        # Expected outcomes: wrong password or an unusable stored hash.
        return False
    except Exception:
        # Anything else is unexpected (e.g. a non-string hash from the
        # database).  Still fail closed, but leave a trace instead of making
        # the failure indistinguishable from a wrong password.
        import logging

        logging.getLogger(__name__).exception("Unexpected error while verifying a password hash")
        return False
def new_session_id() -> str:
    """Opaque 128-bit session identifier rendered as 32 hex chars.

    Generated with ``secrets.token_hex`` so that all 128 bits are random.  A
    version-4 UUID has the same length but fixes six bits for its
    version/variant markers, leaving only 122 bits of randomness.
    """
    import secrets

    # 16 random bytes -> 32 lowercase hexadecimal characters.
    return secrets.token_hex(16)

View File

@ -0,0 +1,72 @@
"""Session lifecycle: create, look up + refresh, revoke, purge expired."""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from sqlalchemy import delete
from sqlalchemy.orm import Session
from .models import UserSession
from .security import new_session_id
SLIDING_TTL = timedelta(hours=8)
REMEMBER_TTL = timedelta(days=30)
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
def create_session(
    db: Session,
    *,
    user_id: int,
    remember: bool,
    ip: str | None,
    user_agent: str | None,
) -> tuple[str, datetime]:
    """Create and flush a new session row for ``user_id``.

    The session expires ``REMEMBER_TTL`` from now when ``remember`` is set and
    ``SLIDING_TTL`` from now otherwise.  The row is added and flushed but not
    committed; committing is left to the caller.

    Returns:
        ``(session_id, expires_at)`` for the new session.
    """
    if remember:
        lifetime = REMEMBER_TTL
    else:
        lifetime = SLIDING_TTL
    expires = _utcnow() + lifetime
    session_id = new_session_id()

    row = UserSession(
        id=session_id,
        user_id=user_id,
        expires_at=expires,
        ip=ip,
        user_agent=user_agent,
        remember=remember,
    )
    db.add(row)
    db.flush()
    return session_id, expires
def lookup_and_refresh(db: Session, sid: str | None) -> UserSession | None:
    """Return the live session for ``sid`` and refresh it, or ``None``.

    ``None`` is returned when ``sid`` is empty, unknown, or expired.  For a
    live session ``last_seen_at`` is set to now; sessions that were not
    created with ``remember`` additionally get their expiry pushed to
    ``SLIDING_TTL`` from now.  Nothing is committed here.
    """
    row = db.get(UserSession, sid) if sid else None
    if row is None:
        return None

    now = _utcnow()
    expires = row.expires_at
    if not expires.tzinfo:
        # A naive timestamp is interpreted as UTC so it can be compared with
        # the timezone-aware "now".
        expires = expires.replace(tzinfo=timezone.utc)
    if expires <= now:
        return None

    row.last_seen_at = now
    if not row.remember:
        row.expires_at = now + SLIDING_TTL
    return row
def revoke(db: Session, sid: str) -> None:
    """Delete the session row whose id is ``sid`` (no commit)."""
    removal = delete(UserSession).where(UserSession.id == sid)
    db.execute(removal)
def revoke_all_for_user(db: Session, user_id: int) -> int:
    """Delete every session of ``user_id`` and return the reported row count (0 if none)."""
    removal = delete(UserSession).where(UserSession.user_id == user_id)
    outcome = db.execute(removal)
    removed = outcome.rowcount
    return removed if removed else 0
def purge_expired(db: Session) -> int:
    """Delete sessions whose expiry is at or before now; return the reported row count (0 if none)."""
    removal = delete(UserSession).where(UserSession.expires_at <= _utcnow())
    outcome = db.execute(removal)
    removed = outcome.rowcount
    return removed if removed else 0

View File

@ -0,0 +1,152 @@
"""Admin endpoints: user CRUD, password reset, audit log."""
from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Request
from sqlalchemy import select
from sqlalchemy.orm import Session
from . import sessions as S
from .audit import record_event
from .dependencies import get_db, require_admin
from .models import AuthAudit, User
from .schemas import (
AuditItem,
CreateUserRequest,
ResetPasswordRequest,
UpdateUserRequest,
UserItem,
)
from .security import PasswordPolicyError, hash_password, validate_password
router = APIRouter()
def _ip(request: Request) -> str | None:
return request.client.host if request.client else None
def _to_item(u: User) -> UserItem:
    """Convert a ``User`` row into the ``UserItem`` schema returned by the API."""
    fields = {
        "id": u.id,
        "username": u.username,
        "role": u.role,
        "is_active": u.is_active,
        "created_at": u.created_at,
    }
    return UserItem(**fields)  # type: ignore[arg-type]
@router.get("/api/users", response_model=list[UserItem])
def list_users(
    db: Annotated[Session, Depends(get_db)],
    _: Annotated[User, Depends(require_admin)],
) -> list[UserItem]:
    """List all users, oldest account first (admin only)."""
    oldest_first = select(User).order_by(User.created_at.asc())
    users = db.execute(oldest_first).scalars().all()
    return list(map(_to_item, users))
@router.post("/api/users", response_model=UserItem)
def create_user(
    payload: CreateUserRequest,
    request: Request,
    db: Annotated[Session, Depends(get_db)],
    actor: Annotated[User, Depends(require_admin)],
) -> UserItem:
    """Create a new, active user account (admin only).

    The password is checked against the policy first, then the username is
    checked for uniqueness.  A ``user_create`` audit event attributed to the
    acting admin is written in the same transaction.

    Raises:
        HTTPException: 400 when the password violates the policy, 409 when
            the username is already taken.
    """
    try:
        validate_password(payload.password)
    except PasswordPolicyError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    same_name = select(User).where(User.username == payload.username)
    existing = db.execute(same_name).scalar_one_or_none()
    if existing:
        raise HTTPException(status_code=409, detail="Username already exists")

    created = User(
        username=payload.username,
        password_hash=hash_password(payload.password),
        role=payload.role,
        is_active=True,
    )
    db.add(created)
    # Flush before the audit entry below reads created.id.
    db.flush()
    record_event(
        db,
        event="user_create",
        user_id=actor.id,
        ip=_ip(request),
        detail={"target": created.id, "username": created.username, "role": created.role},
    )
    db.commit()
    db.refresh(created)
    return _to_item(created)
@router.patch("/api/users/{user_id}", response_model=UserItem)
def update_user(
    user_id: int,
    payload: UpdateUserRequest,
    request: Request,
    db: Annotated[Session, Depends(get_db)],
    actor: Annotated[User, Depends(require_admin)],
) -> UserItem:
    """Change a user's role and/or active flag (admin only).

    Only fields that are present in the payload and differ from the stored
    value are applied.  Deactivating a user also revokes all of that user's
    sessions.  A ``user_update`` audit event listing the changed fields is
    recorded when anything actually changed.

    An admin may not deactivate or demote their own account: doing so could
    leave the installation without a usable admin, which is the same lock-out
    that ``delete_user`` prevents by refusing self-deletion.

    Raises:
        HTTPException: 400 on an attempt to deactivate/demote oneself,
            404 when the user does not exist.
    """
    if user_id == actor.id:
        demoting = payload.role is not None and payload.role != "admin"
        deactivating = payload.is_active is False
        if demoting or deactivating:
            raise HTTPException(status_code=400, detail="Cannot deactivate or demote your own account")

    u = db.get(User, user_id)
    if u is None:
        raise HTTPException(status_code=404, detail="User not found")

    changed: dict = {}
    if payload.role is not None and payload.role != u.role:
        u.role = payload.role
        changed["role"] = payload.role
    if payload.is_active is not None and payload.is_active != u.is_active:
        u.is_active = payload.is_active
        changed["is_active"] = payload.is_active
        if not payload.is_active:
            # A deactivated account must not keep any live session.
            S.revoke_all_for_user(db, u.id)

    if changed:
        record_event(db, event="user_update", user_id=actor.id, ip=_ip(request), detail={"target": u.id, **changed})
    db.commit()
    db.refresh(u)
    return _to_item(u)
@router.delete("/api/users/{user_id}")
def delete_user(
    user_id: int,
    request: Request,
    db: Annotated[Session, Depends(get_db)],
    actor: Annotated[User, Depends(require_admin)],
) -> dict[str, bool]:
    """Delete a user account (admin only).

    The user's sessions are revoked explicitly before the row is deleted,
    matching what deactivation and password reset already do, so removal of
    the sessions does not depend on database-level cascade behaviour.  The
    ``user_delete`` audit event stores the username next to the id because
    the id can no longer be resolved once the row is gone.

    Raises:
        HTTPException: 400 when an admin tries to delete their own account,
            404 when the user does not exist.
    """
    if user_id == actor.id:
        raise HTTPException(status_code=400, detail="Cannot delete your own account")
    u = db.get(User, user_id)
    if u is None:
        raise HTTPException(status_code=404, detail="User not found")

    # Capture before deletion; the attribute may be unavailable afterwards.
    username = u.username
    S.revoke_all_for_user(db, u.id)
    db.delete(u)
    record_event(
        db,
        event="user_delete",
        user_id=actor.id,
        ip=_ip(request),
        detail={"target": user_id, "username": username},
    )
    db.commit()
    return {"ok": True}
@router.post("/api/users/{user_id}/reset-password")
def reset_password(
    user_id: int,
    payload: ResetPasswordRequest,
    request: Request,
    db: Annotated[Session, Depends(get_db)],
    actor: Annotated[User, Depends(require_admin)],
) -> dict[str, bool]:
    """Set a new password for a user and revoke all of that user's sessions (admin only).

    The target user is looked up before the new password is validated, so an
    unknown id yields 404 even when the password is also invalid.  A
    ``password_reset`` audit event attributed to the acting admin is recorded.

    Raises:
        HTTPException: 404 when the user does not exist, 400 when the new
            password violates the policy.
    """
    target = db.get(User, user_id)
    if target is None:
        raise HTTPException(status_code=404, detail="User not found")

    try:
        validate_password(payload.password)
    except PasswordPolicyError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    target.password_hash = hash_password(payload.password)
    # Sessions opened with the old password must not survive the reset.
    S.revoke_all_for_user(db, target.id)
    record_event(
        db,
        event="password_reset",
        user_id=actor.id,
        ip=_ip(request),
        detail={"target": target.id},
    )
    db.commit()
    return {"ok": True}
@router.get("/api/audit", response_model=list[AuditItem])
def list_audit(
    db: Annotated[Session, Depends(get_db)],
    _: Annotated[User, Depends(require_admin)],
    limit: int = 200,
    event: str | None = None,
) -> list[AuditItem]:
    """Return the most recent audit entries, newest first (admin only).

    Args:
        limit: Maximum number of entries; clamped to the range 1..1000.
        event: When given (and non-empty), only entries with exactly this
            event name are returned.
    """
    capped = min(max(limit, 1), 1000)

    stmt = select(AuthAudit)
    if event:
        stmt = stmt.where(AuthAudit.event == event)
    stmt = stmt.order_by(AuthAudit.ts.desc()).limit(capped)

    items: list[AuditItem] = []
    for entry in db.execute(stmt).scalars().all():
        items.append(
            AuditItem(
                id=entry.id,
                ts=entry.ts,
                user_id=entry.user_id,
                event=entry.event,
                ip=entry.ip,
                detail=entry.detail,
            )
        )
    return items

View File

@ -2,7 +2,7 @@ from __future__ import annotations
import hashlib import hashlib
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, timedelta from datetime import datetime, timedelta, timezone
from cryptography import x509 from cryptography import x509
from cryptography.hazmat.primitives import hashes, serialization from cryptography.hazmat.primitives import hashes, serialization
@ -30,7 +30,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
subject = x509.Name([ subject = x509.Name([
x509.NameAttribute(NameOID.COMMON_NAME, "Clearview Scan App"), x509.NameAttribute(NameOID.COMMON_NAME, "Clearview Scan App"),
]) ])
expires_at = datetime.utcnow() + timedelta(days=365 * valid_years) expires_at = datetime.now(timezone.utc) + timedelta(days=365 * valid_years)
cert = ( cert = (
x509.CertificateBuilder() x509.CertificateBuilder()
@ -38,7 +38,7 @@ def generate_tenant_certificate(valid_years: int = 2) -> GeneratedCertificate:
.issuer_name(subject) .issuer_name(subject)
.public_key(private_key.public_key()) .public_key(private_key.public_key())
.serial_number(x509.random_serial_number()) .serial_number(x509.random_serial_number())
.not_valid_before(datetime.utcnow()) .not_valid_before(datetime.now(timezone.utc))
.not_valid_after(expires_at) .not_valid_after(expires_at)
.sign(private_key, hashes.SHA256()) .sign(private_key, hashes.SHA256())
) )

View File

@ -36,3 +36,10 @@ SCAN_HTTP_BACKOFF_SEC = _int_env("SCAN_HTTP_BACKOFF_SEC", 2)
SCAN_LIST_PAGE_SIZE = _int_env("SCAN_LIST_PAGE_SIZE", 200) SCAN_LIST_PAGE_SIZE = _int_env("SCAN_LIST_PAGE_SIZE", 200)
SCAN_MAX_ITEMS_PER_LIST = _int_env("SCAN_MAX_ITEMS_PER_LIST", 10000) SCAN_MAX_ITEMS_PER_LIST = _int_env("SCAN_MAX_ITEMS_PER_LIST", 10000)
# Auth cookie settings. Only COOKIE_SECURE can be overridden via the environment.
COOKIE_NAME = "clearview_session"
# Local-only HTTP deployment: default to non-Secure cookies. Set
# COOKIE_SECURE=true if the stack ever sits behind HTTPS.
COOKIE_SECURE = os.environ.get("COOKIE_SECURE", "false").lower() == "true"
COOKIE_SAMESITE = "lax"

View File

@ -2,13 +2,18 @@ from __future__ import annotations
import csv import csv
import io import io
import re
from .default_sites import normalize_site_url from .default_sites import normalize_site_url
_EMAIL_RE = re.compile(r"^[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}$")
class CsvImportResult: class CsvImportResult:
def __init__(self) -> None: def __init__(self) -> None:
self.urls: list[str] = [] self.urls: list[str] = []
self.mailboxes: list[str] = []
self.invalid_rows: list[str] = [] self.invalid_rows: list[str] = []
self.total_rows: int = 0 self.total_rows: int = 0
@ -22,7 +27,7 @@ def parse_sites_csv(content: bytes) -> CsvImportResult:
if not reader.fieldnames: if not reader.fieldnames:
return result return result
url_key = _resolve_url_column(reader.fieldnames) url_key = _resolve_column(reader.fieldnames, ("url", "site url", "siteurl"))
if not url_key: if not url_key:
return result return result
@ -49,9 +54,80 @@ def parse_sites_csv(content: bytes) -> CsvImportResult:
return result return result
def _resolve_url_column(fieldnames: list[str]) -> str | None: def parse_entra_groups_csv(content: bytes) -> CsvImportResult:
result = CsvImportResult()
text = content.decode("utf-8-sig", errors="replace")
reader = csv.DictReader(io.StringIO(text))
if not reader.fieldnames:
return result
id_key = _resolve_column(
reader.fieldnames,
("object id", "objectid", "id", "objectguid", "object_id"),
)
if not id_key:
return result
seen: set[str] = set()
for idx, row in enumerate(reader, start=2):
result.total_rows += 1
raw = (row.get(id_key) or "").strip()
if not raw:
result.invalid_rows.append(f"row {idx}: empty Object ID")
continue
normalized = raw.lower()
if normalized in seen:
continue
seen.add(normalized)
result.urls.append(normalized)
return result
def parse_mailboxes_csv(content: bytes) -> CsvImportResult:
    """Extract a de-duplicated list of mailbox addresses from a CSV upload.

    The file is decoded as UTF-8 (a BOM is tolerated, undecodable bytes are
    replaced).  The address column is the first header, compared
    case-insensitively, that matches one of the known UPN/e-mail names; when
    there is no header row or no such column an empty result is returned.

    Every data row counts towards ``total_rows``.  Addresses are lower-cased;
    empty or syntactically invalid values are reported in ``invalid_rows``
    (row numbers start at 2, the first line after the header) and repeated
    addresses are skipped silently.
    """
    result = CsvImportResult()
    decoded = content.decode("utf-8-sig", errors="replace")
    reader = csv.DictReader(io.StringIO(decoded))
    headers = reader.fieldnames
    if not headers:
        return result

    accepted_headers = (
        "userprincipalname",
        "upn",
        "email",
        "emailaddress",
        "mail",
        "mailbox",
        "primary smtp address",
    )
    upn_key = _resolve_column(headers, accepted_headers)
    if not upn_key:
        return result

    already_added: set[str] = set()
    for idx, row in enumerate(reader, start=2):
        result.total_rows += 1
        raw = (row.get(upn_key) or "").strip()
        if not raw:
            result.invalid_rows.append(f"row {idx}: empty mailbox")
            continue

        address = raw.lower()
        if not _EMAIL_RE.match(address):
            result.invalid_rows.append(f"row {idx}: invalid mailbox '{raw}'")
        elif address not in already_added:
            already_added.add(address)
            result.mailboxes.append(address)
    return result
def _resolve_column(fieldnames: list[str], candidates: tuple[str, ...]) -> str | None:
mapping = {name.strip().lower(): name for name in fieldnames} mapping = {name.strip().lower(): name for name in fieldnames}
for candidate in ("url", "site url", "siteurl"): for candidate in candidates:
if candidate in mapping: if candidate in mapping:
return mapping[candidate] return mapping[candidate]
return None return None

View File

@ -0,0 +1,53 @@
"""Database migration bootstrap.
Replaces the previous ``Base.metadata.create_all`` + ``_ensure_schema_columns``
startup path with Alembic. The bootstrap is idempotent and handles three cases:
* **Fresh database** (no tables): run ``upgrade head`` to create the schema and
record the Alembic version.
* **Existing pre-Alembic database** (tables present, no ``alembic_version``):
``stamp`` the baseline revision to adopt the existing tables without
re-creating them, then run ``upgrade head`` to apply any later revisions.
* **Already under Alembic**: run ``upgrade head`` to apply any new revisions.
"""
from __future__ import annotations
import logging
from pathlib import Path
from alembic import command
from alembic.config import Config
from sqlalchemy import inspect
from .db import engine
log = logging.getLogger(__name__)
_MIGRATIONS_DIR = Path(__file__).resolve().parent / "migrations"
# A table that exists in every pre-Alembic Clearview database; its presence
# (without alembic_version) marks a database that predates Alembic adoption.
_SENTINEL_TABLE = "scan_jobs"
def _alembic_config() -> Config:
    """Build an in-memory Alembic config whose script location is the bundled migrations directory."""
    alembic_cfg = Config()
    script_location = str(_MIGRATIONS_DIR)
    alembic_cfg.set_main_option("script_location", script_location)
    return alembic_cfg
_BASELINE_REVISION = "0001_baseline"
def run_migrations() -> None:
    """Bring the database schema up to date.

    A database that already contains the sentinel table but has no
    ``alembic_version`` table is first stamped with the baseline revision;
    in every case ``upgrade head`` is run afterwards.
    """
    alembic_cfg = _alembic_config()
    existing_tables = set(inspect(engine).get_table_names())

    predates_alembic = "alembic_version" not in existing_tables and _SENTINEL_TABLE in existing_tables
    if predates_alembic:
        # The existing schema already matches the baseline: record that
        # revision instead of re-creating tables, and let the upgrade below
        # apply whatever migrations come after it.
        log.info("Existing pre-Alembic schema detected; stamping baseline %s.", _BASELINE_REVISION)
        command.stamp(alembic_cfg, _BASELINE_REVISION)

    log.info("Applying Alembic migrations (upgrade head).")
    command.upgrade(alembic_cfg, "head")

View File

@ -1,43 +1,28 @@
"""Clearview API composition root.
Routes live in the ``api_tenants``, ``api_jobs``, and ``api_onboarding`` modules
(shared helpers in ``api_helpers``). This module only wires the FastAPI app,
the scan worker lifecycle, health/version endpoints, and static file serving.
"""
from __future__ import annotations from __future__ import annotations
import uuid
from datetime import datetime
from pathlib import Path from pathlib import Path
import io from fastapi import Depends, FastAPI
from fastapi.responses import FileResponse
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse, RedirectResponse, Response, StreamingResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from sqlalchemy import select, text
from sqlalchemy.orm import joinedload
from .csv_import import parse_sites_csv from .api_jobs import router as jobs_router
from .db import SessionLocal, engine from .api_onboarding import router as onboarding_router
from .default_sites import is_default_site, normalize_site_url from .api_tenants import router as tenants_router
from .models import Base, PermissionDeviation, ScanJob, ScanTarget, TenantProfile from .auth.dependencies import require_user
from .onboarding import OnboardingError, consume_callback_state, create_connect_url, create_scan_app_for_tenant from .auth.router import router as auth_router
from .cert import generate_tenant_certificate from .auth.users_router import router as users_router
from .schemas import ( from .db_migrate import run_migrations
ConnectMicrosoftResponse, from .version import display_version
CreateScanAppRequest,
CreateScanAppResponse,
CreateScanJobRequest,
CreateTenantProfileRequest,
PermissionDeviationItem,
ResolveSharingLinksRequest,
ResolveSharingLinksResponse,
ScanJobCreateResponse,
ScanJobDetail,
ScanJobSummary,
ScanTargetItem,
TenantCertificateResponse,
TenantProfileItem,
)
from .scanner import AuthConfig
from .worker import ScanWorker from .worker import ScanWorker
app = FastAPI(title="Clearview API", version="0.1.0") app = FastAPI(title="Clearview API", version=display_version().lstrip("v"))
worker = ScanWorker() worker = ScanWorker()
SITE_DIR = Path(__file__).resolve().parents[2] / "site" SITE_DIR = Path(__file__).resolve().parents[2] / "site"
@ -45,8 +30,7 @@ SITE_DIR = Path(__file__).resolve().parents[2] / "site"
@app.on_event("startup") @app.on_event("startup")
def on_startup() -> None: def on_startup() -> None:
Base.metadata.create_all(bind=engine) run_migrations()
_ensure_schema_columns()
worker.start() worker.start()
@ -60,481 +44,27 @@ def healthz() -> dict[str, str]:
return {"status": "ok"} return {"status": "ok"}
# --------------------------------------------------------------------------- @app.get("/api/version")
# Tenant profiles def version() -> dict[str, str]:
# --------------------------------------------------------------------------- """Return the running build's user-visible version (e.g. v0.1.0.3)."""
return {"version": display_version()}
@app.get("/api/tenants", response_model=list[TenantProfileItem])
def list_tenants() -> list[TenantProfileItem]:
with SessionLocal() as db:
profiles = list(
db.execute(select(TenantProfile).order_by(TenantProfile.created_at.asc())).scalars()
)
return [_to_tenant_item(p) for p in profiles]
@app.post("/api/tenants", response_model=TenantProfileItem, status_code=201) # Public auth endpoints (login / setup / setup-required) — no dependency.
def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem: app.include_router(auth_router)
with SessionLocal() as db:
now = datetime.utcnow()
profile = TenantProfile(
id=str(uuid.uuid4()),
name=payload.name.strip(),
tenant_id=payload.tenant_id.strip(),
client_id=payload.client_id.strip(),
client_secret=payload.client_secret.strip() if payload.client_secret else None,
created_at=now,
updated_at=now,
)
db.add(profile)
db.commit()
db.refresh(profile)
return _to_tenant_item(profile)
# Admin endpoints — already enforce require_admin internally.
app.include_router(users_router)
@app.post("/api/tenants/{profile_id}/generate-certificate", response_model=TenantCertificateResponse) # Existing routers gated by an authenticated session.
def generate_certificate(profile_id: str) -> TenantCertificateResponse: _protected = [Depends(require_user)]
with SessionLocal() as db: app.include_router(tenants_router, dependencies=_protected)
profile = db.get(TenantProfile, profile_id) app.include_router(jobs_router, dependencies=_protected)
if not profile: app.include_router(onboarding_router, dependencies=_protected)
raise HTTPException(status_code=404, detail="Tenant profile not found")
result = generate_tenant_certificate()
profile.cert_private_key = result.private_key_pem
profile.cert_thumbprint = result.thumbprint
profile.cert_expires_at = result.expires_at
profile.updated_at = datetime.utcnow()
db.commit()
return TenantCertificateResponse(
thumbprint=result.thumbprint,
expires_at=result.expires_at,
public_cert_pem=result.public_cert_pem,
)
@app.delete("/api/tenants/{profile_id}", status_code=204, response_class=Response)
def delete_tenant(profile_id: str) -> Response:
with SessionLocal() as db:
profile = db.get(TenantProfile, profile_id)
if not profile:
raise HTTPException(status_code=404, detail="Tenant profile not found")
# Detach jobs from this profile before deleting
db.execute(
text("UPDATE scan_jobs SET tenant_profile_id = NULL WHERE tenant_profile_id = :pid"),
{"pid": profile_id},
)
db.delete(profile)
db.commit()
return Response(status_code=204)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Scan jobs # Static files (mounted last so explicit API routes take precedence)
# ---------------------------------------------------------------------------
@app.post("/api/scan-jobs", response_model=ScanJobCreateResponse)
def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
with SessionLocal() as db:
tenant_id, client_id, client_secret, profile_id = _resolve_credentials(
db=db,
tenant_profile_id=payload.tenant_profile_id,
tenant_id=payload.tenant_id,
client_id=payload.client_id,
client_secret=payload.client_secret,
)
raw_urls = [str(item) for item in payload.site_urls]
return _create_job_from_urls(
raw_urls=raw_urls,
skip_default_sites=payload.skip_default_sites,
source_type="manual",
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
tenant_profile_id=profile_id,
)
@app.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
def create_scan_job_from_csv(
skip_default_sites: bool = True,
tenant_profile_id: str | None = Form(None),
tenant_id: str | None = Form(None),
client_id: str | None = Form(None),
client_secret: str | None = Form(None),
file: UploadFile = File(...),
) -> ScanJobCreateResponse:
with SessionLocal() as db:
resolved_tenant_id, resolved_client_id, resolved_client_secret, profile_id = _resolve_credentials(
db=db,
tenant_profile_id=tenant_profile_id,
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
)
content = file.file.read()
parsed = parse_sites_csv(content)
response = _create_job_from_urls(
raw_urls=parsed.urls,
skip_default_sites=skip_default_sites,
source_type="csv",
tenant_id=resolved_tenant_id,
client_id=resolved_client_id,
client_secret=resolved_client_secret,
tenant_profile_id=profile_id,
)
if parsed.invalid_rows:
csv_warning = f"CSV issues: {len(parsed.invalid_rows)}"
with SessionLocal() as db:
job = db.get(ScanJob, response.job.id)
if job:
if job.warning_message:
job.warning_message = f"{job.warning_message} | {csv_warning}"
else:
job.warning_message = csv_warning
job.updated_at = datetime.utcnow()
db.commit()
db.refresh(job)
response.job.warning_message = job.warning_message
return response
@app.post("/api/scan-jobs/{job_id}/cancel", response_model=ScanJobSummary)
def cancel_scan_job(job_id: str) -> ScanJobSummary:
with SessionLocal() as db:
stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
job = db.execute(stmt).unique().scalar_one_or_none()
if not job:
raise HTTPException(status_code=404, detail="Job not found")
if job.status not in ("queued", "running"):
raise HTTPException(status_code=409, detail="Job is not queued or running")
now = datetime.utcnow()
job.status = "cancelled"
job.updated_at = now
job.finished_at = now
job.scan_activity = None
db.commit()
db.refresh(job)
stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job_id)
job = db.execute(stmt).unique().scalar_one()
return _to_job_summary(job)
@app.delete("/api/scan-jobs/{job_id}", status_code=204, response_class=Response)
def delete_scan_job(job_id: str) -> Response:
with SessionLocal() as db:
job = db.get(ScanJob, job_id)
if not job:
raise HTTPException(status_code=404, detail="Job not found")
if job.status in ("queued", "running"):
raise HTTPException(status_code=409, detail="Cannot delete a job that is queued or running")
db.delete(job)
db.commit()
return Response(status_code=204)
@app.get("/api/scan-jobs", response_model=list[ScanJobSummary])
def list_scan_jobs(limit: int = 20, tenant_profile_id: str | None = None) -> list[ScanJobSummary]:
with SessionLocal() as db:
stmt = (
select(ScanJob)
.options(joinedload(ScanJob.tenant_profile))
.order_by(ScanJob.created_at.desc())
.limit(max(1, min(limit, 100)))
)
if tenant_profile_id:
stmt = stmt.where(ScanJob.tenant_profile_id == tenant_profile_id)
jobs = list(db.execute(stmt).unique().scalars())
return [_to_job_summary(job) for job in jobs]
@app.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
from .scanner import resolve_sharing_link_members
with SessionLocal() as db:
job = db.get(ScanJob, job_id)
if not job:
raise HTTPException(status_code=404, detail="Job not found")
if job.status in ("queued", "running"):
raise HTTPException(status_code=409, detail="Job is still running")
cert_private_key: str | None = None
cert_thumbprint: str | None = None
if job.tenant_profile_id:
profile = db.get(TenantProfile, job.tenant_profile_id)
if profile:
cert_private_key = profile.cert_private_key
cert_thumbprint = profile.cert_thumbprint
auth = AuthConfig(
tenant_id=job.auth_tenant_id or "",
client_id=job.auth_client_id or "",
client_secret=job.auth_client_secret or "",
cert_private_key=cert_private_key,
cert_thumbprint=cert_thumbprint,
)
all_deviations = list(
db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
)
# Group by (site_url, principal) so each unique group is resolved once
groups: dict[tuple[str, str], list[int]] = {}
for dev in all_deviations:
if not dev.principal.startswith("SharingLinks."):
continue
parts = dev.principal.split(".", 3)
if len(parts) < 3:
continue
link_type = parts[2]
if link_type not in payload.link_types:
continue
key = (dev.site_url, dev.principal)
groups.setdefault(key, []).append(dev.id)
updated_deviations = 0
for (site_url, group_name), dev_ids in groups.items():
members = resolve_sharing_link_members(site_url, group_name, auth)
resolved_members = ", ".join(members) if members else ""
with SessionLocal() as db:
for dev_id in dev_ids:
dev = db.get(PermissionDeviation, dev_id)
if dev:
dev.resolved_members = resolved_members
db.commit()
updated_deviations += len(dev_ids)
return ResolveSharingLinksResponse(
resolved_groups=len(groups),
updated_deviations=updated_deviations,
)
@app.get("/api/scan-jobs/{job_id}/export")
def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
import openpyxl
from openpyxl.styles import Font, PatternFill
with SessionLocal() as db:
job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
if not job:
raise HTTPException(status_code=404, detail="Job not found")
targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
if site_url:
targets_q = targets_q.where(ScanTarget.site_url == site_url)
targets = list(db.execute(targets_q).scalars())
deviations_q = (
select(PermissionDeviation)
.where(PermissionDeviation.job_id == job.id)
.order_by(PermissionDeviation.id.desc())
)
if site_url:
deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
deviations = list(db.execute(deviations_q).scalars())
wb = openpyxl.Workbook()
header_fill = PatternFill(start_color="1E2A3A", end_color="1E2A3A", fill_type="solid")
header_font_white = Font(bold=True, color="FFFFFF")
_risk_styles: dict[str, tuple] = {
"Critical": (
PatternFill(start_color="FDDEDE", end_color="FDDEDE", fill_type="solid"),
Font(bold=True, color="7B0000"),
),
"High": (
PatternFill(start_color="FEE8D3", end_color="FEE8D3", fill_type="solid"),
Font(bold=True, color="7C2D00"),
),
"Low": (
PatternFill(start_color="D6EEF8", end_color="D6EEF8", fill_type="solid"),
Font(bold=True, color="0C4A6E"),
),
"Unknown": (
PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid"),
Font(bold=True, color="555555"),
),
}
def _style_header(ws, headers):
ws.append(headers)
for cell in ws[1]:
cell.font = header_font_white
cell.fill = header_fill
# Targets sheet
ws_targets = wb.active
ws_targets.title = "Targets"
_style_header(ws_targets, ["Site URL", "Status", "Attempts", "Error", "Started", "Finished"])
for t in targets:
ws_targets.append([
t.site_url,
t.status,
t.attempts,
t.error_message or "",
t.started_at.isoformat() if t.started_at else "",
t.finished_at.isoformat() if t.finished_at else "",
])
for col in ws_targets.columns:
ws_targets.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
# Deviations sheet
ws_dev = wb.create_sheet("Deviations")
_style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
for d in deviations:
base = (d.site_url or "").rstrip("/")
obj_rel = d.object_url[len(base):] if base and d.object_url.startswith(base) else d.object_url
link_risk = _sharing_link_risk_label(d.principal)
ws_dev.append([
d.site_url,
obj_rel,
d.object_type,
d.principal,
link_risk,
d.resolved_members or "",
d.role_name,
d.delta_type,
])
if link_risk in _risk_styles:
risk_fill, risk_font = _risk_styles[link_risk]
risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
risk_cell.fill = risk_fill
risk_cell.font = risk_font
for col in ws_dev.columns:
ws_dev.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
buf = io.BytesIO()
wb.save(buf)
buf.seek(0)
filename = f"clearview_job_{job_id}.xlsx"
return StreamingResponse(
buf,
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
)
@app.get("/api/scan-jobs/{job_id}", response_model=ScanJobDetail)
def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
with SessionLocal() as db:
job = db.get(ScanJob, job_id, options=[joinedload(ScanJob.tenant_profile)])
if not job:
raise HTTPException(status_code=404, detail="Job not found")
targets_q = select(ScanTarget).where(ScanTarget.job_id == job.id).order_by(ScanTarget.id.asc())
if site_url:
targets_q = targets_q.where(ScanTarget.site_url == site_url)
targets = list(db.execute(targets_q).scalars())
deviations_q = (
select(PermissionDeviation)
.where(PermissionDeviation.job_id == job.id)
.order_by(PermissionDeviation.site_url.asc(), PermissionDeviation.object_url.asc(), PermissionDeviation.id.asc())
)
if site_url:
deviations_q = deviations_q.where(PermissionDeviation.site_url == site_url)
else:
deviations_q = deviations_q.limit(1000)
deviations = list(db.execute(deviations_q).scalars())
return ScanJobDetail(
**_to_job_summary(job).model_dump(),
targets=[
ScanTargetItem(
id=t.id,
site_url=t.site_url,
status=t.status,
attempts=t.attempts,
error_message=t.error_message,
started_at=t.started_at,
finished_at=t.finished_at,
)
for t in targets
],
deviations=[
PermissionDeviationItem(
id=d.id,
site_url=d.site_url,
object_url=d.object_url,
object_type=d.object_type,
principal=d.principal,
role_name=d.role_name,
delta_type=d.delta_type,
resolved_members=d.resolved_members,
created_at=d.created_at,
)
for d in deviations
],
)
# ---------------------------------------------------------------------------
# Onboarding
# ---------------------------------------------------------------------------
@app.post("/api/onboarding/create-scan-app", response_model=CreateScanAppResponse)
def onboarding_create_scan_app(payload: CreateScanAppRequest) -> CreateScanAppResponse:
    """Create a scan app for the requested tenant and return its identifiers.

    Raises:
        HTTPException: 400 when ``create_scan_app_for_tenant`` raises
            ``OnboardingError``; 500 for any other exception.
    """
    try:
        created = create_scan_app_for_tenant(
            tenant_id=payload.tenant_id,
            display_name=payload.display_name,
        )
    except OnboardingError as err:
        raise HTTPException(status_code=400, detail=str(err)) from err
    except Exception as err:  # noqa: BLE001
        raise HTTPException(status_code=500, detail=f"Unexpected onboarding error: {err}") from err
    else:
        return CreateScanAppResponse(
            tenant_id=created.tenant_id,
            client_id=created.client_id,
            client_secret=created.client_secret,
            app_object_id=created.app_object_id,
            service_principal_id=created.service_principal_id,
            display_name=created.display_name,
        )
@app.get("/api/onboarding/microsoft/connect-url", response_model=ConnectMicrosoftResponse)
def onboarding_microsoft_connect_url() -> ConnectMicrosoftResponse:
    """Return the URL produced by ``create_connect_url``.

    Raises:
        HTTPException: 400 when an ``OnboardingError`` is raised.
    """
    try:
        url = create_connect_url()
        return ConnectMicrosoftResponse(connect_url=url)
    except OnboardingError as err:
        raise HTTPException(status_code=400, detail=str(err)) from err
@app.get("/api/onboarding/microsoft/callback")
def onboarding_microsoft_callback(
    tenant: str | None = None,
    state: str | None = None,
    error: str | None = None,
    error_description: str | None = None,
) -> RedirectResponse:
    """Handle the onboarding callback and redirect to ``/`` with a status.

    Checks run in this order: an ``error`` parameter, then the ``state``
    value (via ``consume_callback_state``), then the presence of ``tenant``.

    All query values are percent-encoded with ``urlencode`` so that
    caller-supplied text (``error_description``, ``tenant``) cannot add or
    truncate parameters in the redirect URL. Spaces are still emitted as ``+``.

    Returns:
        A redirect to ``/?onboarding_status=...`` describing the outcome.
    """
    from urllib.parse import urlencode

    def _redirect(**params: str) -> RedirectResponse:
        # Build the redirect target with properly encoded query values.
        return RedirectResponse(url="/?" + urlencode(params))

    if error:
        return _redirect(onboarding_status="error", onboarding_message=error_description or error)
    if not state or not consume_callback_state(state):
        return _redirect(onboarding_status="error", onboarding_message="invalid_or_expired_state")
    if not tenant:
        return _redirect(onboarding_status="error", onboarding_message="missing_tenant")
    return _redirect(onboarding_status="connected", tenant_id=tenant)
@app.get("/api/onboarding/status")
def onboarding_status() -> dict[str, bool]:
    """Report whether all three onboarding settings are configured.

    ``automated_available`` is true only when the client id, client secret
    and redirect URI in ``config`` are all truthy.
    """
    from . import config

    for setting_name in ("ONBOARDING_CLIENT_ID", "ONBOARDING_CLIENT_SECRET", "ONBOARDING_REDIRECT_URI"):
        if not getattr(config, setting_name):
            return {"automated_available": False}
    return {"automated_available": True}
# ---------------------------------------------------------------------------
# Static files
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@app.get("/") @app.get("/")
@ -543,190 +73,3 @@ def index() -> FileResponse:
app.mount("/", StaticFiles(directory=SITE_DIR, html=True), name="site") app.mount("/", StaticFiles(directory=SITE_DIR, html=True), name="site")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _resolve_credentials(
db,
tenant_profile_id: str | None,
tenant_id: str | None,
client_id: str | None,
client_secret: str | None,
) -> tuple[str, str, str | None, str | None]:
if tenant_profile_id:
profile = db.get(TenantProfile, tenant_profile_id)
if not profile:
raise HTTPException(status_code=404, detail="Tenant profile not found")
if not profile.client_secret and not profile.cert_thumbprint:
raise HTTPException(
status_code=400,
detail="Tenant profile has no client secret and no certificate. Generate a certificate first.",
)
return profile.tenant_id, profile.client_id, profile.client_secret, tenant_profile_id
if tenant_id and client_id and client_secret:
return tenant_id.strip(), client_id.strip(), client_secret.strip(), None
raise HTTPException(
status_code=400,
detail="Provide either tenant_profile_id or all of tenant_id, client_id, and client_secret.",
)
def _create_job_from_urls(
    raw_urls: list[str],
    skip_default_sites: bool,
    source_type: str,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    tenant_profile_id: str | None = None,
) -> ScanJobCreateResponse:
    """Validate the given site URLs and persist a scan job with its targets.

    Each raw URL is normalized with ``normalize_site_url``. URLs that fail
    normalization are reported as invalid, repeated normalized URLs are
    dropped, and (when ``skip_default_sites`` is set) URLs for which
    ``is_default_site`` is true are reported as skipped. Every remaining URL
    becomes a queued ``ScanTarget``.

    When no URL is accepted, the job is stored as already ``completed`` with
    a warning message and a finish timestamp.

    Returns:
        The created job summary together with the accepted, skipped and
        invalid URL lists.
    """
    accepted_urls: list[str] = []
    skipped_default_urls: list[str] = []
    invalid_urls: list[str] = []
    already_seen: set[str] = set()

    for candidate in raw_urls:
        site = normalize_site_url(candidate)
        if not site:
            invalid_urls.append(candidate)
        elif site not in already_seen:
            already_seen.add(site)
            if skip_default_sites and is_default_site(site):
                skipped_default_urls.append(site)
            else:
                accepted_urls.append(site)

    has_targets = bool(accepted_urls)

    with SessionLocal() as db:
        timestamp = datetime.utcnow()
        job = ScanJob(
            id=str(uuid.uuid4()),
            source_type=source_type,
            status="queued" if has_targets else "completed",
            skip_default_sites=skip_default_sites,
            tenant_profile_id=tenant_profile_id,
            auth_tenant_id=tenant_id,
            auth_client_id=client_id,
            auth_client_secret=client_secret,
            total_targets=len(accepted_urls),
            skipped_targets=len(skipped_default_urls),
            warning_message=(
                None if has_targets else "No scannable sites after validation and default-site filtering"
            ),
            error_message=None,
            created_at=timestamp,
            updated_at=timestamp,
            finished_at=None if has_targets else timestamp,
        )
        db.add(job)
        # Flush so the job row is written before its targets are added.
        db.flush()

        db.add_all(
            [
                ScanTarget(
                    job_id=job.id,
                    site_url=accepted,
                    source_row=row_number,
                    status="queued",
                    attempts=0,
                    created_at=timestamp,
                    updated_at=timestamp,
                )
                for row_number, accepted in enumerate(accepted_urls, start=1)
            ]
        )
        db.commit()

        # Reload the job with its tenant profile so the summary can read it.
        reload_stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
        job = db.execute(reload_stmt).unique().scalar_one()

        return ScanJobCreateResponse(
            job=_to_job_summary(job),
            accepted_urls=accepted_urls,
            skipped_default_urls=skipped_default_urls,
            invalid_urls=invalid_urls,
        )
def _to_job_summary(job: ScanJob) -> ScanJobSummary:
    """Map a ``ScanJob`` row onto the ``ScanJobSummary`` API model.

    ``tenant_name`` is taken from the linked tenant profile when there is
    one, and ``scan_activity`` is only exposed while the job is ``running``.
    """
    profile = job.tenant_profile
    is_running = job.status == "running"
    return ScanJobSummary(
        id=job.id,
        status=job.status,
        source_type=job.source_type,
        skip_default_sites=job.skip_default_sites,
        tenant_profile_id=job.tenant_profile_id,
        tenant_name=profile.name if profile else None,
        total_targets=job.total_targets,
        processed_targets=job.processed_targets,
        successful_targets=job.successful_targets,
        failed_targets=job.failed_targets,
        skipped_targets=job.skipped_targets,
        items_scanned=job.items_scanned,
        scan_activity=job.scan_activity if is_running else None,
        warning_message=job.warning_message,
        error_message=job.error_message,
        created_at=job.created_at,
        updated_at=job.updated_at,
        started_at=job.started_at,
        finished_at=job.finished_at,
    )
def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
    """Map a ``TenantProfile`` row onto the ``TenantProfileItem`` API model.

    ``has_certificate`` is derived from whether a certificate thumbprint is
    stored. The client secret and private key are not copied into the item.
    """
    thumbprint = profile.cert_thumbprint
    return TenantProfileItem(
        id=profile.id,
        name=profile.name,
        tenant_id=profile.tenant_id,
        client_id=profile.client_id,
        has_certificate=bool(thumbprint),
        cert_thumbprint=thumbprint,
        cert_expires_at=profile.cert_expires_at,
        created_at=profile.created_at,
        updated_at=profile.updated_at,
    )
def _sharing_link_risk_label(principal: str) -> str:
if not principal.startswith("SharingLinks."):
return ""
parts = principal.split(".", 3)
link_type = parts[2] if len(parts) >= 3 else ""
if link_type.startswith("Anonymous"):
return "Critical"
if link_type == "Flexible":
return "High"
if link_type.startswith("Organization"):
return "Low"
if link_type.startswith("Direct"):
return "Low"
return "Unknown"
def _ensure_schema_columns() -> None:
    """Run a fixed list of ``ALTER TABLE`` statements inside one transaction.

    Most statements use ``ADD COLUMN IF NOT EXISTS``; one drops the
    ``NOT NULL`` constraint on ``tenant_profiles.client_secret``.
    """
    ddl_statements = (
        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_tenant_id VARCHAR(128)",
        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_client_id VARCHAR(128)",
        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS auth_client_secret TEXT",
        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS tenant_profile_id VARCHAR(36)",
        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS items_scanned INTEGER NOT NULL DEFAULT 0",
        "ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS scan_activity TEXT",
        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS client_secret TEXT",
        "ALTER TABLE tenant_profiles ALTER COLUMN client_secret DROP NOT NULL",
        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_private_key TEXT",
        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_thumbprint VARCHAR(64)",
        "ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_expires_at TIMESTAMP",
        "ALTER TABLE permission_deviations ADD COLUMN IF NOT EXISTS resolved_members TEXT",
    )
    with engine.begin() as connection:
        for ddl in ddl_statements:
            connection.execute(text(ddl))

View File

@ -0,0 +1,58 @@
"""Alembic environment for Clearview.
Reuses the application's SQLAlchemy engine (already configured with the
normalized DATABASE_URL and pool_pre_ping) so migrations run against exactly
the same database the app uses. Logging config from alembic.ini is applied
only when Alembic is invoked through the CLI; programmatic invocation from
``clearview_app.db_migrate`` passes a Config without a file.
"""
from __future__ import annotations
from logging.config import fileConfig
from alembic import context
from clearview_app.config import DATABASE_URL
from clearview_app.db import _normalize_database_url, engine as app_engine
from clearview_app.models import Base
config = context.config

# Applying the logging section of the config file is best-effort: a failure
# must not stop migrations, but it is reported instead of being silently
# discarded so a broken logging config can be noticed.
if config.config_file_name is not None:
    try:
        fileConfig(config.config_file_name)
    except Exception as exc:  # noqa: BLE001 - logging config is best-effort
        import logging

        logging.getLogger(__name__).warning(
            "Could not apply logging configuration from %s: %s",
            config.config_file_name,
            exc,
        )

# Metadata of the application's models, passed to context.configure below.
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Emit SQL to stdout without a live DB connection.

    The context is configured from the normalized ``DATABASE_URL`` only; no
    engine or connection is used in this mode.
    """
    offline_options = {
        "url": _normalize_database_url(DATABASE_URL),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "compare_type": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations against the live database via the app engine.

    A connection is opened on the application's engine and stays open for
    the whole migration run.
    """
    online_options = {
        "target_metadata": target_metadata,
        "compare_type": True,
    }
    with app_engine.connect() as live_connection:
        context.configure(connection=live_connection, **online_options)

        with context.begin_transaction():
            context.run_migrations()
# Pick the runner that matches the mode Alembic was invoked in, then run it.
_run_migrations = run_migrations_offline if context.is_offline_mode() else run_migrations_online
_run_migrations()

View File

@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from __future__ import annotations
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@ -0,0 +1,31 @@
"""baseline schema
Captures the full Clearview schema as defined by the SQLAlchemy models at the
time Alembic was adopted. Creating it via ``Base.metadata.create_all`` keeps the
baseline guaranteed-identical to the models (the same DDL the app emitted before
Alembic). Existing databases are ``stamp``-ed to this revision rather than
re-running ``upgrade`` (see ``clearview_app.db_migrate``).
Revision ID: 0001_baseline
Revises:
Create Date: 2026-05-26
"""
from __future__ import annotations
from alembic import op
from clearview_app.models import Base
# revision identifiers, used by Alembic.
revision = "0001_baseline"
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create every table in ``Base.metadata`` on the migration connection."""
    connection = op.get_bind()
    Base.metadata.create_all(bind=connection)
def downgrade() -> None:
    """Drop every table in ``Base.metadata`` on the migration connection."""
    connection = op.get_bind()
    Base.metadata.drop_all(bind=connection)

View File

@ -0,0 +1,63 @@
"""convert timestamp columns to timestamptz
The app now uses timezone-aware UTC datetimes (DateTime(timezone=True)).
Existing databases store naive ``timestamp without time zone`` values that were
written as UTC, so we reinterpret them as UTC while converting. The conversion
is guarded per column on the current type, so it is a no-op on databases whose
columns are already ``timestamptz`` (e.g. a fresh DB created from the updated
baseline models).
Revision ID: 0002_timestamptz
Revises: 0001_baseline
Create Date: 2026-05-26
"""
from __future__ import annotations
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "0002_timestamptz"
down_revision = "0001_baseline"
branch_labels = None
depends_on = None
# Table -> datetime columns (names come from our own models, never user input).
_COLUMNS: dict[str, tuple[str, ...]] = {
"tenant_profiles": ("cert_expires_at", "created_at", "updated_at"),
"scan_jobs": ("created_at", "updated_at", "started_at", "finished_at", "heartbeat_at"),
"scan_targets": ("last_probe_at", "created_at", "updated_at", "started_at", "finished_at"),
"permission_deviations": ("created_at",),
}
def _column_type(bind, table: str, column: str) -> str | None:
    """Return the SQL type name of ``table.column``, or ``None`` if absent.

    The table is resolved with ``to_regclass``, i.e. through the connection's
    search path, which is how the unqualified ``ALTER TABLE`` statements in
    this migration resolve it as well. The previous ``information_schema``
    lookup had no schema filter and could return the type of a same-named
    table in a different schema.

    Args:
        bind: Connection the migration runs on.
        table: Unqualified table name.
        column: Column name.

    Returns:
        The type name without modifiers, e.g. ``"timestamp without time zone"``
        or ``"timestamp with time zone"``; ``None`` when the table or column
        does not exist.
    """
    return bind.execute(
        sa.text(
            # format_type(..., NULL) yields the bare type name (no precision).
            "SELECT format_type(a.atttypid, NULL) "
            "FROM pg_catalog.pg_attribute AS a "
            "WHERE a.attrelid = to_regclass(:t) "
            "AND a.attname = :c "
            "AND NOT a.attisdropped"
        ),
        {"t": table, "c": column},
    ).scalar()
def upgrade() -> None:
    """Convert every listed naive timestamp column to ``timestamptz``.

    Stored values are reinterpreted as UTC. Columns whose current type is not
    ``timestamp without time zone`` are left untouched.
    """
    connection = op.get_bind()
    for table_name, column_names in _COLUMNS.items():
        for column_name in column_names:
            if _column_type(connection, table_name, column_name) != "timestamp without time zone":
                continue
            op.execute(
                f'ALTER TABLE {table_name} ALTER COLUMN {column_name} '
                f"TYPE timestamptz USING {column_name} AT TIME ZONE 'UTC'"
            )
def downgrade() -> None:
    """Convert every listed ``timestamptz`` column back to naive timestamps.

    Values are rendered in UTC. Columns whose current type is not
    ``timestamp with time zone`` are left untouched.
    """
    connection = op.get_bind()
    for table_name, column_names in _COLUMNS.items():
        for column_name in column_names:
            if _column_type(connection, table_name, column_name) != "timestamp with time zone":
                continue
            op.execute(
                f'ALTER TABLE {table_name} ALTER COLUMN {column_name} '
                f"TYPE timestamp USING {column_name} AT TIME ZONE 'UTC'"
            )

View File

@ -0,0 +1,67 @@
"""Create users, user_sessions, auth_audit tables.
Revision ID: 0003_auth_tables
Revises: 0002_timestamptz
Create Date: 2026-05-28
"""
from __future__ import annotations
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision = "0003_auth_tables"
down_revision = "0002_timestamptz"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the ``users``, ``user_sessions`` and ``auth_audit`` tables and their indexes."""

    def _now_column(name: str) -> sa.Column:
        # Non-null timestamptz column whose server-side default is now().
        return sa.Column(name, sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()"))

    # NOTE(review): username is declared unique on the column and again via a
    # unique index below — presumably redundant; confirm against the model.
    op.create_table(
        "users",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("username", sa.String(length=128), nullable=False, unique=True),
        sa.Column("password_hash", sa.Text(), nullable=False),
        sa.Column("role", sa.String(length=16), nullable=False),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default=sa.true()),
        _now_column("created_at"),
        _now_column("updated_at"),
    )
    op.create_index("ix_users_username", "users", ["username"], unique=True)

    # Sessions are removed together with their user (ON DELETE CASCADE).
    op.create_table(
        "user_sessions",
        sa.Column("id", sa.String(length=64), primary_key=True),
        sa.Column("user_id", sa.Integer(), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
        _now_column("created_at"),
        sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
        _now_column("last_seen_at"),
        sa.Column("ip", sa.String(length=64), nullable=True),
        sa.Column("user_agent", sa.Text(), nullable=True),
        sa.Column("remember", sa.Boolean(), nullable=False, server_default=sa.false()),
    )
    for indexed_column in ("user_id", "expires_at"):
        op.create_index(f"ix_user_sessions_{indexed_column}", "user_sessions", [indexed_column])

    # Audit rows outlive their user: the reference is set to NULL on delete.
    op.create_table(
        "auth_audit",
        sa.Column("id", sa.BigInteger(), primary_key=True, autoincrement=True),
        _now_column("ts"),
        sa.Column("user_id", sa.Integer(), sa.ForeignKey("users.id", ondelete="SET NULL"), nullable=True),
        sa.Column("event", sa.String(length=32), nullable=False),
        sa.Column("ip", sa.String(length=64), nullable=True),
        sa.Column("detail", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
    )
    for indexed_column in ("ts", "event"):
        op.create_index(f"ix_auth_audit_{indexed_column}", "auth_audit", [indexed_column])
def downgrade() -> None:
    """Drop the auth tables and their indexes in reverse creation order."""
    # (table, indexes dropped before the table), dependants before ``users``.
    teardown_plan = (
        ("auth_audit", ("ix_auth_audit_event", "ix_auth_audit_ts")),
        ("user_sessions", ("ix_user_sessions_expires_at", "ix_user_sessions_user_id")),
        ("users", ("ix_users_username",)),
    )
    for table_name, index_names in teardown_plan:
        for index_name in index_names:
            op.drop_index(index_name, table_name=table_name)
        op.drop_table(table_name)

View File

@ -1,11 +1,16 @@
from __future__ import annotations from __future__ import annotations
from datetime import datetime from datetime import datetime, timezone
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
def _utcnow() -> datetime:
"""Timezone-aware UTC now, used as the default for timestamp columns."""
return datetime.now(timezone.utc)
class Base(DeclarativeBase): class Base(DeclarativeBase):
pass pass
@ -16,13 +21,15 @@ class TenantProfile(Base):
id: Mapped[str] = mapped_column(String(36), primary_key=True) id: Mapped[str] = mapped_column(String(36), primary_key=True)
name: Mapped[str] = mapped_column(String(256)) name: Mapped[str] = mapped_column(String(256))
tenant_id: Mapped[str] = mapped_column(String(128)) tenant_id: Mapped[str] = mapped_column(String(128))
primary_domain: Mapped[str | None] = mapped_column(String(256), nullable=True)
client_id: Mapped[str] = mapped_column(String(128)) client_id: Mapped[str] = mapped_column(String(128))
client_secret: Mapped[str | None] = mapped_column(Text, nullable=True) client_secret: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True) cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_public_pem: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True) cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True)
cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
jobs: Mapped[list["ScanJob"]] = relationship(back_populates="tenant_profile") jobs: Mapped[list["ScanJob"]] = relationship(back_populates="tenant_profile")
@ -34,6 +41,7 @@ class ScanJob(Base):
status: Mapped[str] = mapped_column(String(32), default="queued", index=True) status: Mapped[str] = mapped_column(String(32), default="queued", index=True)
source_type: Mapped[str] = mapped_column(String(16), default="manual") source_type: Mapped[str] = mapped_column(String(16), default="manual")
skip_default_sites: Mapped[bool] = mapped_column(Boolean, default=True) skip_default_sites: Mapped[bool] = mapped_column(Boolean, default=True)
scan_type: Mapped[str] = mapped_column(String(32), default="sharepoint", index=True)
tenant_profile_id: Mapped[str | None] = mapped_column( tenant_profile_id: Mapped[str | None] = mapped_column(
String(36), ForeignKey("tenant_profiles.id", ondelete="SET NULL"), nullable=True, index=True String(36), ForeignKey("tenant_profiles.id", ondelete="SET NULL"), nullable=True, index=True
) )
@ -53,11 +61,11 @@ class ScanJob(Base):
warning_message: Mapped[str | None] = mapped_column(Text, nullable=True) warning_message: Mapped[str | None] = mapped_column(Text, nullable=True)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True) error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) heartbeat_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
tenant_profile: Mapped["TenantProfile | None"] = relationship(back_populates="jobs") tenant_profile: Mapped["TenantProfile | None"] = relationship(back_populates="jobs")
targets: Mapped[list["ScanTarget"]] = relationship(back_populates="job", cascade="all,delete-orphan") targets: Mapped[list["ScanTarget"]] = relationship(back_populates="job", cascade="all,delete-orphan")
@ -76,10 +84,14 @@ class ScanTarget(Base):
attempts: Mapped[int] = mapped_column(Integer, default=0) attempts: Mapped[int] = mapped_column(Integer, default=0)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True) error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) last_probe_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) last_probe_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) last_probe_message: Mapped[str | None] = mapped_column(Text, nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
job: Mapped[ScanJob] = relationship(back_populates="targets") job: Mapped[ScanJob] = relationship(back_populates="targets")
deviations: Mapped[list["PermissionDeviation"]] = relationship(back_populates="target", cascade="all,delete-orphan") deviations: Mapped[list["PermissionDeviation"]] = relationship(back_populates="target", cascade="all,delete-orphan")
@ -98,9 +110,10 @@ class PermissionDeviation(Base):
principal: Mapped[str] = mapped_column(Text) principal: Mapped[str] = mapped_column(Text)
role_name: Mapped[str] = mapped_column(Text) role_name: Mapped[str] = mapped_column(Text)
delta_type: Mapped[str] = mapped_column(String(32)) delta_type: Mapped[str] = mapped_column(String(32))
permission_type: Mapped[str | None] = mapped_column(String(32), nullable=True)
resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True) resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
job: Mapped[ScanJob] = relationship(back_populates="deviations") job: Mapped[ScanJob] = relationship(back_populates="deviations")
target: Mapped[ScanTarget] = relationship(back_populates="deviations") target: Mapped[ScanTarget] = relationship(back_populates="deviations")

View File

@ -1,467 +1,27 @@
from __future__ import annotations """
Backwards-compatibility shim. New code should import from clearview_app.scanners.
"""
import time from .scanners.common import (
from collections.abc import Callable AuthConfig,
from dataclasses import dataclass, field DeviationRecord,
from urllib.parse import urlparse ProbeResult,
ProgressCallback,
import msal ScanResult,
import requests )
from .scanners.sharepoint import (
from .config import ( probe_site,
SCAN_HTTP_BACKOFF_SEC, resolve_sharing_link_members,
SCAN_HTTP_MAX_RETRIES, scan_site_for_deviations,
SCAN_HTTP_TIMEOUT_SEC,
SCAN_LIST_PAGE_SIZE,
SCAN_MAX_ITEMS_PER_LIST,
SHAREPOINT_SCAN_MODE,
) )
__all__ = [
@dataclass "AuthConfig",
class DeviationRecord: "DeviationRecord",
object_url: str "ProbeResult",
object_type: str "ProgressCallback",
principal: str "ScanResult",
role_name: str "probe_site",
delta_type: str "resolve_sharing_link_members",
"scan_site_for_deviations",
]
@dataclass
class ScanResult:
    """Outcome of scanning one site: the deviations found plus an optional warning."""

    # Deviation records collected during the scan.
    deviations: list[DeviationRecord]
    # Human-readable warning text; None when there is nothing to report.
    warning: str | None = None
@dataclass(frozen=True)
class PermissionEntry:
    """One (principal, role) pair; frozen, so instances are hashable and usable in sets."""

    # Identifier of the principal the role is assigned to.
    principal: str
    # Name of the assigned role.
    role_name: str
@dataclass(frozen=True)
class AuthConfig:
    """Immutable app credentials: a client secret and/or a certificate."""

    tenant_id: str
    client_id: str
    # Empty string means "no client secret configured".
    client_secret: str = ""
    # Certificate credentials; both values are needed for certificate auth.
    cert_private_key: str | None = None
    cert_thumbprint: str | None = None
_TOKEN_CACHE: dict[str, str] = {}
ProgressCallback = Callable[[str, int], None]
def scan_site_for_deviations(
    site_url: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Scan SharePoint permission deviations versus site-root role assignments.
    Only SharePoint role assignments are used (site/list/folder/file scope).
    No filesystem/NTFS permission model is used.

    Args:
        site_url: Absolute URL of the site to scan.
        auth: Credentials used to obtain the access token.
        progress: Optional callback invoked as ``progress(activity, items)``.
            NOTE(review): ``items`` is presumably an increment that the caller
            accumulates — confirm against the caller.

    Returns:
        The de-duplicated deviations plus a warning string when the
        per-list item cap was hit. In ``placeholder`` mode an empty result
        with an explanatory warning is returned.

    Raises:
        RuntimeError: For an unsupported ``SHAREPOINT_SCAN_MODE`` or
            incomplete ``auth`` settings.
    """
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ScanResult(
            deviations=[],
            warning=(
                "SharePoint scan mode is 'placeholder'. "
                "Set SHAREPOINT_SCAN_MODE=sharepoint_app_only and configure Azure app credentials."
            ),
        )
    if SHAREPOINT_SCAN_MODE != "sharepoint_app_only":
        raise RuntimeError(f"Unsupported SHAREPOINT_SCAN_MODE='{SHAREPOINT_SCAN_MODE}'")
    _validate_auth_config(auth)

    # Only lists with this BaseTemplate value are scanned.
    document_library_template = 101
    # Progress is reported once per this many items seen in a library.
    progress_batch = 50
    # Query shared by the site-, list- and item-level role-assignment calls.
    role_query = (
        "?$expand=Member,RoleDefinitionBindings"
        "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name"
    )

    def _report(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    host = urlparse(site_url).netloc
    _report(f"Connecting to {host}")
    token = _get_token_for_host(host, auth)
    base_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }

    _report(f"Loading site permissions: {site_url}")
    root_set = set(_get_role_assignments(f"{site_url}/_api/web/roleassignments{role_query}", base_headers))

    deviations: list[DeviationRecord] = []
    warnings: list[str] = []
    lists_url = (
        f"{site_url}/_api/web/lists"
        "?$select=Id,Title,BaseTemplate,Hidden,ItemCount,RootFolder/ServerRelativeUrl,HasUniqueRoleAssignments"
        "&$expand=RootFolder"
    )
    for lst in _iter_paged(lists_url, base_headers):
        if _to_bool(lst.get("Hidden")):
            continue
        if _to_int(lst.get("BaseTemplate")) != document_library_template:
            continue
        list_id = str(lst.get("Id", "")).strip()
        if not list_id:
            continue
        list_title = str(lst.get("Title") or "Document Library")
        list_url = _absolute_url(host, str((lst.get("RootFolder") or {}).get("ServerRelativeUrl") or ""))
        _report(f"Library: {list_title}")

        # Library-level deviations exist only when the list breaks inheritance.
        if _to_bool(lst.get("HasUniqueRoleAssignments")):
            list_assignments = _get_role_assignments(
                f"{site_url}/_api/web/lists(guid'{list_id}')/roleassignments{role_query}",
                base_headers,
            )
            deviations.extend(
                _deviation_records_only_added(
                    object_url=list_url,
                    object_type="DocumentLibrary",
                    root_set=root_set,
                    current_set=set(list_assignments),
                )
            )

        items_processed = 0
        items_total = 0
        items_url = (
            f"{site_url}/_api/web/lists(guid'{list_id}')/items"
            f"?$select=Id,FileRef,FileSystemObjectType,HasUniqueRoleAssignments&$top={SCAN_LIST_PAGE_SIZE}"
        )
        for item in _iter_paged(items_url, base_headers):
            items_total += 1
            if items_total % progress_batch == 0:
                _report(f"Library: {list_title} ({items_total} items scanned)", progress_batch)
            if not _to_bool(item.get("HasUniqueRoleAssignments")):
                continue
            # The cap counts only items whose role assignments were fetched.
            if items_processed >= SCAN_MAX_ITEMS_PER_LIST:
                warnings.append(
                    f"List '{list_title}' hit SCAN_MAX_ITEMS_PER_LIST={SCAN_MAX_ITEMS_PER_LIST}; remaining unique-permission items skipped"
                )
                break
            item_id = _to_int(item.get("Id"))
            if item_id <= 0:
                continue
            file_ref = str(item.get("FileRef") or "")
            if not file_ref:
                continue
            item_type = "File" if _to_int(item.get("FileSystemObjectType")) == 0 else "Folder"
            item_assignments = _get_role_assignments(
                f"{site_url}/_api/web/lists(guid'{list_id}')/items({item_id})/roleassignments{role_query}",
                base_headers,
            )
            deviations.extend(
                _deviation_records_only_added(
                    object_url=_absolute_url(host, file_ref),
                    object_type=item_type,
                    root_set=root_set,
                    current_set=set(item_assignments),
                )
            )
            items_processed += 1

        # Report the items seen since the last full batch; without this the
        # final (items_total % progress_batch) items of every library were
        # never reported through the callback.
        unreported = items_total % progress_batch
        if unreported:
            _report(f"Library: {list_title} ({items_total} items scanned)", unreported)

    _report("Scan complete", 0)
    warning = " | ".join(warnings) if warnings else None
    return ScanResult(deviations=_deduplicate_hierarchical(deviations), warning=warning)
def resolve_sharing_link_members(
    site_url: str,
    group_name: str,
    auth: AuthConfig,
) -> list[str]:
    """
    Return the members of a SharePoint SharingLinks group.
    Returns an empty list for anonymous links (no resolvable members).

    Args:
        site_url: Absolute URL of the site that owns the group.
        group_name: Name of the site group to look up.
        auth: Credentials used to obtain the access token.

    Returns:
        One entry per member: the e-mail address when present, otherwise the
        title, otherwise the login name. Accounts whose login starts with
        ``SHAREPOINT\\`` or ``c:0(.s|true`` are skipped. Any failure of the
        lookup request yields an empty list.

    Raises:
        RuntimeError: When ``auth`` is incomplete or no token can be obtained.
    """
    from urllib.parse import quote

    _validate_auth_config(auth)
    host = urlparse(site_url).netloc
    token = _get_token_for_host(host, auth)
    headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }
    # Double single quotes for the OData string literal, then percent-encode
    # the name so characters such as '#', '&', '?' or '/' cannot change the
    # request path or query.
    encoded = quote(group_name.replace("'", "''"), safe="'")
    url = (
        f"{site_url}/_api/web/sitegroups/getbyname('{encoded}')/users"
        "?$select=LoginName,Email,Title"
    )
    try:
        data = _request_json(url, headers)
    except Exception:  # noqa: BLE001
        # Deliberate best-effort: an unresolvable group is reported as empty.
        return []

    members: list[str] = []
    for user in _extract_values(data):
        email = str(user.get("Email") or "").strip()
        login = str(user.get("LoginName") or "").strip()
        title = str(user.get("Title") or "").strip()
        # Skip built-in SharePoint system accounts
        if login.upper().startswith("SHAREPOINT\\") or login.startswith("c:0(.s|true"):
            continue
        display = email or title or login
        if display:
            members.append(display)
    return members
def _validate_auth_config(auth: AuthConfig) -> None:
missing = []
if not auth.tenant_id:
missing.append("tenant_id")
if not auth.client_id:
missing.append("client_id")
if not auth.client_secret and not (auth.cert_thumbprint and auth.cert_private_key):
missing.append("client_secret or certificate")
if missing:
raise RuntimeError("Missing required Azure auth settings: " + ", ".join(missing))
def _get_token_for_host(host: str, auth: AuthConfig) -> str:
    """Return an access token for ``https://{host}/.default``.

    Tokens are cached in ``_TOKEN_CACHE`` per host, tenant, client and auth
    method. The expiry time is stored next to the token (as a string, under
    ``"<cache key>|expires_at"``) and a cached token is reused only while it
    is valid for at least another minute. Previously a token was cached with
    no expiry and kept being returned after it had expired.

    NOTE(review): assumes nothing else treats every value in ``_TOKEN_CACHE``
    as a token — confirm against other users of the cache.

    Args:
        host: Host name the token is requested for.
        auth: Credentials; certificate auth is used when both thumbprint and
            private key are set, otherwise the client secret.

    Raises:
        RuntimeError: When the token response contains no access token.
    """
    import time

    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
    expiry_key = f"{cache_key}|expires_at"
    refresh_margin_sec = 60

    cached = _TOKEN_CACHE.get(cache_key)
    if cached:
        try:
            expires_at = float(_TOKEN_CACHE.get(expiry_key, "0"))
        except ValueError:
            expires_at = 0.0
        if time.time() < expires_at - refresh_margin_sec:
            return cached

    scope = f"https://{host}/.default"
    authority = f"https://login.microsoftonline.com/{auth.tenant_id}"

    if auth_method == "cert":
        client_credential = {
            "thumbprint": auth.cert_thumbprint,
            "private_key": auth.cert_private_key,
        }
    else:
        client_credential = auth.client_secret

    app = msal.ConfidentialClientApplication(
        client_id=auth.client_id,
        authority=authority,
        client_credential=client_credential,
    )
    result = app.acquire_token_for_client(scopes=[scope])
    if "access_token" not in result:
        error = result.get("error", "unknown")
        description = result.get("error_description", "")
        raise RuntimeError(f"Token request failed ({error}): {description[:300]}")

    token = str(result["access_token"])
    # A missing or unusable lifetime is treated as 0, so the token is simply
    # requested again on the next call instead of being reused blindly.
    try:
        lifetime_sec = float(result.get("expires_in") or 0)
    except (TypeError, ValueError):
        lifetime_sec = 0.0
    _TOKEN_CACHE[cache_key] = token
    _TOKEN_CACHE[expiry_key] = str(time.time() + lifetime_sec)
    return token
def _iter_paged(url: str, headers: dict[str, str]):
    """Yield every item of a paged collection, following next links.

    Each page is fetched with ``_request_json``; iteration stops when a page
    carries no next link.
    """
    page_url = url
    while page_url:
        payload = _request_json(page_url, headers)
        yield from _extract_values(payload)
        page_url = _extract_next_link(payload)
def _request_json(url: str, headers: dict[str, str]) -> dict:
    """GET ``url`` and return the decoded JSON body, retrying on failure.

    Up to ``SCAN_HTTP_MAX_RETRIES`` attempts are made. HTTP 429 and 503 wait
    for the ``Retry-After`` value when it parses to a positive number,
    otherwise for a backoff that grows with the attempt number. Any other
    failure (including HTTP status >= 400) is retried after that same
    backoff.

    Compared with the previous version, a 429/503 on the final attempt no
    longer sleeps before giving up, and the raised error names the HTTP
    status instead of reporting ``None``.

    Raises:
        RuntimeError: When every attempt failed.
    """
    last_error: str | None = "no request attempts were made"
    for attempt in range(1, SCAN_HTTP_MAX_RETRIES + 1):
        is_last_attempt = attempt >= SCAN_HTTP_MAX_RETRIES
        try:
            response = requests.get(url, headers=headers, timeout=SCAN_HTTP_TIMEOUT_SEC)
            if response.status_code in (429, 503):
                # Remember why this attempt failed so the final error is informative.
                last_error = f"HTTP {response.status_code}: throttled or service unavailable"
                if not is_last_attempt:
                    retry_after = _to_int(response.headers.get("Retry-After"))
                    delay = retry_after if retry_after > 0 else SCAN_HTTP_BACKOFF_SEC * attempt
                    time.sleep(delay)
                continue
            if response.status_code >= 400:
                raise RuntimeError(f"HTTP {response.status_code}: {response.text[:300]}")
            return response.json()
        except Exception as exc:  # noqa: BLE001
            last_error = str(exc)
            if not is_last_attempt:
                time.sleep(SCAN_HTTP_BACKOFF_SEC * attempt)
                continue
            raise RuntimeError(f"Request failed for {url}: {last_error}") from exc
    raise RuntimeError(f"Request failed for {url}: {last_error}")
def _extract_values(data: dict) -> list[dict]:
if "value" in data and isinstance(data["value"], list):
return data["value"]
d = data.get("d")
if isinstance(d, dict):
results = d.get("results")
if isinstance(results, list):
return results
return []
def _extract_next_link(data: dict) -> str | None:
for key in ("@odata.nextLink", "odata.nextLink", "__next"):
value = data.get(key)
if isinstance(value, str) and value:
return value
d = data.get("d")
if isinstance(d, dict):
value = d.get("__next")
if isinstance(value, str) and value:
return value
return None
def _get_role_assignments(url: str, headers: dict[str, str]) -> list[PermissionEntry]:
    """
    Fetch role assignments from ``url`` and flatten them into permission entries.

    One ``PermissionEntry`` is produced per (principal, role) pair. The
    principal is the member's ``LoginName`` or, failing that, its ``Title``;
    assignments without either are ignored, as are "Limited Access" roles.
    Only the single response at ``url`` is read (no paging).
    """
    payload = _request_json(url, headers)
    entries: list[PermissionEntry] = []
    for assignment in _extract_values(payload):
        member = assignment.get("Member") or {}
        principal = str(member.get("LoginName") or member.get("Title") or "").strip()
        if principal:
            entries.extend(
                PermissionEntry(principal=principal, role_name=role)
                for role in _extract_role_names(assignment.get("RoleDefinitionBindings"))
                if role.lower() != "limited access"
            )
    return entries
# Lower-cased Dutch role names mapped to their English equivalents.
# _normalize_role_name() looks names up here after lower-casing them, so every
# key must be lower-case.
_ROLE_NAME_NL_TO_EN: dict[str, str] = {
    "volledig beheer": "Full Control",
    "ontwerpen": "Design",
    "bewerken": "Edit",
    "bijdragen": "Contribute",
    "lezen": "Read",
    "beperkte toegang": "Limited Access",
    "goedkeuren": "Approve",
    "hiërarchieën beheren": "Manage Hierarchy",
    # NOTE(review): confirm this matches the exact Dutch label SharePoint emits.
    "weergeven alleen": "View Only",
    "beperkt lezen": "Restricted Read",
}
def _normalize_role_name(name: str) -> str:
    """
    Translate a known Dutch role name to English.

    The lookup is case-insensitive; names without an entry in
    ``_ROLE_NAME_NL_TO_EN`` are returned unchanged.
    """
    lowered = name.lower()
    if lowered in _ROLE_NAME_NL_TO_EN:
        return _ROLE_NAME_NL_TO_EN[lowered]
    return name
def _extract_role_names(bindings) -> list[str]:
    """
    Return the normalised role names found in a ``RoleDefinitionBindings`` value.

    ``bindings`` may be a plain list of binding dicts or a dict wrapping that
    list under ``results``; any other shape yields an empty list. Bindings
    that are not dicts or have no truthy ``Name`` are skipped.
    """
    if isinstance(bindings, list):
        candidates = bindings
    elif isinstance(bindings, dict):
        candidates = bindings.get("results")
        if not isinstance(candidates, list):
            return []
    else:
        return []

    names: list[str] = []
    for binding in candidates:
        if isinstance(binding, dict) and binding.get("Name"):
            names.append(_normalize_role_name(str(binding.get("Name") or "").strip()))
    return names
def _deduplicate_hierarchical(deviations: list[DeviationRecord]) -> list[DeviationRecord]:
"""
Remove child-level deviations that are already covered by a parent in the URL hierarchy.
A deviation for (principal, role) at /sites/X/Lib/FolderA is redundant when the same
(principal, role) was already reported at /sites/X/Lib or /sites/X/Lib/FolderA's parent.
Sorting by URL length ascending guarantees parents are evaluated before their children.
"""
sorted_devs = sorted(deviations, key=lambda d: len(d.object_url))
# Maps (principal, role_name) → list of ancestor URLs already reported
covered: dict[tuple[str, str], list[str]] = {}
result: list[DeviationRecord] = []
for dev in sorted_devs:
key = (dev.principal, dev.role_name)
ancestor_urls = covered.get(key)
if ancestor_urls:
parent = dev.object_url.rstrip("/")
already_covered = any(
parent == anc.rstrip("/") or parent.startswith(anc.rstrip("/") + "/")
for anc in ancestor_urls
)
if already_covered:
continue
else:
covered[key] = []
result.append(dev)
covered[key].append(dev.object_url)
return result
def _deviation_records_only_added(
    object_url: str,
    object_type: str,
    root_set: set[PermissionEntry],
    current_set: set[PermissionEntry],
) -> list[DeviationRecord]:
    """
    Build "added" deviation records for entries present on an object but not on the root.

    Entries that exist only in ``root_set`` are ignored. The result is ordered
    case-insensitively by principal, then by role name.
    """
    extra_entries = sorted(
        current_set - root_set,
        key=lambda entry: (entry.principal.lower(), entry.role_name.lower()),
    )
    return [
        DeviationRecord(
            object_url=object_url,
            object_type=object_type,
            principal=extra.principal,
            role_name=extra.role_name,
            delta_type="added",
        )
        for extra in extra_entries
    ]
def _absolute_url(host: str, server_relative_url: str) -> str:
if not server_relative_url:
return f"https://{host}"
if server_relative_url.startswith("http://") or server_relative_url.startswith("https://"):
return server_relative_url
if not server_relative_url.startswith("/"):
server_relative_url = "/" + server_relative_url
return f"https://{host}{server_relative_url}"
def _to_int(value) -> int:
try:
if value is None:
return 0
return int(value)
except (TypeError, ValueError):
return 0
def _to_bool(value) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.strip().lower() in ("1", "true", "yes")
return bool(value)

View File

@ -0,0 +1,61 @@
"""
Scanner package — dispatches scan requests by scan_type.
Public API:
- AuthConfig, DeviationRecord, ScanResult, ProbeResult, ProgressCallback (common)
- scan(scan_type, target, auth, progress) — dispatcher
- probe(scan_type, target, auth) — dispatcher
- resolve_sharing_link_members — SharePoint-specific, re-exported
"""
from __future__ import annotations
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
)
from . import entra, mailbox, sharepoint
from .sharepoint import resolve_sharing_link_members
# Names exported by `from <package> import *`: the shared data types, the two
# dispatchers defined below, and the re-exported SharePoint helper.
__all__ = [
    "AuthConfig",
    "DeviationRecord",
    "ProbeResult",
    "ProgressCallback",
    "ScanResult",
    "scan",
    "probe",
    "resolve_sharing_link_members",
]
def scan(
scan_type: str,
target: str,
auth: AuthConfig,
progress: ProgressCallback | None = None,
) -> ScanResult:
"""Dispatch a scan to the right scanner module."""
if scan_type == "sharepoint":
return sharepoint.scan_site_for_deviations(target, auth, progress)
if scan_type == "sharepoint_root":
return sharepoint.scan_site_root_permissions(target, auth, progress)
if scan_type == "mailbox":
return mailbox.scan_mailbox_for_deviations(target, auth, progress)
if scan_type == "entra_groups":
return entra.scan_entra_group(target, auth, progress)
raise RuntimeError(f"Unknown scan_type '{scan_type}'")
def probe(scan_type: str, target: str, auth: AuthConfig) -> ProbeResult:
"""Dispatch a preflight probe to the right scanner module."""
if scan_type in ("sharepoint", "sharepoint_root"):
return sharepoint.probe_site(target, auth)
if scan_type == "mailbox":
return mailbox.probe_mailbox(target, auth)
if scan_type == "entra_groups":
return entra.probe_entra(target, auth)
raise RuntimeError(f"Unknown scan_type '{scan_type}'")

View File

@ -0,0 +1,52 @@
from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass, field
@dataclass(frozen=True)
class AuthConfig:
tenant_id: str
client_id: str
client_secret: str = ""
cert_private_key: str | None = None
cert_thumbprint: str | None = None
cert_public_pem: str | None = None
@dataclass
class DeviationRecord:
    """One (object, principal, role) finding produced by a scanner."""

    # URL or label of the scanned object: the SharePoint scanner stores a URL,
    # the Entra scanner the group label, the mailbox scanner the mailbox/folder.
    object_url: str
    # Kind of object, e.g. "DocumentLibrary", "EntraGroup", "Mailbox".
    object_type: str
    principal: str
    role_name: str
    # "added" (SharePoint deviations) or "present" (mailbox and Entra scans).
    delta_type: str
    # Optional sub-classification; set by the mailbox and Entra scanners.
    permission_type: str | None = None
@dataclass
class ScanResult:
    """Outcome of a scan: the findings plus an optional warning text."""

    deviations: list[DeviationRecord]
    # Human-readable note about a partial or skipped scan; None when absent.
    warning: str | None = None
@dataclass
class ProbeResult:
    """Outcome of a preflight probe."""

    # True when the target could be reached with the given credentials.
    ok: bool
    # "OK" on success, otherwise a description of what failed.
    message: str
# Progress hook: scanners call it as progress(activity_text, item_count).
ProgressCallback = Callable[[str, int], None]
def validate_auth_config(auth: AuthConfig) -> None:
    """
    Check that ``auth`` carries everything needed for app-only authentication.

    Required are a tenant id, a client id and either a client secret or a
    certificate (thumbprint plus private key).

    Raises:
        RuntimeError: listing every missing setting.
    """
    has_certificate = bool(auth.cert_thumbprint and auth.cert_private_key)
    checks = (
        ("tenant_id", bool(auth.tenant_id)),
        ("client_id", bool(auth.client_id)),
        ("client_secret or certificate", bool(auth.client_secret) or has_certificate),
    )
    missing = [label for label, present in checks if not present]
    if missing:
        raise RuntimeError("Missing required Azure auth settings: " + ", ".join(missing))

View File

@ -0,0 +1,293 @@
"""
Entra (Azure AD) groups scanner.
For each target (group object-id or email/UPN-style mail) Clearview retrieves:
- The group's display name and type (Microsoft 365 / Security / Distribution / Mail-enabled security)
- Every Member (recursive across nested groups)
- Every Owner (recursive across nested groups)
Each resulting user is stored as one deviation with:
- object_url = group display label
- object_type = 'EntraGroup'
- principal = userPrincipalName / mail / displayName
- role_name = 'Member' or 'Owner' (with " (via X > Y)" chain when nested)
- delta_type = 'present'
- permission_type = group type ("Microsoft 365" / "Security" / …)
Authentication uses a Graph token obtained from MSAL via the existing tenant
certificate. Required Application permission: Group.Read.All on Microsoft Graph.
"""
from __future__ import annotations
from dataclasses import dataclass
from urllib.parse import quote
import requests
from ..config import SCAN_HTTP_BACKOFF_SEC, SCAN_HTTP_MAX_RETRIES, SCAN_HTTP_TIMEOUT_SEC
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
validate_auth_config,
)
from .sharepoint import _get_token_for_host, _request_json
@dataclass
class _ResolvedUser:
    """A user found while walking a group, with the path that led to them."""

    # userPrincipalName, falling back to mail, then displayName.
    upn: str
    # Group labels from the scanned group down to the group listing the user.
    via: list[str]
def scan_entra_group(
    target: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Scan one Entra group and report each resolved member and owner.

    ``target`` is resolved through ``_resolve_group``. When no group is found
    the result has no deviations and a "Group not found" warning. Otherwise
    members are listed first, then owners, each as one ``DeviationRecord``.

    Raises:
        RuntimeError: when ``auth`` is incomplete (from ``validate_auth_config``).
    """
    validate_auth_config(auth)

    def notify(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    headers = _graph_headers(auth)
    notify(f"Resolving group: {target}")
    group = _resolve_group(target, headers)
    if not group:
        return ScanResult(deviations=[], warning=f"Group not found: {target}")

    group_id = str(group.get("id") or "").strip()
    # Display label: displayName, then mail, then the raw object id.
    label = next(
        (
            text
            for text in (
                str(group.get("displayName") or "").strip(),
                str(group.get("mail") or "").strip(),
            )
            if text
        ),
        group_id,
    )
    group_type = _classify_group_type(group)

    notify(f"Members: {label}")
    members = _collect_users(group_id, "/members", headers, [label])
    notify(f"Owners: {label}")
    owners = _collect_users(group_id, "/owners", headers, [label])

    deviations: list[DeviationRecord] = [
        _user_to_record(member, label, group_type, "Member") for member in members
    ]
    deviations += [_user_to_record(owner, label, group_type, "Owner") for owner in owners]

    notify("Scan complete", 0)
    return ScanResult(deviations=deviations, warning=None)
def probe_entra(target: str, auth: AuthConfig) -> ProbeResult:
    """
    Preflight check for an Entra group target.

    The checks run in order - auth config, non-empty target, Graph token,
    group lookup - and the first failure is returned as a ``ProbeResult``
    with ``ok=False``. This function does not raise.
    """

    def failed(text: str) -> ProbeResult:
        return ProbeResult(ok=False, message=text)

    try:
        validate_auth_config(auth)
    except Exception as config_error:  # noqa: BLE001
        return failed(f"Config: {config_error}")

    if not (target or "").strip():
        return failed("Empty group target")

    try:
        graph_headers = _graph_headers(auth)
    except Exception as token_error:  # noqa: BLE001
        return failed(f"Token: {str(token_error)[:240]}")

    try:
        found = _resolve_group(target, graph_headers)
    except Exception as lookup_error:  # noqa: BLE001
        return failed(_probe_hint(str(lookup_error)))

    if found:
        return ProbeResult(ok=True, message="OK")
    return failed(f"Group not found: {target}")
def list_all_groups(auth: AuthConfig, max_count: int = 50000) -> list[str]:
    """
    Return the object id of every group in the tenant, read from Microsoft Graph.

    Pages of up to 999 groups are followed through ``@odata.nextLink``. Each
    returned id can be queued as a scan target.

    Raises:
        RuntimeError: when ``auth`` is incomplete, a request fails, or more
            than ``max_count`` ids have been collected after a page.
    """
    validate_auth_config(auth)
    headers = _graph_headers(auth)

    group_ids: list[str] = []
    page_url: str | None = (
        "https://graph.microsoft.com/v1.0/groups"
        "?$select=id,displayName,mail&$top=999"
    )
    while page_url:
        page = _request_json(page_url, headers)
        group_ids.extend(
            gid
            for gid in (str(group.get("id") or "").strip() for group in page.get("value", []))
            if gid
        )
        if len(group_ids) > max_count:
            raise RuntimeError(f"Group count exceeds limit {max_count}")
        link = page.get("@odata.nextLink")
        page_url = link if isinstance(link, str) and link else None
    return group_ids
def _user_to_record(user: _ResolvedUser, group_label: str, group_type: str, role: str) -> DeviationRecord:
    """
    Convert a resolved user into a "present" deviation for the scanned group.

    Users reached through nested groups get the path appended to the role,
    e.g. ``Member (via Parent > Child)``; direct users keep the bare role.
    """
    chain_text = " > ".join(user.via)
    reached_via_nesting = bool(user.via) and user.via != [group_label]
    role_name = f"{role} (via {chain_text})" if reached_via_nesting else role
    return DeviationRecord(
        object_url=group_label,
        object_type="EntraGroup",
        principal=user.upn,
        role_name=role_name,
        delta_type="present",
        permission_type=group_type,
    )
def _graph_headers(auth: AuthConfig) -> dict[str, str]:
    """Build the JSON request headers carrying a bearer token for graph.microsoft.com."""
    bearer = _get_token_for_host("graph.microsoft.com", auth)
    headers = {"Accept": "application/json"}
    headers["Authorization"] = f"Bearer {bearer}"
    return headers
def _resolve_group(target: str, headers: dict[str, str]) -> dict | None:
    """
    Look up a group by object id (GUID), mail address, or display name.

    A GUID is fetched directly. Anything else becomes an OData ``$filter``
    on ``mail`` (when the target contains "@") or ``displayName``; the first
    match is returned. Single quotes are doubled for OData, and the whole
    filter expression is percent-encoded so characters such as ``&``, ``#``
    or ``+`` in a name cannot break the query string.

    Returns:
        The group resource as a dict, or ``None`` when the target is empty,
        nothing matched, or the request failed.

    NOTE(review): request errors are swallowed here, so callers cannot tell
    "not found" from an authentication/permission failure; the exception
    handler around this call in ``probe_entra`` is therefore never reached.
    """
    cleaned = (target or "").strip()
    if not cleaned:
        return None

    select = "$select=id,displayName,mail,groupTypes,securityEnabled,mailEnabled"

    if _is_guid(cleaned):
        try:
            return _request_json(
                f"https://graph.microsoft.com/v1.0/groups/{cleaned}?{select}",
                headers,
            )
        except Exception:  # noqa: BLE001
            return None

    escaped = cleaned.replace("'", "''")
    filter_field = "mail" if "@" in cleaned else "displayName"
    # safe="" also encodes "/" so the value can never alter the URL structure.
    filter_expr = quote(f"{filter_field} eq '{escaped}'", safe="")
    url = f"https://graph.microsoft.com/v1.0/groups?$filter={filter_expr}&{select}"
    try:
        data = _request_json(url, headers)
    except Exception:  # noqa: BLE001
        return None
    items = data.get("value") or []
    return items[0] if items else None
def _classify_group_type(group: dict) -> str:
types = group.get("groupTypes") or []
if isinstance(types, list) and any(str(t).lower() == "unified" for t in types):
return "Microsoft 365"
mail_enabled = bool(group.get("mailEnabled"))
security_enabled = bool(group.get("securityEnabled"))
if mail_enabled and security_enabled:
return "Mail-enabled Security"
if security_enabled:
return "Security"
if mail_enabled:
return "Distribution"
return "Group"
def _collect_users(
    group_id: str,
    relative: str,
    headers: dict[str, str],
    via_chain: list[str],
    seen_groups: set[str] | None = None,
    depth: int = 0,
) -> list[_ResolvedUser]:
    """
    Collect the users listed under ``/groups/{group_id}{relative}``, expanding nested groups.

    Args:
        group_id: Object id of the group to read.
        relative: "/members" or "/owners".
        headers: Graph request headers (with bearer token).
        via_chain: Labels of the groups walked so far; copied onto each user.
        seen_groups: Ids of groups already expanded; shared across the
            recursion to stop cycles. Created on the first call.
        depth: Current nesting level; levels beyond 5 are not expanded.

    Returns:
        One ``_ResolvedUser`` per user entry found. A user reachable through
        several distinct groups appears once per path.

    A failed page request ends the paging for that group and returns what was
    collected so far (best effort); no error is raised.
    """
    if depth > 5:
        return []
    if seen_groups is None:
        seen_groups = set()
    if relative == "/members":
        # Mark the group whose members are being expanded, so a membership
        # cycle that leads back to it is not walked (and reported) a second
        # time. Not done for "/owners": there the group's own members have
        # not been expanded and may legitimately be reached via an owner group.
        seen_groups.add(group_id)
    next_url: str | None = (
        f"https://graph.microsoft.com/v1.0/groups/{group_id}{relative}"
        "?$select=id,userPrincipalName,mail,displayName&$top=999"
    )
    out: list[_ResolvedUser] = []
    while next_url:
        try:
            data = _request_json(next_url, headers)
        except Exception:  # noqa: BLE001
            break
        for entry in data.get("value", []):
            otype = str(entry.get("@odata.type") or "")
            if otype.endswith("user"):
                upn = (
                    str(entry.get("userPrincipalName") or "").strip()
                    or str(entry.get("mail") or "").strip()
                    or str(entry.get("displayName") or "").strip()
                )
                if upn:
                    out.append(_ResolvedUser(upn=upn, via=list(via_chain)))
            elif otype.endswith("group"):
                nested_id = str(entry.get("id") or "").strip()
                if not nested_id or nested_id in seen_groups:
                    continue
                seen_groups.add(nested_id)
                nested_label = (
                    str(entry.get("displayName") or "").strip()
                    or str(entry.get("mail") or "").strip()
                    or nested_id
                )
                # Nested groups under /members are themselves "members" — we
                # recurse via /members only. For /owners, owners of the nested
                # group are not themselves owners of the parent in any
                # meaningful sense, so we still recurse via /members.
                out.extend(
                    _collect_users(
                        nested_id,
                        "/members",
                        headers,
                        via_chain + [nested_label],
                        seen_groups,
                        depth + 1,
                    )
                )
        nl = data.get("@odata.nextLink")
        next_url = nl if isinstance(nl, str) and nl else None
    return out
def _is_guid(value: str) -> bool:
if not value or len(value) != 36:
return False
parts = value.split("-")
if len(parts) != 5:
return False
return all(all(c in "0123456789abcdefABCDEF" for c in p) for p in parts)
def _probe_hint(error: str) -> str:
low = error.lower()
if "401" in low or "unauthorized" in low or "aadsts" in low:
return f"{error[:200]} — verify Group.Read.All permission and admin consent on Microsoft Graph"
if "403" in low or "forbidden" in low:
return f"{error[:200]} — Microsoft Graph permission denied (Group.Read.All missing?)"
if "404" in low:
return f"{error[:200]} — group not found in this tenant"
return error[:240]

View File

@ -0,0 +1,135 @@
# Reads the permissions of one Exchange Online mailbox and prints a single
# compressed JSON document describing them.
[CmdletBinding()]
param(
    # NOTE(review): TenantId is accepted but not referenced anywhere in this script.
    [Parameter(Mandatory=$true)][string]$TenantId,
    # Application (client) id, passed to Connect-ExchangeOnline as -AppId.
    [Parameter(Mandatory=$true)][string]$ClientId,
    [Parameter(Mandatory=$true)][string]$Organization,
    # Identity of the mailbox to inspect.
    [Parameter(Mandatory=$true)][string]$Mailbox,
    # Path to the certificate file; its password comes from CLEARVIEW_PFX_PASSWORD.
    [Parameter(Mandatory=$true)][string]$CertPath
)
# Make every error terminating so the try/catch blocks below can handle it.
$ErrorActionPreference = 'Stop'
# Suppress progress output.
$ProgressPreference = 'SilentlyContinue'
# Emits $Payload as one compressed JSON string (nesting depth 6) on the output stream.
function Write-JsonResult {
    param($Payload)
    Write-Output ($Payload | ConvertTo-Json -Depth 6 -Compress)
}
# Step 1: load the Exchange Online module. Failures are reported as JSON and the
# script still exits with code 0; the error is carried in the payload.
try {
    Import-Module ExchangeOnlineManagement -ErrorAction Stop
} catch {
    Write-JsonResult @{ ok = $false; error = "ExchangeOnlineManagement module not available: $($_.Exception.Message)" }
    exit 0
}
# Step 2: connect app-only with the certificate file. The certificate password
# is read from the environment rather than from a parameter.
try {
    $pfxPwd = $env:CLEARVIEW_PFX_PASSWORD
    if ([string]::IsNullOrEmpty($pfxPwd)) {
        Write-JsonResult @{ ok = $false; error = "CLEARVIEW_PFX_PASSWORD not set in environment" }
        exit 0
    }
    $securePwd = ConvertTo-SecureString -String $pfxPwd -AsPlainText -Force
    Connect-ExchangeOnline `
        -AppId $ClientId `
        -Organization $Organization `
        -CertificateFilePath $CertPath `
        -CertificatePassword $securePwd `
        -ShowBanner:$false `
        -ShowProgress:$false `
        -ErrorAction Stop | Out-Null
} catch {
    Write-JsonResult @{ ok = $false; error = "Connect-ExchangeOnline failed: $($_.Exception.Message)" }
    exit 0
}
# Collected permission rows and per-section warnings; both end up in the JSON result.
$entries = New-Object System.Collections.Generic.List[object]
$warnings = New-Object System.Collections.Generic.List[string]
try {
    # A failure to resolve the mailbox itself aborts the scan (outer catch);
    # failures inside the individual sections below only add a warning.
    $mb = Get-EXOMailbox -Identity $Mailbox -PropertySets All -ErrorAction Stop
    # 1) Full Access (and other mailbox-level permissions)
    try {
        # Skip the mailbox's own SELF entry, principals that look like raw SIDs
        # (S-1-5-*), inherited entries and deny entries.
        $perms = Get-EXOMailboxPermission -Identity $mb.UserPrincipalName -ErrorAction Stop |
            Where-Object { $_.User -notlike 'NT AUTHORITY\SELF' -and $_.User -notlike 'S-1-5-*' -and -not $_.IsInherited -and $_.Deny -eq $false }
        foreach ($p in $perms) {
            $rights = @($p.AccessRights) -join ', '
            $entries.Add([pscustomobject]@{
                permission_type = 'FullAccess'
                object_type = 'Mailbox'
                object = $mb.UserPrincipalName
                principal = [string]$p.User
                role_name = $rights
            })
        }
    } catch {
        $warnings.Add("MailboxPermission: $($_.Exception.Message)")
    }
    # 2) Send As
    try {
        # Keep only Allow entries for trustees other than SELF / raw SIDs.
        $sendAs = Get-EXORecipientPermission -Identity $mb.UserPrincipalName -ErrorAction Stop |
            Where-Object { $_.Trustee -notlike 'NT AUTHORITY\SELF' -and $_.Trustee -notlike 'S-1-5-*' -and $_.AccessControlType -eq 'Allow' }
        foreach ($p in $sendAs) {
            $rights = @($p.AccessRights) -join ', '
            $entries.Add([pscustomobject]@{
                permission_type = 'SendAs'
                object_type = 'Mailbox'
                object = $mb.UserPrincipalName
                principal = [string]$p.Trustee
                role_name = $rights
            })
        }
    } catch {
        $warnings.Add("RecipientPermission: $($_.Exception.Message)")
    }
    # 3) Send on Behalf — from mailbox property
    try {
        if ($mb.GrantSendOnBehalfTo) {
            foreach ($t in $mb.GrantSendOnBehalfTo) {
                $entries.Add([pscustomobject]@{
                    permission_type = 'SendOnBehalf'
                    object_type = 'Mailbox'
                    object = $mb.UserPrincipalName
                    principal = [string]$t
                    role_name = 'SendOnBehalf'
                })
            }
        }
    } catch {
        $warnings.Add("GrantSendOnBehalfTo: $($_.Exception.Message)")
    }
    # 4) Folder-level delegations on Calendar and Inbox
    foreach ($folder in 'Calendar', 'Inbox') {
        try {
            # Folder identity has the form "<UPN>:\<Folder>".
            $folderPath = "{0}:\{1}" -f $mb.UserPrincipalName, $folder
            # Drop the built-in Default/Anonymous entries and entries whose rights include 'None'.
            $fp = Get-EXOMailboxFolderPermission -Identity $folderPath -ErrorAction Stop |
                Where-Object { $_.User.DisplayName -notin @('Default', 'Anonymous') -and $_.AccessRights -notcontains 'None' }
            foreach ($p in $fp) {
                $rights = @($p.AccessRights) -join ', '
                $entries.Add([pscustomobject]@{
                    permission_type = "Folder:$folder"
                    object_type = 'MailboxFolder'
                    object = "$($mb.UserPrincipalName)/$folder"
                    principal = [string]$p.User.DisplayName
                    role_name = $rights
                })
            }
        } catch {
            $warnings.Add("FolderPermission ${folder}: $($_.Exception.Message)")
        }
    }
    Write-JsonResult @{
        ok = $true
        mailbox = $mb.UserPrincipalName
        entries = $entries
        warnings = $warnings
    }
} catch {
    Write-JsonResult @{ ok = $false; error = $_.Exception.Message }
} finally {
    # Always close the session; any disconnect output or error is discarded.
    try { Disconnect-ExchangeOnline -Confirm:$false -InformationAction SilentlyContinue -ErrorAction SilentlyContinue | Out-Null } catch {}
}

View File

@ -0,0 +1,67 @@
# Lists the UserPrincipalName of every mailbox in the organization and prints
# the result as a single compressed JSON document.
[CmdletBinding()]
param(
    # NOTE(review): TenantId is accepted but not referenced anywhere in this script.
    [Parameter(Mandatory=$true)][string]$TenantId,
    # Application (client) id, passed to Connect-ExchangeOnline as -AppId.
    [Parameter(Mandatory=$true)][string]$ClientId,
    [Parameter(Mandatory=$true)][string]$Organization,
    # Path to the certificate file; its password comes from CLEARVIEW_PFX_PASSWORD.
    [Parameter(Mandatory=$true)][string]$CertPath,
    # Upper bound on the number of mailboxes; exceeding it is reported as an error.
    [Parameter(Mandatory=$false)][int]$MaxMailboxes = 50000
)
# Make every error terminating so the try/catch blocks below can handle it.
$ErrorActionPreference = 'Stop'
# Suppress progress output.
$ProgressPreference = 'SilentlyContinue'
# Emits $Payload as one compressed JSON string (nesting depth 4) on the output stream.
function Write-JsonResult {
    param($Payload)
    Write-Output ($Payload | ConvertTo-Json -Depth 4 -Compress)
}
# Step 1: load the Exchange Online module. Failures are reported as JSON and the
# script still exits with code 0; the error is carried in the payload.
try {
    Import-Module ExchangeOnlineManagement -ErrorAction Stop
} catch {
    Write-JsonResult @{ ok = $false; error = "ExchangeOnlineManagement module not available: $($_.Exception.Message)" }
    exit 0
}
# Step 2: connect app-only with the certificate file. The certificate password
# is read from the environment rather than from a parameter.
try {
    $pfxPwd = $env:CLEARVIEW_PFX_PASSWORD
    if ([string]::IsNullOrEmpty($pfxPwd)) {
        Write-JsonResult @{ ok = $false; error = "CLEARVIEW_PFX_PASSWORD not set in environment" }
        exit 0
    }
    $securePwd = ConvertTo-SecureString -String $pfxPwd -AsPlainText -Force
    Connect-ExchangeOnline `
        -AppId $ClientId `
        -Organization $Organization `
        -CertificateFilePath $CertPath `
        -CertificatePassword $securePwd `
        -ShowBanner:$false `
        -ShowProgress:$false `
        -ErrorAction Stop | Out-Null
} catch {
    Write-JsonResult @{ ok = $false; error = "Connect-ExchangeOnline failed: $($_.Exception.Message)" }
    exit 0
}
# Step 3: enumerate all mailboxes and emit their UPNs.
try {
    # @(...) forces an array even when zero or one mailbox is returned. Without
    # it a single result is a plain string, and "mailboxes" would be serialised
    # as a JSON string instead of a JSON array.
    $boxes = @(
        Get-EXOMailbox -ResultSize Unlimited -PropertySets Minimum -ErrorAction Stop |
            Select-Object -ExpandProperty UserPrincipalName
    )
    if ($boxes.Count -gt $MaxMailboxes) {
        Write-JsonResult @{
            ok = $false
            error = "Mailbox count $($boxes.Count) exceeds MaxMailboxes=$MaxMailboxes"
            count = $boxes.Count
        }
        exit 0
    }
    Write-JsonResult @{
        ok = $true
        count = $boxes.Count
        mailboxes = $boxes
    }
} catch {
    Write-JsonResult @{ ok = $false; error = $_.Exception.Message }
} finally {
    # Always close the session; any disconnect output or error is discarded.
    try { Disconnect-ExchangeOnline -Confirm:$false -InformationAction SilentlyContinue -ErrorAction SilentlyContinue | Out-Null } catch {}
}

View File

@ -0,0 +1,57 @@
# Preflight probe: checks that Exchange Online can be reached with the given
# app credentials and that the mailbox can be retrieved. Prints one JSON
# object with the fields "ok" and "message".
[CmdletBinding()]
param(
    # NOTE(review): TenantId is accepted but not referenced anywhere in this script.
    [Parameter(Mandatory=$true)][string]$TenantId,
    # Application (client) id, passed to Connect-ExchangeOnline as -AppId.
    [Parameter(Mandatory=$true)][string]$ClientId,
    [Parameter(Mandatory=$true)][string]$Organization,
    # Identity of the mailbox to look up.
    [Parameter(Mandatory=$true)][string]$Mailbox,
    # Path to the certificate file; its password comes from CLEARVIEW_PFX_PASSWORD.
    [Parameter(Mandatory=$true)][string]$CertPath
)
# Make every error terminating so the try/catch blocks below can handle it.
$ErrorActionPreference = 'Stop'
# Suppress progress output.
$ProgressPreference = 'SilentlyContinue'
# Emits {"ok": <bool>, "message": <string>} as one compressed JSON string.
function Write-Result {
    param([bool]$Ok, [string]$Message)
    $obj = [pscustomobject]@{ ok = $Ok; message = $Message }
    Write-Output ($obj | ConvertTo-Json -Compress)
}
# Step 1: load the Exchange Online module. Failures are reported as JSON and the
# script still exits with code 0; the error is carried in the payload.
try {
    Import-Module ExchangeOnlineManagement -ErrorAction Stop
} catch {
    Write-Result -Ok $false -Message "ExchangeOnlineManagement module not available: $($_.Exception.Message)"
    exit 0
}
# Step 2: connect app-only with the certificate file. The certificate password
# is read from the environment rather than from a parameter.
try {
    $pfxPwd = $env:CLEARVIEW_PFX_PASSWORD
    if ([string]::IsNullOrEmpty($pfxPwd)) {
        Write-Result -Ok $false -Message "CLEARVIEW_PFX_PASSWORD not set in environment"
        exit 0
    }
    $securePwd = ConvertTo-SecureString -String $pfxPwd -AsPlainText -Force
    Connect-ExchangeOnline `
        -AppId $ClientId `
        -Organization $Organization `
        -CertificateFilePath $CertPath `
        -CertificatePassword $securePwd `
        -ShowBanner:$false `
        -ShowProgress:$false `
        -ErrorAction Stop | Out-Null
} catch {
    Write-Result -Ok $false -Message "Connect-ExchangeOnline failed: $($_.Exception.Message)"
    exit 0
}
# Step 3: look the mailbox up with the minimum property set; finding it is the
# success criterion of the probe.
try {
    $box = Get-EXOMailbox -Identity $Mailbox -ErrorAction Stop -PropertySets Minimum
    if ($null -eq $box) {
        Write-Result -Ok $false -Message "Mailbox '$Mailbox' not found"
    } else {
        Write-Result -Ok $true -Message "OK"
    }
} catch {
    Write-Result -Ok $false -Message "Get-EXOMailbox failed: $($_.Exception.Message)"
} finally {
    # Always close the session; any disconnect output or error is discarded.
    try { Disconnect-ExchangeOnline -Confirm:$false -InformationAction SilentlyContinue -ErrorAction SilentlyContinue | Out-Null } catch {}
}

View File

@ -0,0 +1,257 @@
"""
Mailbox permission scanner — Exchange Online via PowerShell subprocess.
Requires `pwsh` and the `ExchangeOnlineManagement` module to be installed
in the runtime container. Authentication uses certificate-based app-only
auth, identical to the SharePoint scanner's tenant profile.
"""
from __future__ import annotations
import json
import os
import secrets
import shutil
import subprocess
import tempfile
from pathlib import Path
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.serialization import pkcs12
from cryptography import x509
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
validate_auth_config,
)
# PowerShell helper scripts live in the exo_scripts/ directory next to this module.
_SCRIPTS_DIR = Path(__file__).parent / "exo_scripts"
_PROBE_SCRIPT = _SCRIPTS_DIR / "probe.ps1"
_GET_PERMS_SCRIPT = _SCRIPTS_DIR / "get-permissions.ps1"
_LIST_SCRIPT = _SCRIPTS_DIR / "list-mailboxes.ps1"
# Default pwsh subprocess timeout in seconds — connect can take ~10s, scan up to
# a few minutes per mailbox
_PWSH_TIMEOUT_SEC = 600
def scan_mailbox_for_deviations(
    upn: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Scan one mailbox and report every permission entry found on it.

    The get-permissions PowerShell script does the actual work; each entry it
    returns with a non-empty principal becomes a "present" ``DeviationRecord``.
    Warnings reported by the script are joined with " | " into the result's
    warning text.

    Raises:
        RuntimeError: when the auth config is incomplete, no certificate is
            configured, the subprocess fails, or the script reports ``ok=false``.
    """
    validate_auth_config(auth)
    _require_certificate(auth)

    def notify(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    organization = _resolve_organization(auth, upn)
    notify(f"Connecting to Exchange Online ({organization})")
    payload = _run_pwsh(_GET_PERMS_SCRIPT, auth, organization, upn)
    if not payload.get("ok"):
        raise RuntimeError(payload.get("error") or "Mailbox scan failed")

    entries = payload.get("entries") or []
    warnings = payload.get("warnings") or []
    mailbox_id = payload.get("mailbox") or upn
    notify(f"Mailbox: {mailbox_id} ({len(entries)} entries)", len(entries))

    named_entries = ((entry, str(entry.get("principal") or "").strip()) for entry in entries)
    deviations: list[DeviationRecord] = [
        DeviationRecord(
            object_url=str(entry.get("object") or mailbox_id),
            object_type=str(entry.get("object_type") or "Mailbox"),
            principal=principal,
            role_name=str(entry.get("role_name") or ""),
            delta_type="present",
            permission_type=str(entry.get("permission_type") or ""),
        )
        for entry, principal in named_entries
        if principal
    ]

    notify("Scan complete", 0)
    warning_text = None
    if warnings:
        warning_text = " | ".join(map(str, warnings))
    return ScanResult(deviations=deviations, warning=warning_text)
def list_mailboxes(organization: str, auth: AuthConfig, max_count: int = 50000) -> list[str]:
    """
    Return the lower-cased UserPrincipalName of every mailbox in the tenant.

    `organization` must be the tenant's primary domain (e.g. contoso.onmicrosoft.com).
    The list-mailboxes PowerShell script is run with a 300 second timeout.
    A payload whose ``mailboxes`` value is not a list yields an empty result.

    Raises:
        RuntimeError: on incomplete auth, a missing certificate or pwsh binary,
            a failed enumeration, or more than ``max_count`` mailboxes.
    """
    validate_auth_config(auth)
    _require_certificate(auth)
    if not shutil.which("pwsh"):
        raise RuntimeError("pwsh not available in runtime")

    payload = _run_pwsh(_LIST_SCRIPT, auth, organization, mailbox=None, timeout_sec=300)
    if not payload.get("ok"):
        raise RuntimeError(payload.get("error") or "Mailbox enumeration failed")

    mailboxes = payload.get("mailboxes") or []
    if not isinstance(mailboxes, list):
        return []

    normalized: list[str] = []
    for candidate in mailboxes:
        if isinstance(candidate, str) and candidate.strip():
            normalized.append(str(candidate).strip().lower())

    if len(normalized) > max_count:
        raise RuntimeError(f"Mailbox count {len(normalized)} exceeds limit {max_count}")
    return normalized
def probe_mailbox(upn: str, auth: AuthConfig) -> ProbeResult:
    """
    Preflight check for a mailbox target.

    The checks run in order - auth config and certificate, pwsh availability,
    UPN shape, then the probe PowerShell script - and the first failure is
    returned as a ``ProbeResult`` with ``ok=False``. This function does not raise.
    """

    def failed(text: str) -> ProbeResult:
        return ProbeResult(ok=False, message=text)

    try:
        validate_auth_config(auth)
        _require_certificate(auth)
    except Exception as config_error:  # noqa: BLE001
        return failed(f"Config: {config_error}")

    if not shutil.which("pwsh"):
        return failed("pwsh not available in runtime")
    if not (upn or "").strip() or "@" not in upn:
        return failed("Invalid mailbox (UPN/email)")

    organization = _resolve_organization(auth, upn)
    try:
        payload = _run_pwsh(_PROBE_SCRIPT, auth, organization, upn)
    except Exception as run_error:  # noqa: BLE001
        return failed(f"pwsh: {str(run_error)[:240]}")

    succeeded = bool(payload.get("ok"))
    fallback = "OK" if succeeded else "Unknown error"
    message = str(payload.get("message") or fallback)
    if succeeded:
        return ProbeResult(ok=True, message=message)
    return failed(_probe_hint(message))
def _require_certificate(auth: AuthConfig) -> None:
if not (auth.cert_thumbprint and auth.cert_private_key):
raise RuntimeError(
"Mailbox scanning requires a certificate on the tenant profile "
"(client secret is not supported by Exchange Online for app-only auth)."
)
def _resolve_organization(auth: AuthConfig, upn: str) -> str:
"""
Exchange Online expects the organization as the tenant's primary domain
(e.g. contoso.onmicrosoft.com). The UPN domain is the practical default.
"""
domain = upn.split("@", 1)[-1].strip().lower()
return domain or auth.tenant_id
def _run_pwsh(
    script: Path,
    auth: AuthConfig,
    organization: str,
    mailbox: str | None = None,
    timeout_sec: int = _PWSH_TIMEOUT_SEC,
) -> dict:
    """
    Run one of the Exchange Online PowerShell scripts and return its JSON result.

    The tenant certificate is written as a password-protected PFX into a
    temporary directory. The random PFX password reaches the child process
    through the ``CLEARVIEW_PFX_PASSWORD`` environment variable, not the
    command line. The last line of stdout is parsed as the JSON payload.

    Raises:
        RuntimeError: when pwsh is missing, the script times out, exits
            non-zero, prints nothing, or its last output line is not JSON.
    """
    if not shutil.which("pwsh"):
        raise RuntimeError("pwsh not available in runtime")

    public_pem = _resolve_public_cert_pem(auth)
    pfx_password = secrets.token_urlsafe(16)

    with tempfile.TemporaryDirectory(prefix="clearview-exo-") as workdir:
        pfx_file = Path(workdir) / "cert.pfx"
        _write_pfx(
            private_key_pem=auth.cert_private_key or "",
            public_cert_pem=public_pem,
            out_path=pfx_file,
            password=pfx_password,
        )

        argv = ["pwsh", "-NoProfile", "-NonInteractive", "-File", str(script)]
        argv += [
            "-TenantId", auth.tenant_id,
            "-ClientId", auth.client_id,
            "-Organization", organization,
            "-CertPath", str(pfx_file),
        ]
        if mailbox is not None:
            argv += ["-Mailbox", mailbox]

        child_env = {**os.environ, "CLEARVIEW_PFX_PASSWORD": pfx_password}

        try:
            completed = subprocess.run(
                argv,
                capture_output=True,
                text=True,
                timeout=timeout_sec,
                env=child_env,
            )
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(f"pwsh script timed out after {timeout_sec}s") from exc

    if completed.returncode != 0:
        stderr = (completed.stderr or "").strip()[:500]
        raise RuntimeError(f"pwsh exited with code {completed.returncode}: {stderr}")

    out = (completed.stdout or "").strip()
    if not out:
        raise RuntimeError("pwsh returned empty output")

    # The scripts print their JSON document last; anything before it is ignored.
    final_line = out.splitlines()[-1]
    try:
        return json.loads(final_line)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"Could not parse pwsh JSON output: {out[:500]}") from exc
def _resolve_public_cert_pem(auth: AuthConfig) -> str:
"""
The public cert PEM is stored on the tenant profile via the AuthConfig
extension below. This helper raises if it is missing happens for tenants
whose certificate was generated before cert_public_pem was stored.
"""
pem = getattr(auth, "cert_public_pem", None)
if not pem:
raise RuntimeError(
"Tenant certificate has no public PEM stored. "
"Regenerate the certificate to enable mailbox scanning."
)
return pem
def _write_pfx(private_key_pem: str, public_cert_pem: str, out_path: Path, password: str) -> None:
    """
    Bundle a PEM private key and PEM certificate into a PKCS#12 file.

    The (unencrypted) private key and the certificate are loaded from their
    PEM text and written to ``out_path`` under the friendly name "clearview",
    encrypted with ``password``. No CA certificates are included.
    """
    key_object = serialization.load_pem_private_key(private_key_pem.encode(), password=None)
    certificate = x509.load_pem_x509_certificate(public_cert_pem.encode())
    protection = serialization.BestAvailableEncryption(password.encode())
    out_path.write_bytes(
        pkcs12.serialize_key_and_certificates(
            name=b"clearview",
            key=key_object,
            cert=certificate,
            cas=None,
            encryption_algorithm=protection,
        )
    )
def _probe_hint(message: str) -> str:
low = message.lower()
if "unauthorized" in low or "401" in low or "aadsts" in low:
return f"{message[:200]} — verify Exchange.ManageAsApp permission, admin consent, and the Exchange Administrator role assignment"
if "not found" in low or "couldn't find object" in low:
return f"{message[:200]} — mailbox not found in this tenant"
if "module not available" in low:
return f"{message[:200]} — install the ExchangeOnlineManagement module in the container"
return message[:240]

View File

@ -0,0 +1,855 @@
from __future__ import annotations
import re
import threading
import time
from dataclasses import dataclass
from urllib.parse import urlparse
import msal
import requests
from ..config import (
SCAN_HTTP_BACKOFF_SEC,
SCAN_HTTP_MAX_RETRIES,
SCAN_HTTP_TIMEOUT_SEC,
SCAN_LIST_PAGE_SIZE,
SCAN_MAX_ITEMS_PER_LIST,
SHAREPOINT_SCAN_MODE,
)
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
validate_auth_config,
)
@dataclass(frozen=True)
class PermissionEntry:
    """A (principal, role) pair; frozen so instances can be collected in sets."""

    principal: str
    role_name: str
# Token cache: cache_key -> (access_token, expires_at_epoch). Guarded by
# _TOKEN_LOCK because the worker acquires tokens from multiple threads.
_TOKEN_CACHE: dict[str, tuple[str, float]] = {}
_TOKEN_LOCK = threading.Lock()
# Reuse one MSAL app per (tenant, client, auth_method) so MSAL's own token
# cache works and refreshes app tokens automatically.
_MSAL_APPS: dict[str, "msal.ConfidentialClientApplication"] = {}
def scan_site_for_deviations(
    site_url: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Scan SharePoint permission deviations versus site-root role assignments.
    Only SharePoint role assignments are used (site/list/folder/file scope).
    No filesystem/NTFS permission model is used.

    Args:
        site_url: Absolute URL of the site; REST paths are appended to it as-is.
        auth: App credentials, checked with validate_auth_config() first.
        progress: Optional callback invoked as progress(activity, items).

    Returns:
        ScanResult holding the hierarchically de-duplicated "added" records
        and, when any per-list item cap was hit, a " | "-joined warning.

    Raises:
        RuntimeError: for an unsupported SHAREPOINT_SCAN_MODE, or propagated
            from the token/HTTP helpers.
    """
    # Placeholder mode performs no network access at all.
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ScanResult(
            deviations=[],
            warning=(
                "SharePoint scan mode is 'placeholder'. "
                "Set SHAREPOINT_SCAN_MODE=sharepoint_app_only and configure Azure app credentials."
            ),
        )
    if SHAREPOINT_SCAN_MODE != "sharepoint_app_only":
        raise RuntimeError(f"Unsupported SHAREPOINT_SCAN_MODE='{SHAREPOINT_SCAN_MODE}'")
    validate_auth_config(auth)

    def _report(activity: str, items: int = 0) -> None:
        # Progress reporting is optional; do nothing when no callback was given.
        if progress:
            progress(activity, items)

    parsed = urlparse(site_url)
    host = parsed.netloc
    _report(f"Connecting to {host}")
    token = _get_token_for_host(host, auth)
    base_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }
    _report(f"Loading site permissions: {site_url}")
    # Baseline: the role assignments on the site root. Everything found further
    # down is compared against this set.
    root_assignments = _get_role_assignments(
        f"{site_url}/_api/web/roleassignments?$expand=Member,RoleDefinitionBindings"
        "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name",
        base_headers,
    )
    root_set = set(root_assignments)
    deviations: list[DeviationRecord] = []
    warnings: list[str] = []
    lists_url = (
        f"{site_url}/_api/web/lists"
        "?$select=Id,Title,BaseTemplate,Hidden,ItemCount,RootFolder/ServerRelativeUrl,HasUniqueRoleAssignments"
        "&$expand=RootFolder"
    )
    for lst in _iter_paged(lists_url, base_headers):
        if _to_bool(lst.get("Hidden")):
            continue
        # Only lists with BaseTemplate 101 are scanned; they are recorded as
        # "DocumentLibrary" below.
        if _to_int(lst.get("BaseTemplate")) != 101:
            continue
        list_id = str(lst.get("Id", "")).strip()
        if not list_id:
            continue
        list_title = str(lst.get("Title") or "Document Library")
        list_url = _absolute_url(host, str((lst.get("RootFolder") or {}).get("ServerRelativeUrl") or ""))
        _report(f"Library: {list_title}")
        # Library-level comparison, only when the library has its own assignments.
        if _to_bool(lst.get("HasUniqueRoleAssignments")):
            list_assignments = _get_role_assignments(
                f"{site_url}/_api/web/lists(guid'{list_id}')/roleassignments"
                "?$expand=Member,RoleDefinitionBindings"
                "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name",
                base_headers,
            )
            deviations.extend(
                _deviation_records_only_added(
                    object_url=list_url,
                    object_type="DocumentLibrary",
                    root_set=root_set,
                    current_set=set(list_assignments),
                )
            )
        # items_total counts every row seen; items_processed counts only rows
        # whose role assignments were actually fetched.
        items_processed = 0
        items_total = 0
        items_url = (
            f"{site_url}/_api/web/lists(guid'{list_id}')/items"
            f"?$select=Id,FileRef,FileSystemObjectType,HasUniqueRoleAssignments&$top={SCAN_LIST_PAGE_SIZE}"
        )
        for item in _iter_paged(items_url, base_headers):
            items_total += 1
            # Report progress once per 50 rows, passing 50 as the item count.
            if items_total % 50 == 0:
                _report(f"Library: {list_title} ({items_total} items scanned)", 50)
            if not _to_bool(item.get("HasUniqueRoleAssignments")):
                continue
            # The cap applies to items with unique permissions, not to rows read.
            if items_processed >= SCAN_MAX_ITEMS_PER_LIST:
                warnings.append(
                    f"List '{list_title}' hit SCAN_MAX_ITEMS_PER_LIST={SCAN_MAX_ITEMS_PER_LIST}; remaining unique-permission items skipped"
                )
                break
            item_id = _to_int(item.get("Id"))
            if item_id <= 0:
                continue
            file_ref = str(item.get("FileRef") or "")
            if not file_ref:
                continue
            # FileSystemObjectType 0 is reported as "File"; anything else as "Folder".
            item_type = "File" if _to_int(item.get("FileSystemObjectType")) == 0 else "Folder"
            item_assignments = _get_role_assignments(
                f"{site_url}/_api/web/lists(guid'{list_id}')/items({item_id})/roleassignments"
                "?$expand=Member,RoleDefinitionBindings"
                "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name",
                base_headers,
            )
            deviations.extend(
                _deviation_records_only_added(
                    object_url=_absolute_url(host, file_ref),
                    object_type=item_type,
                    root_set=root_set,
                    current_set=set(item_assignments),
                )
            )
            items_processed += 1
    _report("Scan complete", 0)
    warning = " | ".join(warnings) if warnings else None
    return ScanResult(deviations=_deduplicate_hierarchical(deviations), warning=warning)
def scan_site_root_permissions(
    site_url: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Inventory the role assignments granted directly on the site root.

    Libraries, folders and items are not visited. Every surviving assignment
    becomes a record with delta_type='root' and object_type='Site', which keeps
    these results distinguishable from the deviation scan. Principals rejected
    by _is_noise_principal() are left out and only counted in the warning.
    """
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ScanResult(
            deviations=[],
            warning="SharePoint scan mode is 'placeholder'.",
        )
    if SHAREPOINT_SCAN_MODE != "sharepoint_app_only":
        raise RuntimeError(f"Unsupported SHAREPOINT_SCAN_MODE='{SHAREPOINT_SCAN_MODE}'")
    validate_auth_config(auth)

    def _notify(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    host = urlparse(site_url).netloc
    _notify(f"Connecting to {host}")
    bearer = _get_token_for_host(host, auth)
    request_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {bearer}",
    }

    _notify(f"Loading root permissions: {site_url}")
    all_entries = _get_role_assignments(
        f"{site_url}/_api/web/roleassignments?$expand=Member,RoleDefinitionBindings"
        "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name",
        request_headers,
    )

    # Drop sharing-link / system principals, then order case-insensitively.
    visible = [item for item in all_entries if not _is_noise_principal(item.principal)]
    ordered = sorted(visible, key=lambda item: (item.principal.lower(), item.role_name.lower()))
    records: list[DeviationRecord] = [
        DeviationRecord(
            object_url=site_url,
            object_type="Site",
            principal=item.principal,
            role_name=item.role_name,
            delta_type="root",
        )
        for item in ordered
    ]

    _notify("Scan complete", 0)
    hidden_count = len(all_entries) - len(visible)
    if hidden_count:
        return ScanResult(
            deviations=records,
            warning=f"{hidden_count} SharingLinks/system entries hidden",
        )
    return ScanResult(deviations=records, warning=None)
def is_sharepoint_group_principal(principal: str) -> bool:
    """
    Guess whether a principal string names a SharePoint group.

    A SharePoint group is assumed to be a plain display name: not blank, not
    claim-encoded, not email-shaped and not a SharingLinks entry. The result
    decides which entries are worth resolving through
    /_api/web/sitegroups/getbyname.
    """
    candidate = (principal or "").strip()
    if not candidate:
        return False

    # Claim-encoded principals: c:0o.c|..., i:0#.f|..., c:0t.c|..., c:0(.s|...
    looks_like_claim = candidate.startswith(("c:0", "i:0")) or "|" in candidate
    # Email-shaped principals are individual users.
    looks_like_email = "@" in candidate
    # SharingLinks groups are handled by the dedicated resolver.
    is_sharing_link = candidate.lower().startswith("sharinglinks.")

    return not (looks_like_claim or looks_like_email or is_sharing_link)
# SharePoint PrincipalType: 1=User, 2=DistributionList, 4=SecurityGroup,
# 8=SharePointGroup, 16=All.
_PRINCIPAL_TYPE_USER = 1

# Matches a whole string that is a hyphenated 8-4-4-4-12 hexadecimal GUID
# (either letter case); used to validate object ids pulled out of claims.
_GUID_RE = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)
def _extract_user_upn(login: str) -> str:
"""
Pull the UPN/email out of a user claim LoginName so individually granted
users render readably instead of as a raw claim string.
i:0#.f|membership|jan@contoso.com -> jan@contoso.com
i:0#.w|contoso\\jan -> "" (no UPN; caller falls back to Title)
"""
if not login:
return ""
tail = login.rsplit("|", 1)[-1].strip()
return tail if "@" in tail else ""
def _display_principal(login: str, title: str, principal_type: int) -> str:
    """
    Pick the most readable identity for a role-assignment member.

    Individual users are shown by the UPN/email embedded in their claim when
    one exists. Everything else (groups, system/built-in accounts, on-prem
    claims) keeps the original LoginName so claim object ids stay resolvable
    and the site-root noise filter still recognises them. Title is the last
    resort when LoginName is empty.
    """
    if principal_type != _PRINCIPAL_TYPE_USER:
        return login or title
    return _extract_user_upn(login) or login or title
def _extract_aad_group_object_id(principal: str) -> str | None:
    """
    Extract the Entra/AAD object id from a group claim principal.

      c:0t.c|tenant|<guid>                              -> <guid>  (security group)
      c:0o.c|federateddirectoryclaimprovider|<guid>     -> <guid>  (M365 group members)
      c:0o.c|federateddirectoryclaimprovider|<guid>_o   -> <guid>  (M365 group owners)

    Returns None for any other principal or when the tail is not a GUID.
    """
    if not principal:
        return None

    group_claim_prefixes = (
        "c:0t.c|tenant|",
        "c:0o.c|federateddirectoryclaimprovider|",
    )
    # The prefix test is case-insensitive; the id itself keeps its casing.
    if not principal.strip().lower().startswith(group_claim_prefixes):
        return None

    candidate = principal.rsplit("|", 1)[-1].strip()
    if candidate.endswith("_o"):
        candidate = candidate[:-2]
    if _GUID_RE.match(candidate):
        return candidate
    return None
def is_aad_group_principal(principal: str) -> bool:
    """Report whether the principal encodes an Entra/AAD or M365 group object id."""
    object_id = _extract_aad_group_object_id(principal)
    return object_id is not None
def resolve_aad_group_members(principal: str, auth: AuthConfig) -> list[str]:
    """
    Expand an Entra/AAD or M365 group that is assigned directly at the root
    into its members and owners via Microsoft Graph.

    Yields an empty list when the principal does not encode such a group or
    when Graph cannot read it. Credentials are validated only once a group id
    has been found.
    """
    group_object_id = _extract_aad_group_object_id(principal)
    if not group_object_id:
        return []
    validate_auth_config(auth)
    # A fresh visited-set per call; recursion starts at depth 0.
    return _expand_aad_group_by_id(group_object_id, auth, set(), depth=0)
def _is_noise_principal(principal: str) -> bool:
"""
SharePoint surfaces several principal types at site-root level that are
not part of a meaningful root-permission inventory:
- SharingLinks.<guid>.<LinkType>.<guid> auto-created when a child item is shared
- System / built-in accounts (SHAREPOINT\\system, NT AUTHORITY\\*)
- "Limited Access System Group" SP groups
"""
if not principal:
return True
p = principal.lower()
if "sharinglinks." in p:
return True
if p.startswith("sharepoint\\") or p.startswith("nt authority\\"):
return True
if "limited access system group" in p:
return True
return False
def probe_site(site_url: str, auth: AuthConfig) -> ProbeResult:
    """
    Cheap preflight check before a scan.

    Verifies, in order, that the credentials are well-formed, the URL has a
    host, a token can be obtained, the site answers, and role assignments are
    readable. Failures are returned as ProbeResult(ok=False, ...) rather than
    raised.
    """
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ProbeResult(ok=False, message="SHAREPOINT_SCAN_MODE=placeholder")

    try:
        validate_auth_config(auth)
    except Exception as exc:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"Config: {exc}")

    host = urlparse(site_url).netloc
    if not host:
        return ProbeResult(ok=False, message="Invalid site URL")

    try:
        bearer = _get_token_for_host(host, auth)
    except Exception as exc:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"Token: {str(exc)[:240]}")

    request_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {bearer}",
    }

    # Each stage is one GET; the first failing stage decides the hint text.
    stages = (
        ("site", f"{site_url}/_api/web?$select=Title"),
        ("roleassignments", f"{site_url}/_api/web/roleassignments?$top=1&$select=PrincipalId"),
    )
    for stage_name, stage_url in stages:
        try:
            _probe_request(stage_url, request_headers)
        except Exception as exc:  # noqa: BLE001
            return ProbeResult(ok=False, message=_probe_hint(str(exc), stage=stage_name))

    return ProbeResult(ok=True, message="OK")
def resolve_sharing_link_members(
    site_url: str,
    group_name: str,
    auth: AuthConfig,
) -> list[str]:
    """
    List the members of a SharePoint group as display strings.

    Members that are themselves M365/AAD groups are expanded through Microsoft
    Graph with bounded recursion. Anonymous links and unreadable groups give
    an empty list.
    """
    # One visited-set for the whole call so a nested group is expanded once.
    visited_groups: set[str] = set()
    rendered: list[str] = []
    for raw_user in _get_sp_group_users(site_url, group_name, auth):
        rendered += _render_principal(raw_user, auth, visited_groups, depth=0)
    return rendered
def _get_sp_group_users(site_url: str, group_name: str, auth: AuthConfig) -> list[dict]:
    """
    Fetch the raw user entries of a SharePoint group, looked up by name.

    Best effort: any failure of the users request yields an empty list.
    Credential validation and token errors still propagate.
    """
    validate_auth_config(auth)
    host = urlparse(site_url).netloc
    bearer = _get_token_for_host(host, auth)
    request_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {bearer}",
    }
    # Single quotes are doubled because the name sits inside a quoted literal.
    quoted_name = group_name.replace("'", "''")
    users_url = (
        f"{site_url}/_api/web/sitegroups/getbyname('{quoted_name}')/users"
        "?$select=LoginName,Email,Title,PrincipalType"
    )
    try:
        payload = _request_json(users_url, request_headers)
    except Exception:  # noqa: BLE001
        return []
    return list(_extract_values(payload))
# SharePoint PrincipalType values:
# 1 = User, 2 = DistributionList, 4 = SecurityGroup, 8 = SharePointGroup, 16 = All
# Types that _render_principal() treats as directory groups to expand.
_PRINCIPAL_TYPE_GROUP = {2, 4}
def _render_principal(user: dict, auth: AuthConfig, seen: set[str], depth: int) -> list[str]:
    """
    Turn one SharePoint group-user entry into zero or one display strings.

    SHAREPOINT\\ accounts and "c:0(.s|true" claims are dropped. Mail-enabled
    directory groups are expanded through Graph while depth is below 3 and
    shown as "label [members]". Any other entry is shown by its email, else
    title, else login name.
    """
    email = str(user.get("Email") or "").strip()
    login = str(user.get("LoginName") or "").strip()
    title = str(user.get("Title") or "").strip()

    if login.upper().startswith("SHAREPOINT\\") or login.startswith("c:0(.s|true"):
        return []

    login_lower = login.lower()
    looks_like_group = (
        _to_int(user.get("PrincipalType")) in _PRINCIPAL_TYPE_GROUP
        or "federateddirectoryclaimprovider" in login_lower
        or "tenant|" in login_lower
    )
    if looks_like_group and email and depth < 3:
        members = _expand_aad_group_via_graph(email, auth, seen, depth=depth + 1)
        heading = title or email
        if members:
            return [f"{heading} [{', '.join(members)}]"]
        return [f"{heading} (group, no readable members)"]

    # First non-empty identifier wins: email, then title, then login.
    for identifier in (email, title, login):
        if identifier:
            return [identifier]
    return []
def _expand_aad_group_via_graph(
    group_mail: str,
    auth: AuthConfig,
    seen: set[str],
    depth: int,
) -> list[str]:
    """
    Look up a directory group by its mail address in Microsoft Graph and
    return its expanded members and owners.

    Args:
        group_mail: Mail address of the group; compared case-insensitively.
        auth: App credentials used to obtain a Graph token.
        seen: Visited-set shared across the recursion; the lowercased mail is
            added so the same address is never looked up twice.
        depth: Current recursion depth; beyond 3 a marker entry is returned.

    Returns:
        Display strings from _expand_aad_group_by_id(), or an empty list when
        the address is blank, already visited, unknown to Graph, or when the
        token or lookup request fails (best effort).
    """
    # Local import: the module only imports urlparse at file level.
    from urllib.parse import quote

    if depth > 3:
        return ["… (recursion limit)"]
    key = group_mail.strip().lower()
    if not key or key in seen:
        return []
    seen.add(key)
    try:
        token = _get_token_for_host("graph.microsoft.com", auth)
    except Exception:  # noqa: BLE001
        return []
    headers = {"Accept": "application/json", "Authorization": f"Bearer {token}"}
    # Double single quotes for the OData string literal, then percent-encode
    # the value so characters such as '&', '#' or '+' in an address cannot
    # truncate or alter the query string.
    safe_mail = quote(key.replace("'", "''"), safe="")
    lookup_url = (
        "https://graph.microsoft.com/v1.0/groups"
        f"?$filter=mail eq '{safe_mail}'&$select=id,displayName"
    )
    try:
        data = _request_json(lookup_url, headers)
    except Exception:  # noqa: BLE001
        return []
    groups = data.get("value") or []
    if not groups:
        return []
    # Only the first match is used.
    group_id = str(groups[0].get("id") or "").strip()
    if not group_id:
        return []
    return _expand_aad_group_by_id(group_id, auth, seen, depth)
def _expand_aad_group_by_id(
    group_id: str,
    auth: AuthConfig,
    seen: set[str],
    depth: int,
) -> list[str]:
    """
    Collect the members and then the owners of a directory group by object id.

    Returns a recursion-limit marker beyond depth 3, and an empty list for a
    blank id, an already visited group, or a failed Graph token request.
    Duplicate display strings are removed while keeping first-seen order.
    """
    if depth > 3:
        return ["… (recursion limit)"]

    trimmed_id = group_id.strip()
    visit_key = f"id:{trimmed_id.lower()}"
    if not trimmed_id or visit_key in seen:
        return []
    seen.add(visit_key)

    try:
        bearer = _get_token_for_host("graph.microsoft.com", auth)
    except Exception:  # noqa: BLE001
        return []
    graph_headers = {"Accept": "application/json", "Authorization": f"Bearer {bearer}"}

    # Members are listed before owners; owner entries carry an "(owner)" tag.
    collected: list[str] = [
        *_graph_collect(f"/groups/{group_id}/members", graph_headers, auth, seen, depth, owner=False),
        *_graph_collect(f"/groups/{group_id}/owners", graph_headers, auth, seen, depth, owner=True),
    ]
    return _dedup_preserve_order(collected)
def _graph_collect(
    relative: str,
    headers: dict[str, str],
    auth: AuthConfig,
    seen: set[str],
    depth: int,
    owner: bool,
) -> list[str]:
    """
    Read every page of a Graph directory-object collection and render it.

    Args:
        relative: Path below https://graph.microsoft.com/v1.0, for example
            "/groups/<id>/members".
        headers: Request headers including the bearer token.
        auth: App credentials, passed on when a nested group is expanded.
        seen: Visited-set shared across the recursion.
        depth: Current recursion depth; nested groups are expanded at depth + 1.
        owner: When true, user entries are suffixed with " (owner)".

    Returns:
        Display strings in response order. Users appear as UPN, else mail, else
        display name. Nested groups appear as "label [members]" or
        "label (group, no readable members)". If a page request fails, the
        entries gathered so far are returned (best effort).
    """
    next_url: str | None = (
        f"https://graph.microsoft.com/v1.0{relative}"
        "?$select=id,userPrincipalName,mail,displayName"
    )
    out: list[str] = []
    while next_url:
        try:
            data = _request_json(next_url, headers)
        except Exception:  # noqa: BLE001
            return out
        for entry in data.get("value", []):
            otype = str(entry.get("@odata.type") or "")
            if otype.endswith("user"):
                upn = (
                    str(entry.get("userPrincipalName") or "").strip()
                    or str(entry.get("mail") or "").strip()
                    or str(entry.get("displayName") or "").strip()
                )
                if upn:
                    out.append(f"{upn} (owner)" if owner else upn)
            elif otype.endswith("group"):
                nested_id = str(entry.get("id") or "").strip()
                nested_mail = str(entry.get("mail") or "").strip()
                if nested_id:
                    # Expand by object id so groups without a mail address
                    # (plain security groups) are no longer dropped, and the
                    # extra mail-to-id lookup request is avoided.
                    nested = _expand_aad_group_by_id(nested_id, auth, seen, depth + 1)
                elif nested_mail:
                    nested = _expand_aad_group_via_graph(nested_mail, auth, seen, depth + 1)
                else:
                    # Nothing to identify the group by.
                    continue
                label = str(entry.get("displayName") or nested_mail or nested_id)
                if nested:
                    out.append(f"{label} [{', '.join(nested)}]")
                else:
                    out.append(f"{label} (group, no readable members)")
        nl = data.get("@odata.nextLink")
        next_url = nl if isinstance(nl, str) and nl else None
    return out
def _dedup_preserve_order(items: list[str]) -> list[str]:
seen: set[str] = set()
result: list[str] = []
for item in items:
if item not in seen:
seen.add(item)
result.append(item)
return result
def _probe_request(url: str, headers: dict[str, str]) -> None:
    """
    Issue a single GET without retries.

    Raises RuntimeError("HTTP <status>: <body excerpt>") for any status of
    400 or above; the excerpt is at most 200 characters, or '{}' when the
    body is empty.
    """
    reply = requests.get(url, headers=headers, timeout=SCAN_HTTP_TIMEOUT_SEC)
    if reply.status_code < 400:
        return
    body_excerpt = (reply.text or "").strip()[:200]
    raise RuntimeError(f"HTTP {reply.status_code}: {body_excerpt or '{}'}")
def _probe_hint(error: str, stage: str) -> str:
if "401" in error:
if stage == "roleassignments":
return f"{error[:180]} — likely missing admin consent or insufficient permission"
return f"{error[:180]} — likely certificate not uploaded in Azure, or wrong tenant/client id"
if "403" in error:
return f"{error[:180]} — app has no access to this site (Sites.Selected without per-site grant?)"
if "404" in error:
return f"{error[:180]} — site not found"
return error[:220]
def _get_msal_app(auth: AuthConfig, auth_method: str) -> "msal.ConfidentialClientApplication":
    """Fetch, or build and remember, the ConfidentialClientApplication for these credentials.

    Keeping one app object per (tenant, client, auth method) lets MSAL's
    built-in token cache serve and refresh app-only tokens instead of
    re-authenticating on every call.
    """
    registry_key = f"{auth.tenant_id}|{auth.client_id}|{auth_method}"
    existing = _MSAL_APPS.get(registry_key)
    if existing is not None:
        return existing

    # "cert" uses the thumbprint/private-key pair; anything else uses the secret.
    if auth_method == "cert":
        credential = {
            "thumbprint": auth.cert_thumbprint,
            "private_key": auth.cert_private_key,
        }
    else:
        credential = auth.client_secret

    created = msal.ConfidentialClientApplication(
        client_id=auth.client_id,
        authority=f"https://login.microsoftonline.com/{auth.tenant_id}",
        client_credential=credential,
    )
    _MSAL_APPS[registry_key] = created
    return created
def _get_token_for_host(host: str, auth: AuthConfig) -> str:
    """Return an app-only access token for https://<host>/.default.

    Tokens are cached per host, tenant, client and auth method and reused
    until 60 seconds before they expire.

    Raises:
        RuntimeError: when MSAL's result contains no access token; the message
            carries the error code and up to 300 characters of description.
    """
    # Certificate auth is chosen only when both thumbprint and key are present.
    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
    # NOTE(review): the whole lookup/acquire/store sequence is kept under the
    # lock so the cache write is guarded too — confirm against the original
    # indentation.
    with _TOKEN_LOCK:
        cached = _TOKEN_CACHE.get(cache_key)
        if cached is not None and time.time() < cached[1]:
            return cached[0]
        scope = f"https://{host}/.default"
        app = _get_msal_app(auth, auth_method)
        result = app.acquire_token_for_client(scopes=[scope])
        if "access_token" not in result:
            error = result.get("error", "unknown")
            description = result.get("error_description", "")
            raise RuntimeError(f"Token request failed ({error}): {description[:300]}")
        token = str(result["access_token"])
        # expires_in is seconds-from-now; refresh 60s early to avoid edge expiry.
        # A missing expires_in is treated as 3600 seconds.
        expires_in = int(result.get("expires_in", 3600))
        expires_at = time.time() + max(expires_in - 60, 0)
        _TOKEN_CACHE[cache_key] = (token, expires_at)
        return token
def _iter_paged(url: str, headers: dict[str, str]):
    """Yield every entry of a paged collection, following next links until none is returned."""
    page_url = url
    while page_url:
        page = _request_json(page_url, headers)
        yield from _extract_values(page)
        page_url = _extract_next_link(page)
def _request_json(url: str, headers: dict[str, str]) -> dict:
    """
    GET a URL and return the decoded JSON body, retrying on failure.

    Up to SCAN_HTTP_MAX_RETRIES attempts are made. A 429 or 503 response waits
    for the numeric Retry-After header when present, otherwise for
    SCAN_HTTP_BACKOFF_SEC * attempt. Any other failure (HTTP status >= 400,
    network error, invalid JSON) waits SCAN_HTTP_BACKOFF_SEC * attempt. No
    wait happens after the final attempt.

    Raises:
        RuntimeError: when all attempts fail. The message names the URL and
            the last failure, including the case where every attempt was
            throttled.
    """
    last_error: str | None = None
    for attempt in range(1, SCAN_HTTP_MAX_RETRIES + 1):
        is_last_attempt = attempt >= SCAN_HTTP_MAX_RETRIES
        try:
            response = requests.get(url, headers=headers, timeout=SCAN_HTTP_TIMEOUT_SEC)
            if response.status_code in (429, 503):
                # Record the throttling so the final error is never "None".
                last_error = f"HTTP {response.status_code}: throttled or service unavailable"
                if not is_last_attempt:
                    retry_after = _to_int(response.headers.get("Retry-After"))
                    delay = retry_after if retry_after > 0 else SCAN_HTTP_BACKOFF_SEC * attempt
                    time.sleep(delay)
                continue
            if response.status_code >= 400:
                raise RuntimeError(f"HTTP {response.status_code}: {response.text[:300]}")
            return response.json()
        except Exception as exc:  # noqa: BLE001
            last_error = str(exc)
            if not is_last_attempt:
                time.sleep(SCAN_HTTP_BACKOFF_SEC * attempt)
                continue
            raise RuntimeError(f"Request failed for {url}: {last_error}") from exc
    # Reached when the last attempt was throttled (or no attempt was allowed).
    raise RuntimeError(f"Request failed for {url}: {last_error}")
def _extract_values(data: dict) -> list[dict]:
if "value" in data and isinstance(data["value"], list):
return data["value"]
d = data.get("d")
if isinstance(d, dict):
results = d.get("results")
if isinstance(results, list):
return results
return []
def _extract_next_link(data: dict) -> str | None:
for key in ("@odata.nextLink", "odata.nextLink", "__next"):
value = data.get(key)
if isinstance(value, str) and value:
return value
d = data.get("d")
if isinstance(d, dict):
value = d.get("__next")
if isinstance(value, str) and value:
return value
return None
def _get_role_assignments(url: str, headers: dict[str, str]) -> list[PermissionEntry]:
    """
    Fetch one role-assignment collection and flatten it into PermissionEntry
    items, one per (member, role) pair.

    Members without a usable principal are skipped, and so are roles named
    "limited access" (compared case-insensitively after normalisation).
    """
    entries: list[PermissionEntry] = []
    for assignment in _extract_values(_request_json(url, headers)):
        member = assignment.get("Member") or {}
        principal = _display_principal(
            str(member.get("LoginName") or "").strip(),
            str(member.get("Title") or "").strip(),
            _to_int(member.get("PrincipalType")),
        )
        if not principal:
            continue
        entries.extend(
            PermissionEntry(principal=principal, role_name=role)
            for role in _extract_role_names(assignment.get("RoleDefinitionBindings"))
            if role.lower() != "limited access"
        )
    return entries
# Dutch role-definition names mapped to their English equivalents. Keys are
# lowercase because _normalize_role_name() lowercases the name before lookup.
_ROLE_NAME_NL_TO_EN: dict[str, str] = {
    "volledig beheer": "Full Control",
    "ontwerpen": "Design",
    "bewerken": "Edit",
    "bijdragen": "Contribute",
    "lezen": "Read",
    "beperkte toegang": "Limited Access",
    "goedkeuren": "Approve",
    "hiërarchieën beheren": "Manage Hierarchy",
    "weergeven alleen": "View Only",
    "beperkt lezen": "Restricted Read",
}
def _normalize_role_name(name: str) -> str:
    """Translate a known Dutch role name to English; return any other name unchanged."""
    translated = _ROLE_NAME_NL_TO_EN.get(name.lower())
    if translated is None:
        return name
    return translated
def _extract_role_names(bindings) -> list[str]:
    """
    Collect normalised role names from a RoleDefinitionBindings value.

    Accepts a plain list of binding dicts or a dict wrapping that list under
    "results". Entries that are not dicts or have no truthy "Name" are
    ignored; any other input shape yields an empty list.
    """
    # Unwrap the {"results": [...]} form first so one code path handles both.
    if isinstance(bindings, dict):
        bindings = bindings.get("results")
    if not isinstance(bindings, list):
        return []
    return [
        _normalize_role_name(str(binding.get("Name") or "").strip())
        for binding in bindings
        if isinstance(binding, dict) and binding.get("Name")
    ]
def _deduplicate_hierarchical(deviations: list[DeviationRecord]) -> list[DeviationRecord]:
"""
Remove child-level deviations that are already covered by a parent in the URL hierarchy.
"""
sorted_devs = sorted(deviations, key=lambda d: len(d.object_url))
covered: dict[tuple[str, str], list[str]] = {}
result: list[DeviationRecord] = []
for dev in sorted_devs:
key = (dev.principal, dev.role_name)
ancestor_urls = covered.get(key)
if ancestor_urls:
parent = dev.object_url.rstrip("/")
already_covered = any(
parent == anc.rstrip("/") or parent.startswith(anc.rstrip("/") + "/")
for anc in ancestor_urls
)
if already_covered:
continue
else:
covered[key] = []
result.append(dev)
covered[key].append(dev.object_url)
return result
def _deviation_records_only_added(
    object_url: str,
    object_type: str,
    root_set: set[PermissionEntry],
    current_set: set[PermissionEntry],
) -> list[DeviationRecord]:
    """
    Build "added" records for entries present on the object but absent from
    the site root, ordered case-insensitively by principal and then role.
    Entries that exist only on the root are not reported.
    """
    extra_entries = sorted(
        current_set - root_set,
        key=lambda item: (item.principal.lower(), item.role_name.lower()),
    )
    return [
        DeviationRecord(
            object_url=object_url,
            object_type=object_type,
            principal=item.principal,
            role_name=item.role_name,
            delta_type="added",
        )
        for item in extra_entries
    ]
def _absolute_url(host: str, server_relative_url: str) -> str:
if not server_relative_url:
return f"https://{host}"
if server_relative_url.startswith("http://") or server_relative_url.startswith("https://"):
return server_relative_url
if not server_relative_url.startswith("/"):
server_relative_url = "/" + server_relative_url
return f"https://{host}{server_relative_url}"
def _to_int(value) -> int:
try:
if value is None:
return 0
return int(value)
except (TypeError, ValueError):
return 0
def _to_bool(value) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.strip().lower() in ("1", "true", "yes")
return bool(value)

View File

@ -1,21 +1,28 @@
from __future__ import annotations from __future__ import annotations
from datetime import datetime from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field, HttpUrl from pydantic import BaseModel, Field, HttpUrl
# Valid scan types, mirrored by the frontend scan-type dropdowns. Used to
# validate incoming job requests (FastAPI returns 422 on anything else).
ScanType = Literal["sharepoint", "sharepoint_root", "mailbox", "entra_groups"]
class CreateTenantProfileRequest(BaseModel): class CreateTenantProfileRequest(BaseModel):
name: str name: str
tenant_id: str tenant_id: str
client_id: str client_id: str
client_secret: str | None = None client_secret: str | None = None
primary_domain: str | None = None
class TenantProfileItem(BaseModel): class TenantProfileItem(BaseModel):
id: str id: str
name: str name: str
tenant_id: str tenant_id: str
primary_domain: str | None = None
client_id: str client_id: str
has_certificate: bool has_certificate: bool
cert_thumbprint: str | None cert_thumbprint: str | None
@ -31,7 +38,13 @@ class TenantCertificateResponse(BaseModel):
class CreateScanJobRequest(BaseModel): class CreateScanJobRequest(BaseModel):
scan_type: ScanType = "sharepoint"
site_urls: list[HttpUrl] = Field(default_factory=list) site_urls: list[HttpUrl] = Field(default_factory=list)
mailboxes: list[str] = Field(default_factory=list)
scan_all_mailboxes: bool = False
organization: str | None = None
group_ids: list[str] = Field(default_factory=list)
scan_all_groups: bool = False
skip_default_sites: bool = True skip_default_sites: bool = True
tenant_profile_id: str | None = None tenant_profile_id: str | None = None
tenant_id: str | None = None tenant_id: str | None = None
@ -43,6 +56,7 @@ class ScanJobSummary(BaseModel):
id: str id: str
status: str status: str
source_type: str source_type: str
scan_type: str
skip_default_sites: bool skip_default_sites: bool
tenant_profile_id: str | None tenant_profile_id: str | None
tenant_name: str | None tenant_name: str | None
@ -72,6 +86,16 @@ class ScanTargetItem(BaseModel):
error_message: str | None error_message: str | None
started_at: datetime | None started_at: datetime | None
finished_at: datetime | None finished_at: datetime | None
last_probe_at: datetime | None = None
last_probe_ok: bool | None = None
last_probe_message: str | None = None
class ProbeResultResponse(BaseModel):
target_id: int
ok: bool
message: str
last_probe_at: datetime
class PermissionDeviationItem(BaseModel): class PermissionDeviationItem(BaseModel):
@ -82,7 +106,8 @@ class PermissionDeviationItem(BaseModel):
principal: str principal: str
role_name: str role_name: str
delta_type: str delta_type: str
resolved_members: str | None permission_type: str | None = None
resolved_members: str | None = None
created_at: datetime created_at: datetime
@ -95,6 +120,16 @@ class ResolveSharingLinksResponse(BaseModel):
updated_deviations: int updated_deviations: int
class ResolveGroupsResponse(BaseModel):
resolved_groups: int
skipped_groups: int
updated_deviations: int
class SharingLinkTypesResponse(BaseModel):
type_counts: dict[str, int]
class ScanJobDetail(ScanJobSummary): class ScanJobDetail(ScanJobSummary):
targets: list[ScanTargetItem] targets: list[ScanTargetItem]
deviations: list[PermissionDeviationItem] deviations: list[PermissionDeviationItem]

View File

@ -0,0 +1,22 @@
"""Clearview version metadata.
The three-part VERSION is the release version. Dev/test builds append the
explicit BUILD segment. Build numbers are source state, not derived from git
history, so operators can see exactly which image build is running.
"""
from __future__ import annotations
VERSION = "v0.2.0"
BUILD = 0
def display_version() -> str:
    """Return the version shown to users: VERSION, with ".BUILD" appended when BUILD is positive."""
    return f"{VERSION}.{BUILD}" if BUILD > 0 else VERSION
def cache_version() -> str:
    """Return the static-asset cache-buster: the display version without its leading "v"."""
    visible = display_version()
    return visible.lstrip("v")

View File

@ -4,7 +4,7 @@ import logging
import threading import threading
import time import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from datetime import datetime from datetime import datetime, timezone
from sqlalchemy import select from sqlalchemy import select
@ -14,9 +14,12 @@ from .config import (
SCAN_TARGET_MAX_RETRIES, SCAN_TARGET_MAX_RETRIES,
SCAN_TARGET_TIMEOUT_SEC, SCAN_TARGET_TIMEOUT_SEC,
) )
from .auth.sessions import purge_expired
from .db import SessionLocal from .db import SessionLocal
from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
from .scanner import AuthConfig, scan_site_for_deviations from .scanners import AuthConfig, ProbeResult, probe, scan
_SESSION_PURGE_INTERVAL_SEC = 300
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -25,6 +28,7 @@ class ScanWorker:
def __init__(self) -> None: def __init__(self) -> None:
self._stop_event = threading.Event() self._stop_event = threading.Event()
self._thread: threading.Thread | None = None self._thread: threading.Thread | None = None
self._last_session_purge: float = 0.0
def start(self) -> None: def start(self) -> None:
if self._thread and self._thread.is_alive(): if self._thread and self._thread.is_alive():
@ -41,23 +45,42 @@ class ScanWorker:
def _run(self) -> None: def _run(self) -> None:
while not self._stop_event.is_set(): while not self._stop_event.is_set():
self._maybe_purge_sessions()
did_work = self._process_next_job() did_work = self._process_next_job()
if not did_work: if not did_work:
self._stop_event.wait(SCAN_JOB_POLL_INTERVAL_SEC) self._stop_event.wait(SCAN_JOB_POLL_INTERVAL_SEC)
def _maybe_purge_sessions(self) -> None:
now = time.monotonic()
if now - self._last_session_purge < _SESSION_PURGE_INTERVAL_SEC:
return
self._last_session_purge = now
try:
with SessionLocal() as db:
removed = purge_expired(db)
db.commit()
if removed:
log.info("purged %d expired auth sessions", removed)
except Exception:
log.exception("auth session purge failed")
def _process_next_job(self) -> bool: def _process_next_job(self) -> bool:
with SessionLocal() as db: with SessionLocal() as db:
# Atomic claim: lock the chosen queued row and skip rows already
# locked by another worker, so multiple workers/replicas never grab
# the same job. The status flip is committed in this transaction.
job = db.execute( job = db.execute(
select(ScanJob) select(ScanJob)
.where(ScanJob.status == "queued") .where(ScanJob.status == "queued")
.order_by(ScanJob.created_at.asc()) .order_by(ScanJob.created_at.asc())
.limit(1) .limit(1)
.with_for_update(skip_locked=True)
).scalar_one_or_none() ).scalar_one_or_none()
if job is None: if job is None:
return False return False
now = datetime.utcnow() now = datetime.now(timezone.utc)
job.status = "running" job.status = "running"
job.started_at = now job.started_at = now
job.heartbeat_at = now job.heartbeat_at = now
@ -96,7 +119,7 @@ class ScanWorker:
job = db.get(ScanJob, job_id) job = db.get(ScanJob, job_id)
if not job: if not job:
return return
now = datetime.utcnow() now = datetime.now(timezone.utc)
job.heartbeat_at = now job.heartbeat_at = now
job.updated_at = now job.updated_at = now
job.finished_at = now job.finished_at = now
@ -113,7 +136,7 @@ class ScanWorker:
if not job or not target: if not job or not target:
return return
now = datetime.utcnow() now = datetime.now(timezone.utc)
target.status = "running" target.status = "running"
target.started_at = now target.started_at = now
target.updated_at = now target.updated_at = now
@ -121,6 +144,28 @@ class ScanWorker:
job.updated_at = now job.updated_at = now
db.commit() db.commit()
probe = self._run_probe(target_id)
if not probe.ok:
with SessionLocal() as db:
job = db.get(ScanJob, job_id)
target = db.get(ScanTarget, target_id)
if not job or not target:
return
now = datetime.now(timezone.utc)
target.status = "failed"
target.attempts = 1
target.error_message = f"Preflight: {probe.message}"
target.finished_at = now
target.updated_at = now
job.processed_targets += 1
job.failed_targets += 1
job.heartbeat_at = now
job.updated_at = now
if not job.error_message:
job.error_message = "One or more scan targets failed preflight"
db.commit()
return
max_attempts = SCAN_TARGET_MAX_RETRIES + 1 max_attempts = SCAN_TARGET_MAX_RETRIES + 1
last_error: str | None = None last_error: str | None = None
latest_warning: str | None = None latest_warning: str | None = None
@ -147,10 +192,11 @@ class ScanWorker:
principal=deviation.principal, principal=deviation.principal,
role_name=deviation.role_name, role_name=deviation.role_name,
delta_type=deviation.delta_type, delta_type=deviation.delta_type,
permission_type=deviation.permission_type,
) )
) )
now = datetime.utcnow() now = datetime.now(timezone.utc)
target.status = "completed" target.status = "completed"
target.attempts = attempt target.attempts = attempt
target.error_message = None target.error_message = None
@ -180,7 +226,7 @@ class ScanWorker:
if not job or not target: if not job or not target:
return return
now = datetime.utcnow() now = datetime.now(timezone.utc)
target.status = "failed" target.status = "failed"
target.attempts = max_attempts target.attempts = max_attempts
target.error_message = last_error target.error_message = last_error
@ -196,6 +242,48 @@ class ScanWorker:
db.commit() db.commit()
def _run_probe(self, target_id: int):
    """Run the connection preflight for one scan target and record the outcome.

    The target, its job and (when the job references one) the tenant profile
    are read in a first session to assemble the ``AuthConfig``. The probe is
    then executed, and its result is written to the target's ``last_probe_*``
    columns in a second session.

    Returns the ``ProbeResult`` from ``probe(...)``, or a failed
    ``ProbeResult`` when the target or its job no longer exists.
    """
    with SessionLocal() as session:
        scan_target = session.get(ScanTarget, target_id)
        if not scan_target:
            return ProbeResult(ok=False, message="Target not found")

        owning_job = session.get(ScanJob, scan_target.job_id)
        if not owning_job:
            return ProbeResult(ok=False, message="Job not found")

        site_url = scan_target.site_url
        scan_type = owning_job.scan_type or "sharepoint"

        # Certificate material lives on the tenant profile; every field stays
        # None when the job has no profile or the profile row is gone.
        profile = None
        if owning_job.tenant_profile_id:
            profile = session.get(TenantProfile, owning_job.tenant_profile_id)
        cert_fields = ("cert_private_key", "cert_thumbprint", "cert_public_pem")
        cert_kwargs = {
            field: (getattr(profile, field) if profile else None)
            for field in cert_fields
        }

        auth = AuthConfig(
            tenant_id=owning_job.auth_tenant_id or "",
            client_id=owning_job.auth_client_id or "",
            client_secret=owning_job.auth_client_secret or "",
            **cert_kwargs,
        )

    outcome = probe(scan_type, site_url, auth)

    # Persist the probe outcome on the target (skipped if it vanished meanwhile).
    with SessionLocal() as session:
        scan_target = session.get(ScanTarget, target_id)
        if scan_target:
            stamp = datetime.now(timezone.utc)
            scan_target.last_probe_at = stamp
            scan_target.last_probe_ok = outcome.ok
            scan_target.last_probe_message = outcome.message
            scan_target.updated_at = stamp
            session.commit()
    return outcome
def _scan_with_timeout(self, target_id: int, timeout_sec: int): def _scan_with_timeout(self, target_id: int, timeout_sec: int):
with SessionLocal() as db: with SessionLocal() as db:
target = db.get(ScanTarget, target_id) target = db.get(ScanTarget, target_id)
@ -205,37 +293,42 @@ class ScanWorker:
job = db.get(ScanJob, target.job_id) job = db.get(ScanJob, target.job_id)
if not job: if not job:
raise RuntimeError(f"Job {target.job_id} not found for target {target_id}") raise RuntimeError(f"Job {target.job_id} not found for target {target_id}")
scan_type = job.scan_type or "sharepoint"
job_id = job.id
cert_private_key: str | None = None cert_private_key: str | None = None
cert_thumbprint: str | None = None cert_thumbprint: str | None = None
cert_public_pem: str | None = None
if job.tenant_profile_id: if job.tenant_profile_id:
profile = db.get(TenantProfile, job.tenant_profile_id) profile = db.get(TenantProfile, job.tenant_profile_id)
if profile: if profile:
cert_private_key = profile.cert_private_key cert_private_key = profile.cert_private_key
cert_thumbprint = profile.cert_thumbprint cert_thumbprint = profile.cert_thumbprint
cert_public_pem = profile.cert_public_pem
auth = AuthConfig( auth = AuthConfig(
tenant_id=job.auth_tenant_id or "", tenant_id=job.auth_tenant_id or "",
client_id=job.auth_client_id or "", client_id=job.auth_client_id or "",
client_secret=job.auth_client_secret or "", client_secret=job.auth_client_secret or "",
cert_private_key=cert_private_key, cert_private_key=cert_private_key,
cert_thumbprint=cert_thumbprint, cert_thumbprint=cert_thumbprint,
cert_public_pem=cert_public_pem,
) )
def progress_callback(activity: str, items: int) -> None: def progress_callback(activity: str, items: int) -> None:
try: try:
with SessionLocal() as db: with SessionLocal() as db:
job = db.get(ScanJob, target.job_id) job = db.get(ScanJob, job_id)
if job: if job:
job.scan_activity = activity job.scan_activity = activity
if items > 0: if items > 0:
job.items_scanned += items job.items_scanned += items
job.heartbeat_at = datetime.utcnow() job.heartbeat_at = datetime.now(timezone.utc)
job.updated_at = datetime.utcnow() job.updated_at = datetime.now(timezone.utc)
db.commit() db.commit()
except Exception: # noqa: BLE001 except Exception: # noqa: BLE001
pass pass
with ThreadPoolExecutor(max_workers=1) as pool: with ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(scan_site_for_deviations, site_url, auth, progress_callback) future = pool.submit(scan, scan_type, site_url, auth, progress_callback)
try: try:
return future.result(timeout=timeout_sec) return future.result(timeout=timeout_sec)
except FutureTimeoutError as exc: except FutureTimeoutError as exc:

View File

View File

@ -0,0 +1,53 @@
"""Pytest fixtures for Clearview tests.
Uses an in-memory SQLite database. Schema is created from the SQLAlchemy
metadata directly (the Alembic migrations target Postgres types like JSONB).
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
import pytest
from sqlalchemy import create_engine, event
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool
# Put the "src" directory (sibling of this file's parent directory) at the front
# of sys.path so the application package can be imported by the tests.
SRC = Path(__file__).resolve().parents[1] / "src"
sys.path.insert(0, str(SRC))
# Defaults only: setdefault leaves values already present in the environment
# untouched. NOTE(review): presumably read by the app at import time — confirm.
os.environ.setdefault("DATABASE_URL", "sqlite+pysqlite:///:memory:")
os.environ.setdefault("COOKIE_SECURE", "false")
@pytest.fixture()
def db_engine():
    """Provide a throwaway in-memory SQLite engine with the auth schema created.

    Foreign-key enforcement is switched on for every new DBAPI connection, the
    tables are created from the auth models' metadata, and the engine is
    disposed once the test has finished.
    """

    def _fk_on(dbapi_conn, _):
        # Enable foreign-key enforcement on this connection.
        cursor = dbapi_conn.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.close()

    test_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
        future=True,
    )
    event.listen(test_engine, "connect", _fk_on)

    # Imported inside the fixture rather than at module import time.
    from clearview_app.auth.models import Base as AuthBase

    AuthBase.metadata.create_all(test_engine)
    yield test_engine
    test_engine.dispose()
@pytest.fixture()
def db_session(db_engine):
    """Yield an ORM session bound to the test engine; it is closed afterwards."""
    session_factory = sessionmaker(bind=db_engine, autoflush=False, autocommit=False, future=True)
    with session_factory() as session:
        yield session

View File

@ -0,0 +1,76 @@
from fastapi import FastAPI
from fastapi.testclient import TestClient
from sqlalchemy.orm import sessionmaker
from clearview_app.auth.dependencies import get_db
from clearview_app.auth.router import router as auth_router
def make_app(db_engine):
    """Build a minimal FastAPI app that serves only the auth router.

    ``get_db`` is overridden so each request receives a session bound to the
    given test engine.
    """
    session_factory = sessionmaker(bind=db_engine, autoflush=False, autocommit=False, future=True)

    def override_get_db():
        with session_factory() as session:
            yield session

    application = FastAPI()
    application.dependency_overrides[get_db] = override_get_db
    application.include_router(auth_router)
    return application
def test_setup_required_when_empty(db_engine):
    """With an empty database the setup-required endpoint answers True."""
    client = TestClient(make_app(db_engine))
    response = client.get("/api/auth/setup-required")
    assert response.json() == {"setup_required": True}
def test_setup_creates_first_admin_and_logs_in(db_engine):
    """Setup returns 200 with a session cookie, and /me then reports root as admin."""
    client = TestClient(make_app(db_engine))

    setup_response = client.post(
        "/api/auth/setup",
        json={"username": "root", "password": "CorrectHorse42"},
    )
    assert setup_response.status_code == 200
    assert setup_response.cookies.get("clearview_session")

    me_response = client.get("/api/auth/me")
    assert me_response.status_code == 200
    assert me_response.json() == {"username": "root", "role": "admin"}
def test_setup_rejects_when_users_exist(db_engine):
    """A second setup call, made after the first one, is answered with 409."""
    app = make_app(db_engine)

    first_client = TestClient(app)
    first_client.post("/api/auth/setup", json={"username": "root", "password": "CorrectHorse42"})

    second_client = TestClient(app)
    response = second_client.post("/api/auth/setup", json={"username": "x", "password": "CorrectHorse42"})
    assert response.status_code == 409
def test_login_wrong_password_returns_401(db_engine):
    """Logging in with a wrong password yields 401."""
    app = make_app(db_engine)

    setup_client = TestClient(app)
    setup_client.post("/api/auth/setup", json={"username": "root", "password": "CorrectHorse42"})

    login_client = TestClient(app)
    response = login_client.post(
        "/api/auth/login",
        json={"username": "root", "password": "WrongPass00X", "remember": False},
    )
    assert response.status_code == 401
def test_login_success_sets_cookie(db_engine):
    """A correct login from a fresh client sets the session cookie and authenticates /me."""
    app = make_app(db_engine)

    setup_client = TestClient(app)
    setup_client.post("/api/auth/setup", json={"username": "root", "password": "CorrectHorse42"})

    login_client = TestClient(app)
    response = login_client.post(
        "/api/auth/login",
        json={"username": "root", "password": "CorrectHorse42", "remember": True},
    )
    assert response.status_code == 200
    assert login_client.cookies.get("clearview_session")

    me_payload = login_client.get("/api/auth/me").json()
    assert me_payload["username"] == "root"
def test_logout_invalidates_session(db_engine):
    """/me answers 200 after setup and 401 once logout has been called."""
    client = TestClient(make_app(db_engine))
    client.post("/api/auth/setup", json={"username": "root", "password": "CorrectHorse42"})

    before_logout = client.get("/api/auth/me")
    assert before_logout.status_code == 200

    client.post("/api/auth/logout")

    after_logout = client.get("/api/auth/me")
    assert after_logout.status_code == 401
def test_password_policy_enforced_on_setup(db_engine):
    """Setup with the password "short" is rejected with 400."""
    client = TestClient(make_app(db_engine))
    response = client.post("/api/auth/setup", json={"username": "root", "password": "short"})
    assert response.status_code == 400

View File

@ -0,0 +1,91 @@
import pytest
from fastapi import Depends, FastAPI
from fastapi.testclient import TestClient
from sqlalchemy.orm import sessionmaker
from clearview_app.auth import sessions as S
from clearview_app.auth.dependencies import (
AuthedUser,
get_db,
require_admin,
require_user,
)
from clearview_app.auth.models import User
@pytest.fixture()
def app_and_client(db_engine):
    """Return ``(app, session_factory)`` for exercising the auth dependencies.

    The app exposes ``/who`` (guarded by ``require_user``) and ``/admin-only``
    (guarded by ``require_admin``); ``get_db`` is overridden to use the test
    engine. Despite the fixture name, no ``TestClient`` is created here.
    """
    session_factory = sessionmaker(bind=db_engine, autoflush=False, autocommit=False, future=True)

    def override_get_db():
        with session_factory() as session:
            yield session

    application = FastAPI()
    application.dependency_overrides[get_db] = override_get_db

    @application.get("/who")
    def who(u: AuthedUser = Depends(require_user)):
        return {"id": u.id, "role": u.role}

    @application.get("/admin-only")
    def admin_only(u: AuthedUser = Depends(require_admin)):
        return {"ok": True}

    return application, session_factory
def _make_user(Session, role: str, username: str = "x"):
    """Insert a user with the given role and return the refreshed instance.

    Args:
        Session: session factory bound to the test engine.
        role: value stored in ``User.role``.
        username: value stored in ``User.username``.

    Returns:
        The ``User`` object, refreshed before the session is closed so that
        its generated attributes (for example ``id``) are loaded.
    """
    # The context manager closes the session even when commit/refresh raises,
    # which the previous one-line "add; commit; refresh; close" chain did not.
    with Session() as s:
        u = User(username=username, password_hash="h", role=role)
        s.add(u)
        s.commit()
        s.refresh(u)
    return u
def _login(Session, user_id: int) -> str:
    """Create and commit a non-"remember" session for ``user_id``; return its id.

    Args:
        Session: session factory bound to the test engine.
        user_id: id of the user the session is created for.

    Returns:
        The session id returned by ``S.create_session``.
    """
    # Context manager guarantees the DB session is closed even if
    # create_session or commit raises.
    with Session() as s:
        sid, _ = S.create_session(s, user_id=user_id, remember=False, ip=None, user_agent=None)
        s.commit()
    return sid
def test_anon_gets_401(app_and_client):
    """A request without a session cookie to /who is answered with 401."""
    application, _ = app_and_client
    response = TestClient(application).get("/who")
    assert response.status_code == 401
def test_user_can_access_require_user(app_and_client):
    """A logged-in user with role "user" can call /who and gets that role back."""
    application, session_factory = app_and_client
    account = _make_user(session_factory, "user")
    session_id = _login(session_factory, account.id)

    client = TestClient(application)
    client.cookies.set("clearview_session", session_id)

    response = client.get("/who")
    assert response.status_code == 200
    assert response.json()["role"] == "user"
def test_user_blocked_from_admin(app_and_client):
    """A logged-in user with role "user" gets 403 on /admin-only."""
    application, session_factory = app_and_client
    account = _make_user(session_factory, "user")
    session_id = _login(session_factory, account.id)

    client = TestClient(application)
    client.cookies.set("clearview_session", session_id)

    response = client.get("/admin-only")
    assert response.status_code == 403
def test_admin_allowed(app_and_client):
    """A logged-in user with role "admin" gets 200 on /admin-only."""
    application, session_factory = app_and_client
    account = _make_user(session_factory, "admin")
    session_id = _login(session_factory, account.id)

    client = TestClient(application)
    client.cookies.set("clearview_session", session_id)

    response = client.get("/admin-only")
    assert response.status_code == 200
def test_inactive_user_rejected(app_and_client):
    """A session belonging to a user with ``is_active = False`` gets 401 on /who."""
    app, Session = app_and_client
    u = _make_user(Session, "admin")

    # Deactivate the account. The context manager closes the session even if
    # the lookup or commit raises (the former one-liner leaked it in that case).
    with Session() as s:
        s.get(User, u.id).is_active = False
        s.commit()

    sid = _login(Session, u.id)
    c = TestClient(app)
    c.cookies.set("clearview_session", sid)
    assert c.get("/who").status_code == 401

View File

@ -0,0 +1,24 @@
"""Smoke check that existing routers refuse anonymous requests once gated."""
from fastapi import Depends, FastAPI
from fastapi.testclient import TestClient
from sqlalchemy.orm import sessionmaker
from clearview_app.api_tenants import router as tenants_router
from clearview_app.auth.dependencies import get_db, require_user
def test_tenants_route_requires_auth(db_engine):
    """The tenants router, mounted behind ``require_user``, answers anonymous GETs with 401."""
    session_factory = sessionmaker(bind=db_engine, autoflush=False, autocommit=False, future=True)

    def override_get_db():
        with session_factory() as session:
            yield session

    application = FastAPI()
    application.dependency_overrides[get_db] = override_get_db
    application.include_router(tenants_router, dependencies=[Depends(require_user)])

    response = TestClient(application).get("/api/tenants")
    assert response.status_code == 401

View File

@ -0,0 +1,44 @@
from datetime import datetime, timedelta, timezone
from clearview_app.auth.models import AuthAudit, User, UserSession
def test_user_defaults(db_session):
    """A persisted user gets an id, is active, and has a datetime ``created_at``."""
    account = User(username="alice", password_hash="x", role="admin")
    db_session.add(account)
    db_session.commit()
    db_session.refresh(account)

    assert account.id is not None
    assert account.is_active is True
    assert isinstance(account.created_at, datetime)
def test_session_persists_with_expiry(db_session):
    """A ``UserSession`` row with an explicit expiry can be stored and gets ``created_at``."""
    owner = User(username="bob", password_hash="x", role="user")
    db_session.add(owner)
    db_session.commit()
    db_session.refresh(owner)

    expiry = datetime.now(timezone.utc) + timedelta(hours=8)
    record = UserSession(
        id="abc123",
        user_id=owner.id,
        expires_at=expiry,
        ip="1.2.3.4",
        user_agent="ua",
        remember=False,
    )
    db_session.add(record)
    db_session.commit()

    assert record.created_at is not None
def test_audit_row(db_session):
    """An ``AuthAudit`` row gets an id and keeps its ``detail`` mapping."""
    entry = AuthAudit(event="login_ok", ip="9.9.9.9", detail={"k": "v"})
    db_session.add(entry)
    db_session.commit()

    assert entry.id is not None
    assert entry.detail == {"k": "v"}
def test_record_event_persists(db_session):
    """``record_event`` followed by a commit leaves exactly one matching audit row."""
    from clearview_app.auth.audit import record_event

    record_event(db_session, event="login_ok", user_id=None, ip="1.1.1.1", detail={"u": "x"})
    db_session.commit()

    stored = db_session.query(AuthAudit).all()
    assert len(stored) == 1
    only_row = stored[0]
    assert only_row.event == "login_ok"
    assert only_row.detail == {"u": "x"}

View File

@ -0,0 +1,37 @@
import pytest
from clearview_app.auth.security import (
PasswordPolicyError,
hash_password,
new_session_id,
validate_password,
verify_password,
)
def test_hash_and_verify_roundtrip():
    """A hash verifies against its own password and not against a different one."""
    hashed = hash_password("CorrectHorse42")
    matches = verify_password("CorrectHorse42", hashed)
    assert matches is True
    mismatches = verify_password("wrong", hashed)
    assert mismatches is False
def test_verify_returns_false_on_garbage_hash():
    """``verify_password`` returns False for a string that is not a real hash."""
    outcome = verify_password("anything", "not-a-real-hash")
    assert outcome is False
@pytest.mark.parametrize("pw", ["short1A", "alllowercase", "ALLUPPERCASE", "12345678901234"])
def test_policy_rejects(pw):
    """Each listed password makes ``validate_password`` raise ``PasswordPolicyError``."""
    with pytest.raises(PasswordPolicyError):
        validate_password(pw)
@pytest.mark.parametrize("pw", ["CorrectHorse42", "abcdefghij12"])
def test_policy_accepts(pw):
    """Each listed password passes ``validate_password`` without raising."""
    validate_password(pw)
def test_new_session_id_unique_and_hex():
    """Two generated ids differ; the first is 32 lowercase hexadecimal characters."""
    first = new_session_id()
    second = new_session_id()
    assert first != second
    assert len(first) == 32
    assert all(ch in "0123456789abcdef" for ch in first)

View File

@ -0,0 +1,79 @@
from datetime import datetime, timedelta, timezone
import pytest
from clearview_app.auth import sessions as S
from clearview_app.auth.models import User, UserSession
@pytest.fixture()
def user(db_session):
    """Persist and return an admin user named "alice"."""
    account = User(username="alice", password_hash="x", role="admin")
    db_session.add(account)
    db_session.commit()
    db_session.refresh(account)
    return account
def test_create_session_sliding(db_session, user):
    """A non-remember session has a 32-character id and expires in about eight hours."""
    session_id, _ = S.create_session(db_session, user_id=user.id, remember=False, ip=None, user_agent=None)
    db_session.commit()
    assert len(session_id) == 32

    stored = db_session.get(UserSession, session_id)
    assert stored.remember is False

    remaining = stored.expires_at - datetime.now(timezone.utc)
    lower_bound = timedelta(hours=7, minutes=55)
    upper_bound = timedelta(hours=8, minutes=5)
    assert lower_bound < remaining < upper_bound
def test_create_session_remember(db_session, user):
    """A remember-me session expires more than 29 days from now."""
    session_id, _ = S.create_session(db_session, user_id=user.id, remember=True, ip=None, user_agent=None)
    db_session.commit()

    stored = db_session.get(UserSession, session_id)
    remaining = stored.expires_at - datetime.now(timezone.utc)
    assert remaining > timedelta(days=29)
def test_lookup_refresh_sliding_extends(db_session, user):
    """Looking up a nearly expired non-remember session pushes its expiry past seven hours."""
    session_id, _ = S.create_session(db_session, user_id=user.id, remember=False, ip=None, user_agent=None)
    db_session.commit()

    # Shrink the remaining lifetime to five minutes before the lookup.
    stored = db_session.get(UserSession, session_id)
    stored.expires_at = datetime.now(timezone.utc) + timedelta(minutes=5)
    db_session.commit()

    refreshed = S.lookup_and_refresh(db_session, session_id)
    db_session.commit()

    assert refreshed is not None
    remaining = refreshed.expires_at - datetime.now(timezone.utc)
    assert remaining > timedelta(hours=7)
def test_lookup_refresh_remember_does_not_slide(db_session, user):
    """Looking up a remember-me session leaves its ``expires_at`` unchanged."""
    session_id, _ = S.create_session(db_session, user_id=user.id, remember=True, ip=None, user_agent=None)
    db_session.commit()

    expiry_before = db_session.get(UserSession, session_id).expires_at

    S.lookup_and_refresh(db_session, session_id)
    db_session.commit()

    expiry_after = db_session.get(UserSession, session_id).expires_at
    assert expiry_before == expiry_after
def test_expired_session_returns_none(db_session, user):
    """``lookup_and_refresh`` returns None for a session whose expiry is in the past."""
    sid, _ = S.create_session(db_session, user_id=user.id, remember=False, ip=None, user_agent=None)
    # Commit before reading the row back, as the sibling tests do: the fixture
    # session is created with autoflush=False, so without this the lookup below
    # only works if create_session happens to flush the new row itself.
    db_session.commit()

    row = db_session.get(UserSession, sid)
    row.expires_at = datetime.now(timezone.utc) - timedelta(minutes=1)
    db_session.commit()

    assert S.lookup_and_refresh(db_session, sid) is None
def test_revoke(db_session, user):
    """After ``revoke`` and a commit the session row can no longer be fetched."""
    session_id, _ = S.create_session(db_session, user_id=user.id, remember=False, ip=None, user_agent=None)
    db_session.commit()

    S.revoke(db_session, session_id)
    db_session.commit()

    remaining_row = db_session.get(UserSession, session_id)
    assert remaining_row is None
def test_purge_expired(db_session, user):
    """``purge_expired`` removes only the expired session and reports one removal."""
    fresh, _ = S.create_session(db_session, user_id=user.id, remember=False, ip=None, user_agent=None)
    stale, _ = S.create_session(db_session, user_id=user.id, remember=False, ip=None, user_agent=None)
    # Commit before reading a row back, as the sibling tests do: the fixture
    # session is created with autoflush=False, so without this the lookup below
    # only works if create_session happens to flush the new rows itself.
    db_session.commit()

    # Back-date one of the two sessions so it counts as expired.
    db_session.get(UserSession, stale).expires_at = datetime.now(timezone.utc) - timedelta(hours=1)
    db_session.commit()

    removed = S.purge_expired(db_session)
    db_session.commit()

    assert removed == 1
    assert db_session.get(UserSession, fresh) is not None
    assert db_session.get(UserSession, stale) is None

View File

@ -0,0 +1,74 @@
from fastapi import FastAPI
from fastapi.testclient import TestClient
from sqlalchemy.orm import sessionmaker
from clearview_app.auth.dependencies import get_db
from clearview_app.auth.router import router as auth_router
from clearview_app.auth.users_router import router as users_router
def make_app(db_engine):
    """Build a minimal FastAPI app serving the auth and users routers.

    ``get_db`` is overridden so each request receives a session bound to the
    given test engine.
    """
    session_factory = sessionmaker(bind=db_engine, autoflush=False, autocommit=False, future=True)

    def override_get_db():
        with session_factory() as session:
            yield session

    application = FastAPI()
    application.dependency_overrides[get_db] = override_get_db
    for router in (auth_router, users_router):
        application.include_router(router)
    return application
def _bootstrap_admin(app):
    """Run the setup call for user "root" and return the client that made it."""
    client = TestClient(app)
    credentials = {"username": "root", "password": "CorrectHorse42"}
    client.post("/api/auth/setup", json=credentials)
    return client
def test_non_admin_blocked(db_engine):
    """A logged-in account with role "user" gets 403 when listing users."""
    app = make_app(db_engine)
    admin_client = _bootstrap_admin(app)
    admin_client.post(
        "/api/users",
        json={"username": "joe", "password": "JoePassword42", "role": "user"},
    )

    user_client = TestClient(app)
    user_client.post(
        "/api/auth/login",
        json={"username": "joe", "password": "JoePassword42", "remember": False},
    )

    response = user_client.get("/api/users")
    assert response.status_code == 403
def test_create_list_update_delete(db_engine):
    """Walk a user through create, list, role change, password reset and delete."""
    app = make_app(db_engine)
    admin_client = _bootstrap_admin(app)

    def listed_usernames():
        return {entry["username"] for entry in admin_client.get("/api/users").json()}

    created = admin_client.post(
        "/api/users",
        json={"username": "alice", "password": "AlicePassword42", "role": "user"},
    )
    assert created.status_code == 200
    uid = created.json()["id"]
    assert listed_usernames() == {"root", "alice"}

    # Promote the new user and check the listing reflects it.
    admin_client.patch(f"/api/users/{uid}", json={"role": "admin"})
    listing = admin_client.get("/api/users").json()
    assert next(entry for entry in listing if entry["id"] == uid)["role"] == "admin"

    # After a password reset the new password must work from a fresh client.
    admin_client.post(f"/api/users/{uid}/reset-password", json={"password": "NewAlicePass42"})
    fresh_client = TestClient(app)
    login = fresh_client.post(
        "/api/auth/login",
        json={"username": "alice", "password": "NewAlicePass42", "remember": False},
    )
    assert login.status_code == 200

    admin_client.delete(f"/api/users/{uid}")
    assert listed_usernames() == {"root"}
def test_cannot_delete_self(db_engine):
    """Deleting the first listed user as the bootstrap admin yields 400."""
    admin_client = _bootstrap_admin(make_app(db_engine))

    listing = admin_client.get("/api/users").json()
    own_id = listing[0]["id"]

    response = admin_client.delete(f"/api/users/{own_id}")
    assert response.status_code == 400
def test_audit_returns_rows(db_engine):
    """After setup, the audit endpoint returns at least one row with event "setup"."""
    admin_client = _bootstrap_admin(make_app(db_engine))
    audit_rows = admin_client.get("/api/audit").json()
    recorded_events = [row["event"] for row in audit_rows]
    assert "setup" in recorded_events

View File

@ -2,7 +2,11 @@
## Scope ## Scope
Clearview scans SharePoint sites for permission deviations from the site root permission baseline. Clearview scans Microsoft 365 for permission deviations across two domains:
1. **SharePoint sites** — deviations relative to the site root permission baseline (libraries, folders, files).
2. **Exchange Online mailboxes** — non-default permissions: Full Access, Send As, Send on Behalf, and folder delegations (Calendar, Inbox).
Designed to monitor multiple customer tenants from a single instance. Designed to monitor multiple customer tenants from a single instance.
## Runtime Architecture ## Runtime Architecture
@ -16,13 +20,20 @@ All services are defined in `stack/docker-compose.yml` for Portainer deployment.
## Application Layout ## Application Layout
- `containers/clearview/site/` - `containers/clearview/site/`
- Frontend UI (tenant management, manual URL input, CSV import, jobs, deviations) - Frontend UI: vanilla HTML/JS/CSS with a fixed sidebar and hash-based routing.
- Routes: `#/dashboard`, `#/jobs`, `#/scan/sharepoint`, `#/scan/mailbox`, `#/tenants`, `#/settings`.
- `containers/clearview/src/clearview_app/` - `containers/clearview/src/clearview_app/`
- FastAPI backend - FastAPI backend
- SQLAlchemy models - SQLAlchemy models
- CSV parser - CSV parser (SharePoint URLs and mailbox UPNs)
- Default-site filtering - Default-site filtering (SharePoint only)
- Background worker for long-running scans - Background worker for long-running scans
- `containers/clearview/src/clearview_app/scanners/`
- `common.py` — `AuthConfig`, `DeviationRecord`, `ScanResult`, `ProbeResult`, shared helpers.
- `sharepoint.py` — SharePoint REST scanner, MSAL token cache, hierarchical dedup, SharingLinks helpers.
- `mailbox.py` — Exchange Online scanner; spawns `pwsh` with the EXO scripts.
- `exo_scripts/` — PowerShell scripts (`probe.ps1`, `get-permissions.ps1`).
- Dispatcher: `scanners.scan(scan_type, target, auth, progress)` and `scanners.probe(scan_type, target, auth)`.
## Multi-Tenant Model ## Multi-Tenant Model
@ -71,6 +82,7 @@ The scanner uses the certificate path when `cert_thumbprint` is present on the t
|---|---| |---|---|
| `client_secret` | Azure client secret (optional when a certificate is available) | | `client_secret` | Azure client secret (optional when a certificate is available) |
| `cert_private_key` | PEM-encoded private key (internal, never exposed via API) | | `cert_private_key` | PEM-encoded private key (internal, never exposed via API) |
| `cert_public_pem` | PEM-encoded public certificate (used to build a PFX for Exchange Online PowerShell) |
| `cert_thumbprint` | SHA-1 thumbprint (used by MSAL) | | `cert_thumbprint` | SHA-1 thumbprint (used by MSAL) |
| `cert_expires_at` | Certificate expiry date | | `cert_expires_at` | Certificate expiry date |
@ -85,6 +97,26 @@ Scans run asynchronously through a DB-backed job queue:
5. Background worker processes targets with retries and per-target timeout. 5. Background worker processes targets with retries and per-target timeout.
6. API/UI expose progress and deviations per job. 6. API/UI expose progress and deviations per job.
### Connection Preflight
Before the full scan of a target runs, the worker performs a lightweight probe to verify that the configured credentials can actually reach the site and read role assignments. This catches the common setup errors (missing admin consent, certificate not yet uploaded to Azure, wrong tenant/client ID) early and with a clear message, instead of producing a silent 401 during the full scan.
The probe issues two calls:
1. `GET /_api/web?$select=Title` — validates token + tenant + site URL.
2. `GET /_api/web/roleassignments?$top=1&$select=PrincipalId` — validates that the app actually has permission to read role assignments (not only basic read).
The result is persisted per target in `last_probe_at`, `last_probe_ok`, and `last_probe_message`. If the probe fails, the target is marked `failed` with `error_message = "Preflight: <hint>"` and the full scan is skipped. Hints interpret common HTTP codes:
| Code | Hint |
|---|---|
| 401 on `/_api/web` | Certificate not uploaded in Azure, or wrong tenant/client ID |
| 401 on `/roleassignments` | Admin consent missing, or granted permission too low |
| 403 | App has no access to this site (e.g. `Sites.Selected` without a per-site grant) |
| 404 | Site not found |
The same probe is exposed as an on-demand **Test connection** action on each target in the Job Details UI (see API Endpoints below). The action is blocked while the job is still queued or running.
### Timeout and Retry Controls ### Timeout and Retry Controls
Configured through environment variables (defaults shown): Configured through environment variables (defaults shown):
@ -162,6 +194,7 @@ GET /api/scan-jobs/{id} Get job detail (targets
POST /api/scan-jobs/{id}/cancel Cancel a queued or running job POST /api/scan-jobs/{id}/cancel Cancel a queued or running job
DELETE /api/scan-jobs/{id} Delete a completed job and all its data DELETE /api/scan-jobs/{id} Delete a completed job and all its data
POST /api/scan-jobs/{id}/resolve-sharing-links Resolve SharingLinks group members post-scan POST /api/scan-jobs/{id}/resolve-sharing-links Resolve SharingLinks group members post-scan
POST /api/scan-jobs/{id}/targets/{tid}/test-connection Re-run the connection preflight for one target
GET /api/scan-jobs/{id}/export Download deviations as .xlsx (optional ?site_url=) GET /api/scan-jobs/{id}/export Download deviations as .xlsx (optional ?site_url=)
``` ```
@ -189,23 +222,77 @@ Main tables:
| Table | Key columns | | Table | Key columns |
|---|---| |---|---|
| `tenant_profiles` | credentials, `cert_private_key`, `cert_thumbprint`, `cert_expires_at` | | `tenant_profiles` | credentials, `cert_private_key`, `cert_public_pem`, `cert_thumbprint`, `cert_expires_at` |
| `scan_jobs` | `status`, `tenant_profile_id`, progress counters, auth credentials | | `scan_jobs` | `status`, `scan_type` (`sharepoint`/`mailbox`), `tenant_profile_id`, progress counters, auth credentials |
| `scan_targets` | `job_id`, `site_url`, `status`, `attempts`, `error_message` | | `scan_targets` | `job_id`, `site_url` (holds UPN for mailbox jobs), `status`, `attempts`, `error_message`, `last_probe_at`, `last_probe_ok`, `last_probe_message` |
| `permission_deviations` | `job_id`, `site_url`, `object_url`, `object_type`, `principal`, `role_name`, `delta_type`, `resolved_members` | | `permission_deviations` | `job_id`, `site_url`, `object_url`, `object_type`, `principal`, `role_name`, `delta_type`, `permission_type`, `resolved_members` |
Scan jobs, targets, and deviations are cascade-deleted when a job is removed via `DELETE /api/scan-jobs/{id}`. Jobs with status `queued` or `running` cannot be deleted. Scan jobs, targets, and deviations are cascade-deleted when a job is removed via `DELETE /api/scan-jobs/{id}`. Jobs with status `queued` or `running` cannot be deleted.
Schema migrations for new columns are applied automatically on startup via `_ensure_schema_columns()` in `main.py`. Schema migrations for new columns are applied automatically on startup via `_ensure_schema_columns()` in `main.py`.
## Mailbox Scanning
Mailbox scans use Exchange Online PowerShell with certificate-based app-only auth.
### What is collected
| Permission | PowerShell source | `permission_type` value |
|---|---|---|
| Full Access (and other mailbox-level rights) | `Get-MailboxPermission` | `FullAccess` |
| Send As | `Get-RecipientPermission` (`AccessControlType=Allow`) | `SendAs` |
| Send on Behalf | mailbox property `GrantSendOnBehalfTo` | `SendOnBehalf` |
| Folder delegation — Calendar | `Get-MailboxFolderPermission "<upn>:\Calendar"` | `Folder:Calendar` |
| Folder delegation — Inbox | `Get-MailboxFolderPermission "<upn>:\Inbox"` | `Folder:Inbox` |
The scanner filters out `NT AUTHORITY\SELF`, `S-1-5-*` SIDs, inherited mailbox permissions, and the default folder principals (`Default`, `Anonymous` with `None` rights). What remains is stored as deviations on the job — there is no SharePoint-style root baseline; every non-default principal counts.
### Authentication
Mailbox scanning uses the **same tenant certificate** as SharePoint, but Exchange Online requires a `.pfx` rather than a thumbprint + raw private key. At scan time Clearview builds an in-memory PFX from `cert_private_key` + `cert_public_pem` (random password), writes it to a tempdir, and removes it immediately after the `pwsh` process exits.
### Targets
Three ways to seed a mailbox scan job:
1. **Manual UPNs** — paste one UPN per line.
2. **CSV import** — column `UserPrincipalName` / `Email` / `Mailbox` / `Primary SMTP Address` (auto-detected, case-insensitive).
3. **All mailboxes in tenant** — Clearview enumerates every mailbox via `Get-EXOMailbox -ResultSize Unlimited` and queues one target per mailbox. Requires the tenant's primary domain (e.g. `contoso.onmicrosoft.com`) so `Connect-ExchangeOnline -Organization` can authenticate. Capped at 50000 mailboxes per job.
### Required Azure permissions
In addition to the SharePoint setup the scan app needs:
- API permission: **Office 365 Exchange Online → Application permissions → `Exchange.ManageAsApp`** (admin-consented).
- Entra role assigned to the app's service principal: **Exchange Administrator** (cannot be granted via Microsoft Graph; must be assigned in Azure Portal → Entra ID → Roles and administrators).
### Runtime requirements
The container image installs:
- **PowerShell 7 (`pwsh`)** from the official Microsoft package repo.
- **`ExchangeOnlineManagement`** module from PSGallery (`Install-Module -Scope AllUsers`).
Together they add roughly 150 MB to the image. Without them, mailbox probes return `pwsh not available in runtime` and scans fail.
### Probe
Mailbox preflight runs `probe.ps1` which connects to Exchange Online and calls `Get-EXOMailbox -Identity <upn> -PropertySets Minimum`. Failure hints map common errors:
| Error fragment | Hint |
|---|---|
| `Unauthorized` / `401` / `AADSTS*` | Check `Exchange.ManageAsApp` permission, admin consent, and the Exchange Administrator role assignment |
| `Couldn't find object` / `not found` | Mailbox does not exist in this tenant |
| `module not available` | `ExchangeOnlineManagement` PS module missing in the container |
## Build and Release ## Build and Release
Use `./build-and-push.sh` from repo root. `./build-and-push.sh` from the repo root, sourced from the shared script in `/docker/develop/shared-integrations/tooling/docker-build-and-push/`.
- `./build-and-push.sh t` for test build (`:dev` tag only) - `./build-and-push.sh t` — test build, push `:dev` tag only.
- `./build-and-push.sh 1` patch release - `./build-and-push.sh r` — release build, parses the version from `docs/changelog.md` (first `## vX.Y.Z` heading), pushes `:<version>`, `:dev`, and `:latest`.
- `./build-and-push.sh 2` minor release
- `./build-and-push.sh 3` major release The script performs no git operations. After a successful release, run the `git commit` / `git tag` / `git push --tags` commands the script prints in its summary.
## Current Scan Mode ## Current Scan Mode

View File

@ -2,6 +2,236 @@
This file documents changes on the develop branch of this project.
## 2026-05-28 — Released as v0.2.0
## 2026-05-28 — Release: drop unused `version.txt`
### Changed
- Removed `version.txt` at repo root. The build script reads the release version from `docs/changelog.md` and the running app reads it from `clearview_app/version.py`; the legacy `version.txt` had no remaining consumer.
## 2026-05-28 — Settings: move Users + Audit under Settings tabs
### Changed
- Removed the top-level **Users** link from the sidebar.
- `Settings` page is now tabbed: **General** (env-var pointer), **Users** (user management), **Audit log** (latest 100 events). Sub-routes `#/settings/general`, `#/settings/users`, `#/settings/audit` are direct-linkable.
- Non-admins only see the **General** tab; the Users/Audit tabs are hidden client-side and the backend still enforces `require_admin`.
## 2026-05-28 — Authentication: fix login loop on HTTP-only deployment
### Changed
- `COOKIE_SECURE` now defaults to `false` (the stack runs HTTP-only locally; with `Secure` set, browsers silently drop the session cookie and the user enters a redirect loop). Set `COOKIE_SECURE=true` if the stack is ever fronted by HTTPS.
- SPA boot now hides the dashboard until `/api/auth/me` resolves (via `html[data-auth-pending]` + body visibility), eliminating the brief dashboard flash that appeared before the redirect.
## 2026-05-28 — Authentication: align login/setup/modal styling with site light theme
### Changed
- Login + setup pages and admin modals now use the site's light theme tokens (`--cv-page`, `--cv-surface`, `--cv-text-primary`, `--cv-accent`) instead of the dark placeholder palette.
- Input text uses `--cv-text-primary` so it is actually readable; buttons use the accent gradient.
- User badge / Users + Audit tables use the same border, surface, and text tokens as the rest of the app.
## 2026-05-28 — Authentication: feature complete
### Added
- End-to-end session-based authentication with admin/user roles, initial-setup flow, user management UI, and audit log.
### Changed
- All scanning, tenant, and onboarding endpoints now require an authenticated session.
- Build bumped to `v0.1.0.3`.
## 2026-05-28 — Authentication: session purge
### Added
- Worker periodically deletes expired rows from `user_sessions` (every 5 minutes).
## 2026-05-28 — Authentication: Users + Audit UI
### Added
- Users admin view with create/edit/delete and password reset.
- Audit log view (latest 100 events).
## 2026-05-28 — Authentication: SPA gate
### Added
- App boot calls `/api/auth/me`; 401 redirects to `/login.html` (or `/setup.html`).
- Header user-badge with sign-out button.
- Users nav link (hidden for non-admin).
### Changed
- `requestJson` wrapper now redirects on any 401 response.
## 2026-05-28 — Authentication: setup page
### Added
- `setup.html` for first-run admin creation, reachable only while the `users` table is empty.
## 2026-05-28 — Authentication: login page
### Added
- `login.html` with username / password / remember-me.
- Shared `auth.js` helpers (postJson, getJson).
- CSS for auth pages and the header user-badge.
## 2026-05-28 — Authentication: API gating
### Changed
- Tenants, Jobs, and Onboarding routers now require an authenticated session.
- Auth and Users routers wired into the FastAPI app.
## 2026-05-28 — Authentication: users + audit endpoints
### Added
- Admin endpoints: list/create/update/delete users, reset password, view audit log.
- Self-delete protection; deactivating or resetting password revokes existing sessions.
## 2026-05-28 — Authentication: auth router
### Added
- `/api/auth/setup-required`, `/api/auth/setup`, `/api/auth/login`, `/api/auth/logout`, `/api/auth/me`.
- HttpOnly session cookie with SameSite=Lax; Secure flag controlled by `COOKIE_SECURE` env.
## 2026-05-28 — Authentication: FastAPI dependencies
### Added
- `require_user` / `require_admin` cookie-based session loading.
- 401 for missing/expired/inactive; 403 for non-admin on admin routes.
## 2026-05-28 — Authentication: session lifecycle
### Added
- `auth.sessions` with 8h sliding / 30d remember TTLs, lookup-and-refresh, revoke, purge.
- `UTCDateTime` type decorator in `auth.models` to keep UTC-aware datetimes across SQLite roundtrips.
## 2026-05-28 — Authentication: audit helper
### Added
- `auth.audit.record_event()` for one-line writes to `auth_audit`.
## 2026-05-28 — Authentication: database migration
### Added
- Alembic migration `0003_auth_tables` creating `users`, `user_sessions`, `auth_audit`.
## 2026-05-28 — Authentication: hashing + password policy
### Added
- Argon2id password hashing (`hash_password`, `verify_password`).
- Server-side password policy (min 12, letter + digit).
- Opaque hex session-id generator.
## 2026-05-28 — Authentication: data models
### Added
- `User`, `UserSession`, `AuthAudit` SQLAlchemy models.
- Model-level tests using SQLite in-memory engine.
## 2026-05-28 — Authentication: scaffold
### Added
- `argon2-cffi`, `pytest`, `httpx` dependencies.
- New `clearview_app/auth/` package skeleton.
- `tests/` directory with SQLite-backed pytest fixtures.
## 2026-05-26 — UI/UX: dead CSS removal, a11y, distinct risk colours, richer dashboard
### Added
- **Dashboard enrichment** — a fourth KPI card **With errors** (`#statErrors`, counts jobs that are `completed_with_errors` or have `failed_targets > 0`) and a **Recent jobs** panel (`#dashRecentJobs`, last 5 jobs, each row clickable to jump to its details). Populated from the existing `/api/scan-jobs` list in `refreshJobs()` via a new `renderDashRecent()`; all interpolated fields run through `escHtml()`.
### Changed
- **Removed dead CSS** — the pre-sidebar `.topbar`, `.topbar-actions`, and `.layout` rules (and their now-orphaned references inside the 930px/640px media queries) were deleted; the layout has used `.app-shell`/`.sidebar`/`.content` since the sidebar refactor.
- **Accessibility** — focus outline strengthened from `rgba(14,165,233,0.38)` to a solid `var(--cv-accent)` (meets WCAG non-text 3:1) and now also covers `a:focus-visible`. On route changes (`applyRoute`), focus now moves to the new page's first heading (`h1/h2`, `tabindex=-1`) and `document.title` updates, so screen-reader/keyboard users land in the freshly shown content.
- **Distinct risk colours** — the `risk.warn` badge changed from accent-blue (indistinguishable from `info`/`low`) to amber (`#854d0e` on `rgba(234,179,8,.18)`), giving a real low→high colour gradient.
- **Consistent XSS escaping** — `job.id` and `job.source_type` in the Scan Jobs table are now passed through `escHtml()` (previously interpolated raw), matching the rest of the table.
## 2026-05-26 — Split monolithic main.py into route modules
### Changed
- **`main.py` reduced from 1152 to 64 lines** — now a composition root that only wires the FastAPI app, scan-worker lifecycle, `/healthz`, `/api/version`, the `/` index + static mount, and `include_router` for the new route modules. All endpoint logic moved out verbatim (behaviour-preserving).
- **New route modules** (flat modules at package level so existing single-dot relative imports stay unchanged — lower risk than a `routers/` subpackage): `api_tenants.py` (tenant profiles + certificate), `api_jobs.py` (all scan-job routes incl. CSV import, cancel/delete, resolve-sharing-links, resolve-groups, test-connection, Excel export, detail), `api_onboarding.py` (Microsoft connect/callback/scan-app). Shared helpers (`_resolve_credentials`, `_create_job_from_targets`, `_enumerate_all_*`, `_to_job_summary`, `_to_tenant_item`, `_build_export_filename`, `_sharing_link_risk_label`, `_extract_sharing_link_group_and_type`) extracted to `api_helpers.py`.
- **Verified behaviour-preserving** — captured the OpenAPI route set before/after; both expose the identical 22 endpoints (`diff` empty). Built the image, booted against a fresh DB: `/healthz`, `/api/version`, `/api/tenants`, `/api/scan-jobs` all respond, invalid `scan_type` still returns 422, no startup errors.
## 2026-05-26 — Correctness P1: token cache, atomic job claim, timezone-aware datetimes, scan_type validation
### Changed
- **Token cache now has TTL + thread lock + MSAL app reuse** (`scanners/sharepoint.py`) — `_TOKEN_CACHE` previously stored access tokens as plain strings forever, so long scans started failing with 401s once the ~1h token expired. It now stores `(token, expires_at)` and refreshes 60s before expiry, guarded by a new `_TOKEN_LOCK` (the worker fetches tokens from multiple threads). New `_get_msal_app()` caches one `ConfidentialClientApplication` per `(tenant, client, auth_method)` so MSAL's own token cache is reused instead of building a fresh app on every call.
- **Atomic job claim** (`worker.py`) — the queued-job selection now uses `.with_for_update(skip_locked=True)` (`SELECT … FOR UPDATE SKIP LOCKED`), so multiple worker threads/replicas can never claim the same job. Behaviour is unchanged for the current single worker but is now replica-safe.
- **Timezone-aware datetimes everywhere** — replaced all 24 `datetime.utcnow()` (naive, deprecated) with `datetime.now(timezone.utc)` across `models.py`, `worker.py`, `main.py`, and `cert.py`. SQLAlchemy datetime columns are now `DateTime(timezone=True)`; model defaults use a new `_utcnow()` helper. New Alembic migration `0002_timestamptz` converts existing `timestamp without time zone` columns to `timestamptz` (reinterpreting stored values as UTC), guarded per-column so it is a no-op on databases already timestamptz. **Behaviour note:** API datetimes now carry a UTC offset, so the frontend renders them correctly in local time (previously stored UTC was shown as if local).
- **`scan_type` request validation** (`schemas.py`) — `CreateScanJobRequest.scan_type` is now `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` instead of free `str`; invalid values return HTTP 422. The response model keeps `str` so legacy rows never trigger a serialization error. Verified: `scan_type=bogus` → 422, valid type passes schema validation.
## 2026-05-26 — Alembic migrations replace startup `create_all` + raw ALTERs
### Added
- **Alembic introduced (`alembic==1.14.0`)** — schema is now version-controlled instead of being patched at every startup. New `clearview_app/migrations/` package (`env.py` reuses the app's SQLAlchemy engine and `Base.metadata`; `versions/0001_baseline.py` baseline) and dev-only `containers/clearview/alembic.ini` for manual CLI use. The app builds the Alembic `Config` programmatically, so `alembic.ini` is not shipped in the image.
- **Baseline migration `0001_baseline`** — creates the full current schema via `Base.metadata.create_all`, guaranteed identical to the models (the same DDL the app emitted before). Future schema changes become explicit Alembic revisions.
- **Startup bootstrap `clearview_app/db_migrate.run_migrations()`** — idempotent, three cases: fresh DB → `upgrade head`; existing pre-Alembic DB (tables present, no `alembic_version`) → `stamp head` (adopt baseline without re-creating); already under Alembic → `upgrade head`. Verified end-to-end against throwaway databases (fresh upgrade, existing-DB stamp, re-run no-op) and a local image boot test (`/healthz` OK, schema + `alembic_version=0001_baseline`).
### Changed
- **`main.py` startup** — `on_startup()` now calls `run_migrations()` instead of `Base.metadata.create_all(bind=engine)` + `_ensure_schema_columns()`. The 18-statement raw `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` block (`_ensure_schema_columns`) is removed; unused `Base`/`engine` imports dropped. The existing dev/prod database is adopted automatically (stamped to baseline) on first start of the new build — no manual migration step required.
## 2026-05-26 — Build/version number in the UI (Dropkeep-style)
### Added
- **Version metadata module `clearview_app/version.py`** — single source of truth mirroring Dropkeep: `VERSION = "v0.1.0"` (release) + `BUILD = 0` (explicit dev/test build segment, source state, not git-derived). `display_version()` returns `vX.Y.Z.N` when `BUILD > 0`, else `vX.Y.Z`; `cache_version()` strips the leading `v`.
- **`GET /api/version` endpoint** — returns `{"version": display_version()}`. The FastAPI app `version=` is also sourced from `version.py` (was hardcoded `"0.1.0"`).
- **Version shown in the UI** — the sidebar footer version (previously a hardcoded `v0.1.0` in `index.html`) is now populated at load time from `/api/version` via a new `loadVersion()` in `app.js` (span `id="appVersion"`). Operators see exactly which image build is running, e.g. `v0.1.0.3`.
- **Build wrapper `build.sh` + `scripts/`** — `./build.sh t` runs `scripts/bump-dev-build.py` (increments `BUILD`) then `./build-and-push.sh t`; `./build.sh r` runs `scripts/check-release-version.py` (asserts `BUILD == 0` and that `version.py` matches the top `docs/changelog.md` release heading) then `./build-and-push.sh r`. `scripts/set-release-version.py vX.Y.Z` sets a new release version and resets `BUILD = 0`. Build numbers are committed in source so the image carries the exact build with no Docker build args.
## 2026-05-26 — Root report: expand Entra/M365 groups & readable direct users
### Added
- **Entra/AAD & M365 group expansion at site root** — the "Resolve groups" action now also expands Azure AD security groups and Microsoft 365 groups that are assigned **directly** at the site root, not just classic SharePoint site groups. Previously these claim-encoded principals (`c:0t.c|tenant|<guid>`, `c:0o.c|federateddirectoryclaimprovider|<guid>`) were skipped by `is_sharepoint_group_principal`, so the root report showed only the group name and never the people inside — making the inventory incomplete. New helpers in `scanners/sharepoint.py`: `_extract_aad_group_object_id` (parses the Entra object id out of the claim, incl. the `_o` owners suffix), `is_aad_group_principal`, `resolve_aad_group_members`, and `_expand_aad_group_by_id` (extracted from `_expand_aad_group_via_graph` so both mail-based and id-based lookups share the `/groups/{id}/members` + `/owners` Graph path, depth-limited to 3 with a per-resolve `seen` set). `POST /api/scan-jobs/{id}/resolve-groups` now routes AAD/M365 group principals to the Graph resolver and SharePoint groups to the existing `getbyname` resolver. Requires `GroupMember.Read.All` (or `Group.Read.All`) on Microsoft Graph; without it the group stays visible by name and counts as "skipped" — no crash.
### Changed
- **Readable principals for directly-assigned users** — individual users granted rights directly on the site root now render as their UPN/email (e.g. `jan@contoso.com`) instead of the raw claim string `i:0#.f|membership|jan@contoso.com`. New helpers `_extract_user_upn` and `_display_principal` in `scanners/sharepoint.py`, applied in `_get_role_assignments` (so both the root scan and the deviation scan benefit, consistently on both sides of the root-vs-child set comparison). Only users with an `@`-shaped UPN are rewritten; groups, on-prem (`i:0#.w|domain\user`) and built-in/system accounts keep their original LoginName so claim object ids stay resolvable and the site-root noise filter (`SHAREPOINT\system`, `NT AUTHORITY\*`, etc.) keeps matching.
## [2026-04-28]
### Changed
- **Excel export sheet name + columns adapt to scan type** — second sheet is now named `Mailbox Permissions` for mailbox jobs, `Group Memberships` for Entra-group jobs, `Root Permissions` for SharePoint-root jobs, and `Deviations` for the original SharePoint deviation scan. Column sets are tailored per type so headers like "Object URL" / "Link Risk" / "Delta" no longer appear on exports where they don't apply. Targets sheet first column label switches between Site URL / Mailbox / Group based on the job.
### Added
- **Entra Group Scan** — new scan type `entra_groups` dedicated to enumerating Microsoft 365 / Azure AD group memberships. New `scanners/entra.py` resolves a target (Object ID, mail, or display name) via Microsoft Graph and stores one deviation per user with role `Member` or `Owner` (with `(via group > nested-group)` chain when expanded recursively). Group classification (Microsoft 365 / Security / Mail-enabled Security / Distribution) is stored in `permission_type`. New helper `entra.list_all_groups` for the "All groups in tenant" option. New CSV parser `parse_entra_groups_csv` reads the `Object ID` column from the Entra portal Groups export. New sidebar route `#/scan/entra` with three forms (manual IDs, CSV import, all-tenant). New filter option in the Scan Jobs type dropdown. Job Details renders Group / Group Type / User / Role columns for these jobs. Requires `Group.Read.All` on Microsoft Graph.
- **Recursive group expansion via Microsoft Graph** — when a SharePoint group member is itself a Microsoft 365 / Azure AD group, the resolver now expands it transitively. New helpers `_expand_aad_group_via_graph` and `_graph_collect` in `scanners/sharepoint.py` call `/groups?$filter=mail eq …` to look up the group, then `/groups/{id}/members` and `/groups/{id}/owners` to enumerate users. Owners are tagged with `(owner)` in the output. Recursion is depth-limited to 3 with a per-resolve `seen` set to break cycles. Output format puts nested members in square brackets after the group name, e.g. `Pharmacology@contoso.onmicrosoft.com [alice@contoso.com, bob@contoso.com (owner)]`. Requires the new `Group.Read.All` Application permission on Microsoft Graph (added to the onboarding instructions). Without it, group lines remain collapsed and labelled `(group, no readable members)`.
- **Resolve SharePoint groups** — new "Resolve groups" action on the Job Details panel for SharePoint and SharePoint-root jobs. Expands every SharePoint group principal (Owners / Members / Visitors / custom site groups) to its underlying user list via `/_api/web/sitegroups/getbyname/<group>/users` and writes the comma-separated members to `permission_deviations.resolved_members`. Members are rendered below the principal in the Deviations table and included in the Excel export. Azure AD security groups and federated claims (principals starting with `c:0…` / `i:0…` or containing `|`) are skipped — those would need `Group.Read.All` on Microsoft Graph. New endpoint `POST /api/scan-jobs/{id}/resolve-groups`, helper `sharepoint.is_sharepoint_group_principal()`.
- **SharePoint root-permissions scan mode** — new `scan_type='sharepoint_root'` that lists role assignments on the site root only, without traversing libraries/folders/files. Much faster (~1 HTTP call per target) and useful for an inventory of who has site-level access. New scanner function `sharepoint.scan_site_root_permissions`. Records are stored with `delta_type='root'` and `object_type='Site'`. Selectable on the New SharePoint Scan page via a "Scan mode" dropdown that controls both the manual-URL and CSV-import forms. New filter option in the Scan Jobs type filter. Noise filter `_is_noise_principal` excludes SharingLinks groups, `SHAREPOINT\system`/`NT AUTHORITY\*` accounts, and "Limited Access System Group" entries — these are SharePoint plumbing surfaced at the site root by item-level shares and are not part of a meaningful root inventory.
- **Tenant `primary_domain` field** — new column on `tenant_profiles`, exposed in the Add Tenant form (e.g. `contoso.onmicrosoft.com`). When set, the Mailbox scan page auto-fills the Organization field on tenant selection, and the API falls back to it when `organization` is omitted on a `scan_all_mailboxes` request. SharePoint scans are unaffected.
- **Expanded mailbox-scan onboarding instructions** — new "Enable mailbox scanning" section in the Add Tenant form covers adding the `Exchange.ManageAsApp` API permission, granting admin consent, assigning the Exchange Administrator Entra role to the service principal, certificate generation/upload, and primary-domain entry. Always visible (independent of automated/manual onboarding mode).
- **Scan all mailboxes in a tenant** — third option on the Mailbox scan page next to manual UPNs and CSV import. Clearview enumerates every mailbox via `Get-EXOMailbox -ResultSize Unlimited` and queues one target per mailbox. Requires the tenant's primary domain (e.g. `contoso.onmicrosoft.com`) and a tenant certificate. New PowerShell script `exo_scripts/list-mailboxes.ps1`, new Python helper `mailbox.list_mailboxes()`, new request fields `scan_all_mailboxes` and `organization`. Job source type is recorded as `tenant_all`.
### Changed
- **Sidebar logo** — replaced with a dark-background variant (`assets/clearview-logo-dark.svg`) so the "view" wordmark stays legible on the dark sidebar (previously rendered in `#141413` and was invisible).
- **English-only UI** — replaced remaining Dutch labels in the application with English equivalents: probe status `Nog niet getest`/`Mislukt` → `Not tested yet`/`Failed`, button label `Testen…` → `Testing…`, error toast `Test mislukt:` → `Test failed:`, and probe hints in `scanners/sharepoint.py` + `scanners/mailbox.py`. The Dutch→English role-name mapping table in `sharepoint.py` is unchanged (it normalizes incoming SharePoint role names).
- **Mailbox permission scanning** — Clearview can now scan Exchange Online mailboxes for delegated access alongside SharePoint sites.
- Permission categories collected: Full Access (`Get-MailboxPermission`), Send As (`Get-RecipientPermission`), Send on Behalf (`GrantSendOnBehalfTo` mailbox property), and folder delegations on Calendar and Inbox (`Get-MailboxFolderPermission`).
- Implementation: `pwsh` subprocess invoking the `ExchangeOnlineManagement` module with certificate-based app-only authentication (same tenant profile cert as SharePoint scans).
- Default principals (`NT AUTHORITY\SELF`, `S-1-5-*`, folder `Default`/`Anonymous=None`) are filtered out at scan time; only non-default permissions become deviations.
- Mailbox scans require a tenant certificate plus the `Office 365 Exchange Online → Exchange.ManageAsApp` API permission and the **Exchange Administrator** Entra role on the scan app's service principal. Client-secret auth is not supported by Exchange Online.
- **Frontend sidebar layout** — single-page UI replaced with a fixed left sidebar (200px, dark) and routed pages, mirroring the AlertHub layout convention.
- Routes via hash-based router: `#/dashboard`, `#/jobs`, `#/scan/sharepoint`, `#/scan/mailbox`, `#/tenants`, `#/settings`. Implementation stays vanilla HTML/JS/CSS (no React introduction).
- Job Details panel adapts column labels and headers based on `scan_type`: SharePoint shows Site/Object/Type/Principal/Role/Delta; Mailbox shows Mailbox/Object/Permission Type/Principal/Access Rights. SharingLinks resolution is hidden for mailbox jobs.
- Jobs list gets a **Type** column (SharePoint / Mailbox) and a type filter.
- **Scanners package** — `clearview_app/scanner.py` split into `clearview_app/scanners/{__init__.py, common.py, sharepoint.py, mailbox.py, exo_scripts/}`. Public dispatcher `scanners.scan(scan_type, target, auth, progress)` and `scanners.probe(scan_type, target, auth)`. The original `scanner.py` remains as a thin compatibility shim re-exporting the SharePoint API.
- **Data model changes** (auto-migrated on startup):
- `scan_jobs.scan_type VARCHAR(32) NOT NULL DEFAULT 'sharepoint'`
- `permission_deviations.permission_type VARCHAR(32)` — populated by mailbox scans (`FullAccess`, `SendAs`, `SendOnBehalf`, `Folder:Calendar`, `Folder:Inbox`)
- `tenant_profiles.cert_public_pem TEXT` — public PEM is now stored alongside the private key so the mailbox scanner can build a `.pfx` for `Connect-ExchangeOnline -CertificateFilePath`. Existing tenants need to regenerate the certificate before mailbox scanning is available; SharePoint scans keep working with the existing key.
- **Mailbox CSV import** — `parse_mailboxes_csv` accepts `UserPrincipalName` / `UPN` / `Email` / `Mailbox` / `Primary SMTP Address` columns with case-insensitive matching, dedup, and email-shape validation.
- **API additions**:
- `POST /api/scan-jobs` payload extended with `scan_type` and `mailboxes[]` next to the existing `site_urls[]`.
- `POST /api/scan-jobs/import-csv` accepts a `scan_type` form field (`sharepoint`|`mailbox`).
- `GET /api/scan-jobs?scan_type=…` filter.
- `ScanJobSummary.scan_type` and `PermissionDeviationItem.permission_type` returned.
- **Dockerfile** now installs Microsoft PowerShell 7 from the official Microsoft repository plus the `ExchangeOnlineManagement` PowerShell module from PSGallery. Adds ~150 MB to the image.
- **Build script migration** — replaced the local `build-and-push.sh` with the shared version from `/docker/develop/shared-integrations/tooling/docker-build-and-push/`. Reads the version from `docs/changelog.md` (release-summary file) instead of `version.txt`.
- **`docs/changelog.md`** — new release-summary changelog file used by the new build script. The development log (`changelog-develop.md`) remains the append-only source of truth for individual changes.
## [2026-04-23]
### Added
- **Connection preflight per scan target** — before a target is scanned, a lightweight probe validates that the configured credentials can reach the site and read role assignments (`/_api/web` + `/_api/web/roleassignments?$top=1`). Targets that fail preflight are marked `failed` with a clear reason (401/403/404 hints) instead of attempting the full scan. Fixes the previous silent-failure behaviour when admin consent or the certificate upload was missing in Azure.
- **Manual "Test" button** — new button in the Targets table in Job Details that re-runs the probe on demand. New endpoint: `POST /api/scan-jobs/{id}/targets/{target_id}/test-connection`. Blocked while the job is still queued or running.
- **Probe status in UI** — each target row shows the last probe result (OK / Mislukt / Nog niet getest) with timestamp and error message. Fields persist until the next test, so "last known status" remains visible even after permissions are later revoked.
- `scan_targets` table extended with `last_probe_at`, `last_probe_ok`, `last_probe_message` (auto-migrated on startup).
## [2026-04-13]
### Added

40
docs/changelog.md Normal file
View File

@ -0,0 +1,40 @@
# Clearview changelog
This file is the **release-summary** changelog used by `build-and-push.sh` to determine the current version. The first heading must be the most recent release in the form `## vX.Y.Z — YYYY-MM-DD`.
For day-by-day development history see [`changelog-develop.md`](changelog-develop.md).
## v0.2.0 — 2026-05-28
### Added
- **Authentication** — session-based login (Argon2id, HttpOnly cookie, 8h sliding / 30d remember-me), first-run setup page, admin/user roles, user-management UI under Settings (CRUD + password reset), and an audit log. All API endpoints now require an authenticated session.
- **Mailbox permission scanning** — new `scan_type=mailbox` enumerates Exchange Online delegated access (Full Access, Send As, Send on Behalf, Calendar/Inbox folder delegations) via PowerShell + `ExchangeOnlineManagement`. Supports manual UPNs, CSV import, and "all mailboxes in tenant".
- **Entra Group scanning** — new `scan_type=entra_groups` enumerates Microsoft 365 / Azure AD group memberships (Members + Owners, with nested-group expansion). Manual IDs, CSV import, and "all groups in tenant".
- **SharePoint root-permissions mode** — `scan_type=sharepoint_root` inventories site-root role assignments without traversing libraries/folders/files; recursive expansion of SharePoint groups, AAD/M365 groups, and readable UPNs for directly-assigned users.
- **Resolve sharing links / groups** — post-scan actions to expand SharingLinks recipients and SharePoint/AAD group memberships into the deviation rows.
- **Connection preflight per target** with persisted last-probe status and a manual "Test" button.
- **Excel export** of Targets + Deviations sheets, with type-aware sheet names and columns.
- **Tenant `primary_domain` field**, expanded mailbox onboarding instructions, and English-only UI.
- **Sidebar layout** — fixed left sidebar with hash-based routing.
### Changed
- `main.py` split into per-route modules (`api_tenants`, `api_jobs`, `api_onboarding`) with shared helpers; behaviour-preserving.
- Scanner split into `scanners/` package (sharepoint, mailbox, entra).
- Token cache now has TTL + lock + MSAL app reuse; worker job claim is atomic (`SELECT … FOR UPDATE SKIP LOCKED`); all datetimes are timezone-aware UTC (`timestamptz` migration).
- Schema management moved from startup `create_all` to **Alembic migrations** (baseline + timestamptz + auth tables).
- Build number visible in the UI; build script reads version from `docs/changelog.md`.
- Dockerfile now installs PowerShell 7 + `ExchangeOnlineManagement` (~150 MB).
### Fixed
- SharePoint REST `$filter=HasUniqueRoleAssignments` is not supported as an OData filter; the unique-role-assignment check is now performed client-side.
- Dead CSS removed; accessibility focus styles strengthened; risk badge colours made distinct.
## v0.1.0 — 2026-04-13
### Added
- Initial Clearview release: SharePoint permission deviation scanning across multiple customer tenants.
- Tenant Profiles with certificate-based or client-secret authentication.
- Asynchronous scan job queue with per-target preflight probe and retry handling.
- Job Details panel with site filter, Excel export, and SharingLinks resolution.
- CSV import of Microsoft Sites export format.
- Two onboarding modes (automated via Graph platform app, or manual).

185
docs/code-review-todo.md Normal file
View File

@ -0,0 +1,185 @@
# Code Review TODO — Clearview
**Aangemaakt:** 2026-05-19
**Branch bij review:** `refactor/scanner-package-frontend`
**Scope:** Eerste volledige review (~7.100 regels code)
**Totaal:** 13 CRITICAL · 19 HIGH · 14 MEDIUM · 1 LOW
Werkvolgorde: alle CRITICAL eerst (P0), daarna HIGH (P1), dan MEDIUM/LOW (P2).
Per item staan severity, bestand(en):regel(s), en de gewenste fix.
---
## P0 — CRITICAL (eerst dichten)
### Auth & secrets
- [ ] **Geen authenticatie op enig API-endpoint**
- `containers/clearview/src/clearview_app/main.py` (alle `/api/` routes)
- Fix: API-key via `X-API-Key` header met FastAPI `Security()` dependency, of Bearer-token op alle `/api/` routes.
- [ ] **Client secrets staan plaintext in DB**
- `containers/clearview/src/clearview_app/models.py:21` (`TenantProfile.client_secret`)
- `containers/clearview/src/clearview_app/models.py:45` (`ScanJob.auth_client_secret`)
- Fix: encrypt-at-rest met `cryptography.fernet`; key via env var. Decrypt enkel in geheugen bij gebruik.
- [ ] **`.env` niet in `.gitignore`**
- `.gitignore`, `stack/.env`
- Fix: voeg `stack/.env` en `**/.env` toe aan `.gitignore`; lever `stack/.env.example` met placeholders; verifieer dat `.env` nog niet in git history zit (anders rotate credentials).
- [ ] **Hardcoded DB-fallback `clearview:clearview`**
- `containers/clearview/src/clearview_app/config.py:17-19`
- Fix: verwijder default; `raise RuntimeError("DATABASE_URL required")` als env ontbreekt.
- [ ] **Adminer publiek op `0.0.0.0:8081`**
- `stack/docker-compose.yml:44-46`
- Fix: bind aan `127.0.0.1:${ADMINER_PORT:-8081}:8080` of verwijder uit prod-compose.
### Injectie & exfiltratie
- [ ] **XSS via ongescapete velden in `innerHTML`** (3 vindplaatsen)
- `containers/clearview/site/app.js:658-676` (`job.id`, `job.source_type`, `job.items_scanned`)
- `containers/clearview/site/app.js:885-894` (`job.status`, `total/processed/successful/failed_targets`, `items_scanned`)
- `containers/clearview/site/app.js:175-184` (`statusBadge()` zonder escape op `status`)
- Fix: consequent `escHtml()` op alle API-velden, óók ID's en numerieke. Geen uitzonderingen.
- [ ] **Open redirect via `payload.connect_url`**
- `containers/clearview/site/app.js:472`
- Fix: valideer `new URL(payload.connect_url).protocol === 'https:'` + host-allowlist (`login.microsoftonline.com`).
- [ ] **SSRF / token-exfiltratie via `@odata.nextLink`**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:547-553`
- `containers/clearview/src/clearview_app/scanners/entra.py:227-272`
- Fix: vergelijk `urlparse(next_url).netloc == urlparse(original_url).netloc`; gooi anders een `RuntimeError`.
- [ ] **Header injection in `Content-Disposition`**
- `containers/clearview/src/clearview_app/main.py:701-705`
- Fix: type de route-parameter als `uuid.UUID` zodat FastAPI `job_id` valideert; `urllib.parse.quote(filename, safe="")`.
---
## P1 — HIGH
### Correctheid
- [ ] **Token cache zonder TTL — workers crashen na 1 uur**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:34, 512-543`
- Fix: bewaar `expires_at = time.time() + result["expires_in"] - 60`; invalideer in `_get_token_for_host`.
- [ ] **MSAL `ConfidentialClientApplication` per aanroep**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:530`
- Fix: module-level dict `(tenant_id, client_id, auth_method) -> app`. Hergebruik object.
- [ ] **`_TOKEN_CACHE` zonder lock (race in multi-thread)**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:34`
- Fix: `threading.Lock` rond check-then-write, of `functools.lru_cache` + TTL-wrapper.
- [ ] **Race condition in worker: niet-atomaire job-claim**
- `containers/clearview/src/clearview_app/worker.py:48-68`
- Fix: één `UPDATE scan_jobs SET status='running' WHERE id=:id AND status='queued' RETURNING id` met `FOR UPDATE SKIP LOCKED`.
- [ ] **Auto-refresh race in frontend**
- `containers/clearview/site/app.js:1009-1013` + alle `tick()` callsites
- Fix: `AbortController` per render; vorige request cancelen voordat nieuwe gestart wordt.
- [ ] **Event-listener accumulatie / re-render-pattern**
- `containers/clearview/site/app.js:238-270, 678-702`
- Fix: event delegation op stabiele container (`els.jobsTableBody.addEventListener('click', ...)`).
- [ ] **Sequentieel awaiten van onafhankelijke calls**
- `containers/clearview/site/app.js:554-555, 999, 1281, 1364`
- Fix: `await Promise.all([refreshJobs(), refreshSelectedJob()])`.
- [ ] **Niet-afgehandelde floating promise**
- `containers/clearview/site/app.js:1143`
- Fix: `.catch(err => showFeedback(...))` op `testTargetConnection(...)`.
- [ ] **OAuth state-store is in-memory dict (breekt bij `--workers >1`)**
- `containers/clearview/src/clearview_app/onboarding.py:31, 134-145`
- Fix: state opslaan in DB (tabel `oauth_states` met `created_at`, `consumed_at`) of Redis.
- [ ] **`scan_type` ongevalideerd**
- `containers/clearview/src/clearview_app/schemas.py:36`, `main.py:178, 207`
- Fix: `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` in Pydantic schema.
- [ ] **`datetime.utcnow()` deprecated, timezone-naive** (overal)
- `main.py`, `worker.py`, `models.py:26-27`, `cert.py:33`
- Fix: `datetime.now(timezone.utc)`; `DateTime(timezone=True)` in SQLAlchemy-kolommen.
- [ ] **`ThreadPoolExecutor(max_workers=1)` per target**
- `containers/clearview/src/clearview_app/worker.py:307`
- Fix: gedeelde executor; documenteer dat `future.cancel()` lopende scan niet onderbreekt.
- [ ] **Geen throttling-respect bij 429 in item-loop**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:603-619`
- Fix: batching via `$expand=RoleAssignments` of exponential backoff op item-niveau.
### Hardening
- [ ] **Container draait als root**
- `containers/clearview/Dockerfile`
- Fix: `RUN addgroup --system clearview && adduser --system --ingroup clearview clearview` + `USER clearview` (de groep moet bestaan vóórdat `--ingroup` ernaar verwijst).
- [ ] **`.deb` van packages.microsoft.com zonder checksum**
- `containers/clearview/Dockerfile:17-19`
- Fix: hardcoded SHA256 + `sha256sum --check`, of officiële GPG-key via `signed-by`.
- [ ] **`Install-Module ExchangeOnlineManagement` zonder versie-pin**
- `containers/clearview/Dockerfile:24-26`
- Fix: `-RequiredVersion 3.7.0` (of huidige geteste versie).
- [ ] **Graph-foutberichten gelekt richting frontend**
- `containers/clearview/src/clearview_app/onboarding.py:170, 188`
- Fix: volledig log naar server-side DEBUG; aan client alleen generieke code + HTTP-status.
- [ ] **OData-filter injection via displayName/mail**
- `containers/clearview/src/clearview_app/scanners/entra.py:178-196`
- Fix: `urllib.parse.quote(cleaned.replace("'", "''"), safe="")`.
- [ ] **PowerShell-args zonder UPN-validatie**
- `containers/clearview/src/clearview_app/scanners/mailbox.py:181-190`
- `containers/clearview/src/clearview_app/scanners/exo_scripts/get-permissions.ps1:107`
- Fix: `re.fullmatch(r"[^@\s]{1,64}@[^@\s]{1,255}", upn)` vóór `_run_pwsh`.
### Architectuur
- [ ] **`main.py` is 1139 regels — splitsen**
- Fix: `routers/{tenants,jobs,onboarding}.py`, `services/job_service.py`, `export.py`.
- [ ] **`sharepoint.py` is 722 regels — splitsen**
- Fix: `_auth.py`, `_http.py`, `sharepoint.py` (publieke scanfuncties), `sharing_links.py`.
- [ ] **`_ensure_schema_columns()` met 18 raw `ALTER TABLE`**
- `containers/clearview/src/clearview_app/main.py:1115-1139`
- Fix: vervang door Alembic; commit baseline-migratie + history.
---
## P2 — MEDIUM / LOW
- [ ] **`app.js` 1586 regels, geen build step** — splits in ES-modules + esbuild/rollup; verwijder IIFE-wrapper.
- [ ] **Focus management bij route-wissel ontbreekt** — `app.js:1529-1551` — focus naar `<h2>` van nieuwe route na navigatie.
- [ ] **Focus-outline 38% opacity voldoet niet aan WCAG 3:1** — `styles.css:292-296` — `outline: 2px solid var(--cv-accent)`.
- [ ] **Geen debouncing/abort op `jobSiteFilter`** — `app.js:1158-1172` — `AbortController` per fetch.
- [ ] **`els.submitFeedback` gebruikt voor niet-SharePoint feedback** — `app.js:682, 711` — generiek feedback-element of context-specifiek.
- [ ] **Magic string `'__manual__'`** — `app.js:412, 424` — named constant.
- [ ] **Icon-knoppen missen `aria-label`** — `app.js:229-231` — `aria-label` toevoegen.
- [ ] **`scanner.py` is shim zonder waarschuwing** — `DeprecationWarning` of verwijderen.
- [ ] **`except Exception: pass` zonder logging** (meerdere) — minimaal `log.warning(..., exc_info=True)`.
- [ ] **`_resolve_credentials(db, ...)` mist type-hint** — `main.py:935` — `db: Session`.
- [ ] **`CreateScanAppResponse` lekt secret zonder waarschuwing** — `schemas.py:149-155` — `show_once: bool` veld + log-suppression voor dit endpoint.
- [ ] **Deviations hard-capped op 1000** — `main.py:728` — voeg `total_deviations_count` + `truncated: bool` toe.
- [ ] **Geen `logging` in scanners-package** — `import logging; logger = logging.getLogger(__name__)` overal.
- [ ] **`list-mailboxes.ps1` laadt alles in geheugen** — `-ResultSize Unlimited` → paginering of cap.
- [ ] **`cert.py` private key zonder encryptie** — documenteer aanname dat caller encryptie-at-rest doet.
- [ ] **`build-and-push.sh` doet `git add -A`** — `build-and-push.sh:294` — expliciete file-lijst of bevestigingsprompt.
- [ ] **README build-instructies kloppen niet (1/2/3 vs t/r)** — `README.md:80-84`.
---
## Werkwijze
1. Werk per checkbox; verwijder geen items maar zet `[x]` als done.
2. Bij oplevering van een batch: append entry in `docs/changelog-develop.md`.
3. CRITICAL items vereisen handmatige verificatie (auth-tests, secret-rotation, XSS-payload checks).
4. Na P0 + P1: hertest met deze TODO als checklist voordat een nieuwe review wordt aangevraagd.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,177 @@
# Clearview — Authentication Design
**Date:** 2026-05-28
**Status:** Approved (brainstorming phase)
**Scope:** Add an authentication layer to the existing Clearview FastAPI + static-frontend application.
---
## 1. Goal
Restrict access to the Clearview UI and API to a small group of named administrators. No self-service registration, no public exposure of endpoints, no multi-tenant user isolation.
## 2. Requirements
| # | Requirement |
|---|---|
| R1 | All existing API routes (`/api/tenants/*`, `/api/jobs/*`, `/api/onboarding/*`) require an authenticated session. |
| R2 | Two roles: `admin` and `user`. Only `admin` can manage users and view the audit log. Both can use the scanner UI. |
| R3 | Server-side sessions using an opaque session ID stored in an HttpOnly cookie. |
| R4 | First admin is created via a one-time **initial-setup** page that is reachable only while the `users` table is empty. No env-var fallback. |
| R5 | Password hashing with **Argon2id** (default parameters via `argon2-cffi`). |
| R6 | Password policy: minimum 12 characters, at least one letter and one digit. Validated server-side. |
| R7 | Session TTL: **8 hours sliding** by default; **30 days fixed** when "remember me" is checked at login. |
| R8 | Audit log persisted in DB and viewable in the UI by admins. |
| R9 | Existing changelog convention applies: append entries to `changelog-develop.md` per change. |
Explicitly **out of scope** for v1: rate limiting / login lockout, password reset via email, MFA, SSO, fine-grained permissions beyond `admin` / `user`.
## 3. Architecture
### 3.1 Backend module layout
A new package `clearview_app/auth/` with focused modules:
| File | Purpose |
|---|---|
| `auth/__init__.py` | Package marker. |
| `auth/models.py` | SQLAlchemy models: `User`, `Session`, `AuthAudit`. |
| `auth/security.py` | Argon2id hashing, password policy validation, session-ID generation. |
| `auth/sessions.py` | Create / lookup / refresh / revoke session records; expiry handling. |
| `auth/audit.py` | Single `record(event, user_id, ip, detail)` helper. |
| `auth/dependencies.py` | FastAPI dependencies: `current_session`, `require_user`, `require_admin`. |
| `auth/router.py` | `POST /api/auth/login`, `POST /api/auth/logout`, `GET /api/auth/me`, `GET /api/auth/setup-required`, `POST /api/auth/setup`. |
| `auth/users_router.py` | Admin-only: `GET/POST/PATCH/DELETE /api/users`, `POST /api/users/{id}/reset-password`, `GET /api/audit`. |
`main.py` wires the new routers **before** the static-files mount and applies `Depends(require_user)` to the three existing routers (`tenants`, `jobs`, `onboarding`).
### 3.2 Database
Three new tables, added via a new Alembic migration in `clearview_app/migrations/versions/`.
```text
users
id int pk
username text unique not null
password_hash text not null -- Argon2id encoded string
role text not null -- 'admin' | 'user'
is_active bool not null default true
created_at timestamptz not null default now()
updated_at timestamptz not null default now()
sessions
id uuid pk -- opaque session id, stored in cookie
user_id int not null fk -> users.id on delete cascade
created_at timestamptz not null default now()
expires_at timestamptz not null
last_seen_at timestamptz not null default now()
ip text
user_agent text
remember bool not null default false
auth_audit
id bigserial pk
ts timestamptz not null default now()
user_id int null fk -> users.id on delete set null
event text not null -- see event list below
ip text
detail jsonb -- e.g. {"username": "alice"} on login_fail
```
Audit events: `login_ok`, `login_fail`, `logout`, `user_create`, `user_update`, `user_delete`, `password_reset`, `setup`.
### 3.3 Session handling
- Cookie name: `clearview_session`. Flags: `HttpOnly`, `SameSite=Lax`, `Secure` (set when request is HTTPS; configurable for local-dev HTTP).
- Cookie value: the `sessions.id` UUIDv4. No data is encoded in the cookie itself.
- On every authenticated request:
1. Read cookie → look up row in `sessions`.
2. If missing or `expires_at <= now()` → 401, delete cookie.
3. If `remember = false` → extend `expires_at = now() + 8h` (sliding).
4. If `remember = true` → leave `expires_at` untouched (fixed 30 days from creation).
5. Update `last_seen_at`.
- Logout: delete the session row and clear the cookie.
- Cleanup: a lightweight purge of expired sessions runs at login time and is also exposed as a periodic task in the existing worker (cheap query: `DELETE FROM sessions WHERE expires_at < now()`).
### 3.4 Initial-setup flow
- `GET /api/auth/setup-required` returns `{"setup_required": true}` iff `COUNT(*) FROM users = 0`.
- `POST /api/auth/setup` accepts `{username, password}`, succeeds **only** when the table is empty, creates the user with `role='admin'`, immediately establishes a session (sets cookie), and writes a `setup` audit row.
- Once any user exists, `GET /api/auth/setup-required` returns `{"setup_required": false}` and `POST /api/auth/setup` returns `409 Conflict`.
### 3.5 Password policy
- Server-side check before hashing or updating: `len(pw) >= 12 and any(c.isalpha() for c in pw) and any(c.isdigit() for c in pw)`.
- Hashing: `argon2.PasswordHasher()` with library defaults; the encoded string includes salt + parameters, so future tuning is non-breaking.
## 4. Frontend
The frontend is the existing static `site/` (`index.html`, `app.js`, `styles.css`). Changes:
- **New `login.html`** — standalone page, no app-shell. Form with username, password, "remember me" checkbox. Posts to `/api/auth/login`, then redirects to `/`.
- **New `setup.html`** — same shell as login, used when `/api/auth/setup-required` returns true. Posts to `/api/auth/setup`.
- **`app.js`**:
- On boot, call `GET /api/auth/me`. On 401, redirect to `/login` (or `/setup` if `setup-required`).
- Wrap the `fetch` helper so any 401 response triggers a redirect to `/login`.
- Header shows `username (role)` + a **Logout** button that calls `POST /api/auth/logout` then redirects to `/login`.
- **New "Users" sidebar tab** (admin-only): list / add / edit (username, role, active) / delete users, and a **Reset password** action that opens a modal.
- **New "Audit" sub-view under Users** (admin-only): paged table of `auth_audit` rows, newest first, with event-type filter.
- Non-admin users do not see the Users tab; the backend also enforces this independently (defence in depth).
## 5. Data flow — successful login
1. Browser `POST /api/auth/login` with `{username, password, remember}`.
2. Server fetches user by username. If not found or `is_active=false` → 401 + audit `login_fail` with the supplied username.
3. Verify password with Argon2id. On mismatch → 401 + audit `login_fail`.
4. Insert `sessions` row (`remember` flag determines TTL: 8h or 30d).
5. `Set-Cookie: clearview_session=<uuid>; HttpOnly; SameSite=Lax; Secure?; Path=/; Max-Age=<ttl>`.
6. Audit `login_ok`. Return `{username, role}`.
7. Browser then calls `GET /api/auth/me` and renders the app.
## 6. Error handling
| Situation | Response |
|---|---|
| Missing / invalid / expired session cookie on protected endpoint | `401 Unauthorized`, cookie cleared. |
| Authenticated `user` hitting an admin-only endpoint | `403 Forbidden`. |
| Setup endpoint called when users already exist | `409 Conflict`. |
| Password policy violation | `400 Bad Request` with a single human-readable message. |
| Argon2 verification raising any exception | Treated as failed login (no info leak). |
## 7. Testing
Pytest cases (added in the existing test layout, mirroring current patterns):
- Password hashing round-trip; policy validator edge cases.
- Login success, wrong password, unknown user, inactive user.
- Session expiry: sliding 8h refresh vs fixed 30d remember.
- Logout invalidates the session.
- `require_user` and `require_admin` reject correctly.
- Setup endpoint: succeeds when empty, 409 when not empty.
- User CRUD endpoints: admin allowed, `user` role gets 403.
- Audit rows are written for each event.
## 8. Migration & compatibility
- Single forward Alembic migration adds the three tables. No changes to existing tables.
- First deploy on an existing install: the `users` table is empty → users are redirected to `/setup` on first visit.
- No env vars are introduced for credentials. (An optional `CLEARVIEW_COOKIE_SECURE` toggle may be added so local-dev HTTP still works; default `true`.)
## 9. Work breakdown (units of work)
1. DB models + Alembic migration.
2. Auth core: hashing, policy, session create/lookup/refresh/revoke, audit helper.
3. Auth router: login / logout / me / setup-required / setup.
4. Users router + audit endpoint.
5. Apply `require_user` to existing routers; `require_admin` to user/audit routes.
6. Frontend: `login.html`, `setup.html`, fetch-wrapper 401 handling, header user-badge + logout.
7. Frontend: Users tab (CRUD + reset password) and Audit sub-view.
8. Tests for items 2–5.
9. Append entries to `changelog-develop.md` per change.
## 10. Open items deferred to v2
- Rate limiting / brute-force lockout.
- Email-based password reset.
- MFA / SSO via Microsoft Entra (would reuse existing tenant app-registration plumbing).
- Per-tenant data scoping for non-admin users.

22
scripts/bump-dev-build.py Executable file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Increment Clearview's explicit dev/test build number."""
from __future__ import annotations
import re
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"
text = VERSION_FILE.read_text()
match = re.search(r"^BUILD = (\d+)\s*$", text, flags=re.MULTILINE)
if not match:
raise SystemExit(f"BUILD assignment not found in {VERSION_FILE}")
next_build = int(match.group(1)) + 1
text = text[: match.start(1)] + str(next_build) + text[match.end(1) :]
VERSION_FILE.write_text(text)
version_match = re.search(r'^VERSION = ["\']([^"\']+)["\']\s*$', text, flags=re.MULTILINE)
version = version_match.group(1) if version_match else "v?.?.?"
print(f"[bump-dev-build] {version}.{next_build}")

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""Validate Clearview release version state before a release build."""
from __future__ import annotations
import re
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"
CHANGELOG = ROOT / "docs" / "changelog.md"
ns: dict[str, object] = {}
exec(VERSION_FILE.read_text(), ns)
version = str(ns.get("VERSION", ""))
build = int(ns.get("BUILD", -1))
if build != 0:
raise SystemExit(f"Release builds require BUILD = 0 in {VERSION_FILE}; found BUILD = {build}")
match = re.search(r"^## (v\d+\.\d+\.\d+) — \d{4}-\d{2}-\d{2}\s*$", CHANGELOG.read_text(), flags=re.MULTILINE)
if not match:
raise SystemExit(f"No release heading found in {CHANGELOG}; expected '## vX.Y.Z — YYYY-MM-DD'")
changelog_version = match.group(1)
if changelog_version != version:
raise SystemExit(
f"Version mismatch: {VERSION_FILE} has {version}, but top changelog release is {changelog_version}"
)
print(f"[check-release-version] {version}")

24
scripts/set-release-version.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""Set Clearview's release version and reset the dev/test build segment."""
from __future__ import annotations
import re
import sys
from pathlib import Path
if len(sys.argv) != 2:
raise SystemExit("usage: scripts/set-release-version.py vX.Y.Z")
version = sys.argv[1]
if not re.fullmatch(r"v\d+\.\d+\.\d+", version):
raise SystemExit("version must match vX.Y.Z, for example v0.1.1")
ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"
text = VERSION_FILE.read_text()
text, n_version = re.subn(r'^VERSION = ["\'][^"\']+["\']\s*$', f'VERSION = "{version}"', text, count=1, flags=re.MULTILINE)
text, n_build = re.subn(r"^BUILD = \d+\s*$", "BUILD = 0", text, count=1, flags=re.MULTILINE)
if n_version != 1 or n_build != 1:
raise SystemExit(f"Could not update VERSION/BUILD in {VERSION_FILE}")
VERSION_FILE.write_text(text)
print(f"[set-release-version] {version}")

View File

@ -1 +0,0 @@
v0.1.0