Compare commits

..

3 Commits

Author SHA1 Message Date
61db7fe4a7 Dev build 2026-05-26 14:09 2026-05-26 14:09:27 +02:00
6741190342 Add Exchange Online PowerShell probe scripts
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 13:51:24 +02:00
e304b2b3d4 Refactor scanner into modular package and add AlertHub-style frontend
- Split scanner.py into scanners/ package (entra, mailbox, sharepoint, common)
- Add Exchange Online PowerShell probe scripts under scanners/exo_scripts
- Frontend overhaul: AlertHub-style sidebar layout, dark logo asset, expanded app.js/index.html/styles.css
- Backend updates across main.py, worker.py, models.py, schemas.py, csv_import.py
- Update Dockerfile and build-and-push.sh
- Update TECHNICAL.md, changelog-develop.md, add summary changelog.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 13:49:04 +02:00
29 changed files with 4671 additions and 963 deletions

View File

@ -1,52 +1,166 @@
#!/usr/bin/env bash
set -euo pipefail
# ============================================================================
# build-and-push.sh
#
# Purpose:
# - Build & push Docker images for each service under ./containers/*
# - Branch model: `main` is permanent and tracked by the prod stack
# (image `:latest`). `dev` is a short-lived branch tracked by the dev
# stack (image `:dev`); it is recreated from `main` at the start of each
# cycle and deleted after each release. Tags `vX.Y.Z` cover rollback —
# no `release/<version>` snapshot branches.
# - Two modes:
# t (test) = build & push :dev. Switches to (or creates) the `dev`
# branch from `main` if not already on it, commits any
# pending changes, and pushes `dev` to origin so the dev
# stack picks up the matching compose/config alongside
# the :dev image.
# r (release) = MUST be run from `main`. Build & push :<version>,
# :dev, :latest. Fast-forwards `main` against
# `origin/main`, merges `dev` into `main` if it exists
# (local or remote), creates a `Release vX.Y.Z` commit,
# tags it, pushes `main` and the tag, and deletes the
# `dev` branch locally and on origin. Version is read
# from the top of changelog.md.
#
# Usage:
# ./build-and-push.sh [mode]
# - mode = t -> test build, push :dev only
# - mode = r -> release build, version taken from changelog.md
# - omitted -> prompt (default: t)
#
# Requirements:
# - docs/changelog.md (relative to repo root), with the most recent release
# at the top as:
# ## vX.Y.Z — YYYY-MM-DD
# (the version is parsed from the first such line)
# - One Dockerfile per service under ./containers/<service>/Dockerfile
#
# ----------------------------------------------------------------------------
# Self-update notice (for humans AND for Claude):
#
# The canonical/master copy of this script lives at:
# /docker/develop/shared-integrations/tooling/docker-build-and-push/build-and-push.sh
#
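# When this script is copied into a project repo, it may drift from master.
# On startup the script compares its own SCRIPT_VERSION and file contents to
# the master copy. If they differ it warns and, when run interactively, offers
# to copy master over the local copy and re-exec. It never overwrites without
# confirmation; a non-interactive run aborts instead (too risky during an
# unattended release run). Set SKIP_SELF_UPDATE=1 to bypass the check.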
#
# CLAUDE INSTRUCTION:
# If you (Claude) are about to invoke this script in a project repo, first
# check whether the master copy at the path above is newer (different
# SCRIPT_VERSION or differing file contents). If so, copy the master over
# the local copy BEFORE running, then proceed. Mention the update to the user.
# ============================================================================
SCRIPT_VERSION="1.7.0"
SCRIPT_MASTER_PATH="/docker/develop/shared-integrations/tooling/docker-build-and-push/build-and-push.sh"
DOCKER_REGISTRY="gitea.oskamp.info"
DOCKER_NAMESPACE="ivooskamp"
VERSION_FILE="version.txt"
START_VERSION="v0.1.0"
CHANGELOG_FILE="docs/changelog.md"
CONTAINERS_DIR="containers"
LAST_BRANCH_FILE=".last-branch"
BUMP="${1:-}"
if [[ -z "${BUMP}" ]]; then
echo "Select bump type: [1] patch, [2] minor, [3] major, [t] test (default: t)"
read -r BUMP
BUMP="${BUMP:-t}"
fi
if [[ "$BUMP" != "1" && "$BUMP" != "2" && "$BUMP" != "3" && "$BUMP" != "t" ]]; then
echo "[ERROR] Unknown bump type '$BUMP' (use 1, 2, 3, or t)."
exit 1
fi
read_version() {
if [[ -f "$VERSION_FILE" ]]; then
tr -d ' \t\n\r' < "$VERSION_FILE"
else
echo "$START_VERSION"
# --- Self-update check -------------------------------------------------------
# Compare this script to the canonical master copy. If it differs, offer to
# copy master over the local copy and re-exec with the same arguments so the
# build runs against the up-to-date script.
#
# Skip with: SKIP_SELF_UPDATE=1 ./build-and-push.sh ...
self_update_check() {
if [[ "${SKIP_SELF_UPDATE:-0}" == "1" ]]; then
return 0
fi
}
write_version() {
echo "$1" > "$VERSION_FILE"
}
local self_path="${BASH_SOURCE[0]}"
# Resolve to absolute path so a comparison against itself is detected.
local self_abs
self_abs="$(cd "$(dirname "$self_path")" 2>/dev/null && pwd)/$(basename "$self_path")" || self_abs="$self_path"
bump_version() {
local cur="$1"
local kind="$2"
local core="${cur#v}"
IFS='.' read -r MA MI PA <<< "$core"
case "$kind" in
1) PA=$((PA + 1));;
2) MI=$((MI + 1)); PA=0;;
3) MA=$((MA + 1)); MI=0; PA=0;;
*) echo "[ERROR] Unknown bump kind"; exit 1;;
esac
echo "v${MA}.${MI}.${PA}"
}
if [[ "$self_abs" == "$SCRIPT_MASTER_PATH" ]]; then
return 0 # We ARE the master copy.
fi
if [[ ! -f "$SCRIPT_MASTER_PATH" ]]; then
return 0 # Master not reachable from this host; silently skip.
fi
local master_version reason=""
master_version="$(grep -m1 -E '^SCRIPT_VERSION=' "$SCRIPT_MASTER_PATH" | sed -E 's/.*"([^"]+)".*/\1/')"
if [[ -n "$master_version" && "$master_version" != "$SCRIPT_VERSION" ]]; then
reason="version"
elif ! cmp -s "$self_abs" "$SCRIPT_MASTER_PATH"; then
reason="contents"
else
return 0 # Identical to master.
fi
echo "[WARN] Local build-and-push.sh differs from master."
if [[ "$reason" == "version" ]]; then
echo " local : $SCRIPT_VERSION"
echo " master : $master_version ($SCRIPT_MASTER_PATH)"
else
echo " Same SCRIPT_VERSION ($SCRIPT_VERSION) but file contents differ."
echo " master : $SCRIPT_MASTER_PATH"
fi
# Prompt only when stdin is a TTY; in non-interactive runs, abort safely so
# an unattended release never silently runs against a stale script.
if [[ ! -t 0 ]]; then
echo "[ERROR] Non-interactive shell — refusing to auto-update."
echo " Re-run interactively, or set SKIP_SELF_UPDATE=1 to bypass,"
echo " or update manually: cp \"$SCRIPT_MASTER_PATH\" \"$self_abs\""
exit 1
fi
local reply
read -r -p "Update local script from master and re-run? [Y/n] " reply
reply="${reply:-Y}"
if [[ ! "$reply" =~ ^[Yy]$ ]]; then
echo "[INFO] Continuing with local version $SCRIPT_VERSION (not updated)."
echo ""
return 0
fi
if ! cp "$SCRIPT_MASTER_PATH" "$self_abs"; then
echo "[ERROR] Failed to copy master to $self_abs (read-only filesystem?)."
echo " Continuing with local version $SCRIPT_VERSION."
echo ""
return 0
fi
chmod +x "$self_abs" 2>/dev/null || true
echo "[INFO] Updated $self_abs from master. Re-executing..."
echo ""
# Re-exec with original arguments. SKIP_SELF_UPDATE=1 prevents an
# update loop if cp somehow didn't take.
export SKIP_SELF_UPDATE=1
exec "$self_abs" "$@"
}
self_update_check "$@"
# --- Input: prompt if missing ------------------------------------------------
# MODE comes from the first positional argument; when it is absent the user is
# prompted. Accepted spellings ("t"/"test", "r"/"release") are normalised to
# the single letters "t" / "r" that the rest of the script compares against.
MODE="${1:-}"
if [[ -z "${MODE}" ]]; then
  echo "Select build type: [t] test build (push :dev only), [r] release build (default: t)"
  # `read` returns non-zero on EOF (closed or empty stdin). Under `set -e` that
  # would abort the script silently before the documented default is applied,
  # so tolerate the failure and fall through to the default below.
  read -r MODE || true
  MODE="${MODE:-t}"
fi
case "$MODE" in
  t|test) MODE="t" ;;
  r|release) MODE="r" ;;
  *)
    echo "[ERROR] Unknown mode '$MODE' (use 't' for test or 'r' for release)."
    exit 1
    ;;
esac
# --- Helpers -----------------------------------------------------------------
check_docker_ready() {
if ! docker info >/dev/null 2>&1; then
echo "[ERROR] Docker daemon not reachable. Is Docker running and do you have permission to use it?"
@ -57,11 +171,11 @@ check_docker_ready() {
ensure_registry_login() {
local cfg="${HOME}/.docker/config.json"
if [[ ! -f "$cfg" ]]; then
echo "[ERROR] Docker config not found at $cfg. Please login: docker login ${DOCKER_REGISTRY}"
echo "[ERROR] Docker config not found at $cfg. Please login: docker login ${DOCKER_REGISTRY}"
exit 1
fi
if ! grep -q "\"${DOCKER_REGISTRY}\"" "$cfg"; then
echo "[ERROR] No registry auth found for ${DOCKER_REGISTRY}. Please run: docker login ${DOCKER_REGISTRY}"
echo "[ERROR] No registry auth found for ${DOCKER_REGISTRY}. Please run: docker login ${DOCKER_REGISTRY}"
exit 1
fi
}
@ -70,7 +184,7 @@ validate_repo_component() {
local comp="$1"
if [[ ! "$comp" =~ ^[a-z0-9]+([._-][a-z0-9]+)*$ ]]; then
echo "[ERROR] Invalid repository component '$comp'."
echo " Must match: ^[a-z0-9]+([._-][a-z0-9]+)*$"
echo " Must match: ^[a-z0-9]+([._-][a-z0-9]+)*$ (lowercase, digits, ., _, - as separators)."
return 1
fi
}
@ -88,11 +202,33 @@ validate_tag() {
fi
}
if [[ ! -d ".git" ]]; then
echo "[ERROR] Not a git repository (.git missing)."
exit 1
fi
# Print the version of the newest release recorded in the changelog, i.e. the
# vX.Y.Z token of the first "## vX.Y.Z ..." heading in $CHANGELOG_FILE.
# Accepted heading forms:
#   ## v1.0.3 — 2026-04-24
#   ## v1.0.3 - 2026-04-24
#   ## v1.0.3
# The version goes to stdout; diagnostics go to stderr. Exits with status 1
# when the file is missing or no matching heading can be found/parsed.
read_version_from_changelog() {
  local heading
  local version_re='v[0-9]+\.[0-9]+\.[0-9]+'

  [[ -f "$CHANGELOG_FILE" ]] || {
    echo "[ERROR] $CHANGELOG_FILE not found in $(pwd)." >&2
    exit 1
  }

  # First line starting with "## v<digits>.<digits>.<digits>"; `|| true` keeps
  # a no-match from tripping errexit so the friendly error below is shown.
  heading="$(grep -m1 -E "^##[[:space:]]+${version_re}" "$CHANGELOG_FILE" || true)"
  [[ -n "$heading" ]] || {
    echo "[ERROR] No release heading found in $CHANGELOG_FILE (expected e.g. '## v1.0.3 — 2026-04-24' near the top)." >&2
    exit 1
  }

  # Extract the vX.Y.Z token (leftmost match) from the heading.
  if [[ ! "$heading" =~ $version_re ]]; then
    echo "[ERROR] Could not parse version from line: $heading" >&2
    exit 1
  fi
  echo "${BASH_REMATCH[0]}"
}
# --- Preflight ---------------------------------------------------------------
if [[ ! -d "$CONTAINERS_DIR" ]]; then
echo "[ERROR] '$CONTAINERS_DIR' directory missing. Expected ./${CONTAINERS_DIR}/<service>/ with a Dockerfile."
exit 1
@ -102,59 +238,107 @@ check_docker_ready
ensure_registry_login
validate_repo_component "$DOCKER_NAMESPACE"
DETECTED_BRANCH="$(git branch --show-current 2>/dev/null || true)"
if [[ -z "$DETECTED_BRANCH" ]]; then
DETECTED_BRANCH="$(git symbolic-ref --quiet --short HEAD 2>/dev/null || true)"
fi
if [[ -z "$DETECTED_BRANCH" ]]; then
DETECTED_BRANCH="main"
fi
UPSTREAM_REF="$(git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null || echo "origin/$DETECTED_BRANCH")"
HEAD_SHA="$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")"
LAST_BRANCH_FILE_PATH="$(pwd)/$LAST_BRANCH_FILE"
echo "[INFO] Repo: $(pwd)"
echo "[INFO] Current branch: $DETECTED_BRANCH"
echo "[INFO] Upstream: $UPSTREAM_REF"
echo "[INFO] HEAD (sha): $HEAD_SHA"
CURRENT_VERSION="$(read_version)"
NEW_VERSION="$CURRENT_VERSION"
DO_TAG_AND_BUMP=true
if [[ "$BUMP" == "t" ]]; then
echo "[INFO] Test build: keeping version $CURRENT_VERSION; will only update :dev."
DO_TAG_AND_BUMP=false
# Informational: show branch and HEAD if this happens to be a git repo.
BRANCH_INFO=""
HEAD_INFO=""
if [[ -d ".git" ]]; then
BRANCH_INFO="$(git branch --show-current 2>/dev/null || echo unknown)"
HEAD_INFO="$(git rev-parse --short HEAD 2>/dev/null || echo unknown)"
echo "[INFO] Repo: $(pwd)"
echo "[INFO] Current branch: $BRANCH_INFO"
echo "[INFO] HEAD (sha): $HEAD_INFO"
else
NEW_VERSION="$(bump_version "$CURRENT_VERSION" "$BUMP")"
echo "[INFO] New version: $NEW_VERSION"
echo "[INFO] Repo: $(pwd) (not a git checkout)"
fi
if $DO_TAG_AND_BUMP; then
validate_tag "$NEW_VERSION"
# --- Release preflight (BEFORE any docker work) ------------------------------
# All git-side validation for a release happens here so a wrong-branch / dirty
# tree / stale main / conflicting dev / pre-existing tag aborts the run before
# anything is built or pushed to the registry. dev is merged into main now so
# the version we read from changelog.md reflects the merged state, not main's
# pre-merge state.
#
# Outputs used later in the script:
#   VERSION     - release version parsed from the changelog (empty in test mode)
#   DEV_MERGED  - 1 when a dev branch was merged into main here, otherwise 0
VERSION=""
DEV_MERGED=0
if [[ "$MODE" == "r" ]]; then
if [[ ! -d ".git" ]]; then
echo "[ERROR] Release mode requires a git checkout."
exit 1
fi
# Empty when HEAD is detached (symbolic-ref fails and `echo` prints nothing).
CURRENT_BRANCH="$(git symbolic-ref --short -q HEAD || echo)"
if [[ "$CURRENT_BRANCH" != "main" ]]; then
echo "[ERROR] Release build must run from 'main' branch. Current: ${CURRENT_BRANCH:-<detached>}."
echo " Switch with: git checkout main"
exit 1
fi
# Unstaged or staged changes to tracked files block the release. `git diff`
# does not report untracked files, so those do not trip this check.
if ! git diff --quiet HEAD -- || ! git diff --cached --quiet; then
echo "[ERROR] Working tree has uncommitted changes. Commit or stash them on the appropriate branch before releasing."
git status --short
exit 1
fi
echo "[INFO] Fetching origin..."
git fetch origin main
# dev is optional on the remote: only fetch it when it actually exists there.
if git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
git fetch origin dev
fi
# --ff-only never creates a merge commit, so this fails when local main and
# origin/main have diverged.
if ! git merge --ff-only origin/main 2>/dev/null; then
echo "[ERROR] Local main has diverged from origin/main. Resolve manually before releasing."
exit 1
fi
# Merge dev into main BEFORE reading the version, so changelog.md reflects
# the bumped state that dev brings in.
# NOTE(review): a local dev branch takes precedence; origin/dev is only merged
# when no local dev exists. Commits that exist only on origin/dev are therefore
# not merged when a local dev is present — confirm this is intended.
if git show-ref --verify --quiet refs/heads/dev; then
echo "[INFO] Merging local dev into main..."
if ! git merge --no-ff dev -m "Release (merge dev)"; then
echo "[ERROR] Merge of dev into main failed (conflict). Resolve manually and re-run."
exit 1
fi
DEV_MERGED=1
elif git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
echo "[INFO] Fetching and merging origin/dev into main..."
# The dev:dev refspec creates a local dev branch from the remote one so the
# merge below can refer to it by name.
git fetch origin dev:dev
if ! git merge --no-ff dev -m "Release (merge dev)"; then
echo "[ERROR] Merge of dev into main failed (conflict). Resolve manually and re-run."
exit 1
fi
DEV_MERGED=1
else
echo "[INFO] No dev branch found — releasing main as-is."
fi
VERSION="$(read_version_from_changelog)"
echo "[INFO] Release version (from $CHANGELOG_FILE, post-merge): $VERSION"
# Both image tags pushed by a release must be valid before any build starts.
validate_tag "$VERSION"
validate_tag "latest"
# Tag collision = abort. A re-release of an existing version with different
# content would silently move what consumers think v0.X.Y points to.
if git rev-parse -q --verify "refs/tags/${VERSION}" >/dev/null; then
echo "[ERROR] Tag ${VERSION} already exists locally. Bump $CHANGELOG_FILE to a new version before releasing."
exit 1
fi
if git ls-remote --exit-code --tags origin "refs/tags/${VERSION}" >/dev/null 2>&1; then
echo "[ERROR] Tag ${VERSION} already exists on origin. Bump $CHANGELOG_FILE to a new version before releasing."
exit 1
fi
# Ask for confirmation so you never accidentally re-push an old version or a wrong one.
# An empty answer counts as "N"; only a single y/Y proceeds.
read -r -p "Proceed building & pushing as ${VERSION}? [y/N] " CONFIRM
CONFIRM="${CONFIRM:-N}"
if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then
echo "[INFO] Aborted by user. Note: dev has been merged into local main; reset with 'git reset --hard origin/main' if you want to undo."
# Declining is a normal outcome, not an error, hence exit status 0.
exit 0
fi
else
echo "[INFO] Test build: only :dev will be pushed."
fi
validate_tag "dev"
if $DO_TAG_AND_BUMP; then
echo "[INFO] Writing $NEW_VERSION to $VERSION_FILE"
write_version "$NEW_VERSION"
echo "[INFO] Git add + commit (branch: $DETECTED_BRANCH)"
git add "$VERSION_FILE"
git commit -m "Release $NEW_VERSION on branch $DETECTED_BRANCH (bump type $BUMP)"
echo "[INFO] Git tag $NEW_VERSION"
git tag -a "$NEW_VERSION" -m "Release $NEW_VERSION"
echo "[INFO] Git push + tags"
git push origin "$DETECTED_BRANCH"
git push --tags
else
echo "[INFO] Skipping commit/tagging (test build)."
fi
# --- Build & push per service ------------------------------------------------
shopt -s nullglob
services=( "$CONTAINERS_DIR"/* )
if [[ ${#services[@]} -eq 0 ]]; then
@ -178,21 +362,21 @@ for svc_path in "${services[@]}"; do
IMAGE_BASE="${DOCKER_REGISTRY}/${DOCKER_NAMESPACE}/${svc}"
if $DO_TAG_AND_BUMP; then
if [[ "$MODE" == "r" ]]; then
echo "============================================================"
echo "[INFO] Building ${svc} -> tags: ${NEW_VERSION}, dev, latest"
echo "[INFO] Building ${svc} -> tags: ${VERSION}, dev, latest"
echo "============================================================"
docker build \
-t "${IMAGE_BASE}:${NEW_VERSION}" \
-t "${IMAGE_BASE}:${VERSION}" \
-t "${IMAGE_BASE}:dev" \
-t "${IMAGE_BASE}:latest" \
"$svc_path"
docker push "${IMAGE_BASE}:${NEW_VERSION}"
docker push "${IMAGE_BASE}:${VERSION}"
docker push "${IMAGE_BASE}:dev"
docker push "${IMAGE_BASE}:latest"
BUILT_IMAGES+=("${IMAGE_BASE}:${NEW_VERSION}" "${IMAGE_BASE}:dev" "${IMAGE_BASE}:latest")
BUILT_IMAGES+=("${IMAGE_BASE}:${VERSION}" "${IMAGE_BASE}:dev" "${IMAGE_BASE}:latest")
else
echo "============================================================"
echo "[INFO] Test build ${svc} -> tag: dev"
@ -203,18 +387,96 @@ for svc_path in "${services[@]}"; do
fi
done
echo "$DETECTED_BRANCH" > "$LAST_BRANCH_FILE_PATH"
# --- Summary -----------------------------------------------------------------
echo ""
echo "============================================================"
echo "[SUMMARY] Build & push complete (branch: $DETECTED_BRANCH)"
if $DO_TAG_AND_BUMP; then
echo "[INFO] Release version: $NEW_VERSION"
if [[ "$MODE" == "r" ]]; then
echo "[SUMMARY] Release build & push complete: $VERSION"
else
echo "[INFO] Test build (no version bump)"
echo "[SUMMARY] Test build & push complete (:dev only)"
fi
if [[ -n "$BRANCH_INFO" ]]; then
echo "[INFO] Branch: $BRANCH_INFO HEAD: $HEAD_INFO"
fi
echo "[INFO] Images pushed:"
for img in "${BUILT_IMAGES[@]}"; do
echo " - $img"
done
echo "============================================================"
echo ""
# --- Git: release commit + tag + push (release mode only) -------------------
# Preflight (branch, clean tree, ff origin/main, dev merge, tag collision,
# version parse) already ran BEFORE the build. dev is already merged into
# local main. We only need to land the Release commit, tag, and push, and then
# clean up the dev branch.
if [[ "$MODE" == "r" ]]; then
  echo "[INFO] Finalising release: version=${VERSION}"

  # Produce a clean Release commit at the tip. Preflight guarantees the working
  # tree was clean at start; any post-build artefacts would be unexpected, so
  # commit with --allow-empty to keep the release marker isolated.
  if git diff --quiet HEAD -- && git diff --cached --quiet; then
    git commit --allow-empty -m "Release ${VERSION}"
  else
    echo "[WARN] Working tree changed during the build — staging and including in release commit."
    git add -A
    git commit -m "Release ${VERSION}"
  fi
  git tag -a "${VERSION}" -m "Release ${VERSION}"

  # Push main first (triggers prod webhook), then the tag.
  git push origin main
  git push origin "refs/tags/${VERSION}"
  echo "[INFO] Pushed main and tag ${VERSION} to origin."

  # Clean up dev branch — local and remote.
  if [[ "$DEV_MERGED" == "1" ]]; then
    if git show-ref --verify --quiet refs/heads/dev; then
      git branch -D dev
      echo "[INFO] Deleted local dev branch."
    fi
    if git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
      # The branch that was merged may have been the LOCAL dev, which can lag
      # behind origin/dev. Only delete the remote branch when its current tip
      # is contained in the released main; otherwise unmerged commits would be
      # lost. FETCH_HEAD is used so the check does not depend on the remote-
      # tracking refspec configuration.
      if git fetch origin dev && git merge-base --is-ancestor FETCH_HEAD HEAD; then
        git push origin --delete dev
        echo "[INFO] Deleted remote dev branch."
      else
        echo "[WARN] origin/dev has commits that are not part of the released main — keeping remote dev. Merge or discard them manually."
      fi
    fi
  fi
fi
# --- Git: dev branch commit + push (test mode only) -------------------------
# After a test build, make sure we are on `dev` (reusing the local branch, the
# remote branch, or a fresh branch from origin/main — in that order), commit
# whatever is pending, and push `dev` to origin.
if [[ "$MODE" == "t" ]]; then
  [[ -d ".git" ]] || {
    echo "[WARN] Not a git checkout — skipping dev branch commit/push."
    exit 0
  }

  # Empty string when HEAD is detached.
  CURRENT_BRANCH="$(git symbolic-ref --short -q HEAD || echo)"

  # Get onto the dev branch, creating it when necessary.
  if [[ "$CURRENT_BRANCH" == "dev" ]]; then
    :  # already on dev, nothing to switch
  elif git show-ref --verify --quiet refs/heads/dev; then
    echo "[INFO] Switching to existing local dev branch."
    git checkout dev
  elif git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
    echo "[INFO] Checking out remote dev branch."
    git fetch origin dev
    git checkout -b dev origin/dev
  else
    echo "[INFO] Creating new dev branch from main."
    git fetch origin main
    git checkout -b dev origin/main
  fi

  # Stage everything; commit only when the index actually differs from HEAD.
  git add -A
  if ! git diff --cached --quiet; then
    git commit -m "Dev build $(date '+%Y-%m-%d %H:%M')"
  else
    echo "[INFO] Working tree clean — pushing current HEAD to dev."
  fi

  # Plain (non-force) push: a diverged origin/dev makes this fail on purpose
  # and has to be resolved by hand.
  git push -u origin dev
  echo "[INFO] Pushed dev to origin."
fi

28
build.sh Executable file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
# Clearview build wrapper.
#
# Runs the project-specific version step for the requested mode and then hands
# over to the shared build-and-push.sh, so that script stays free of
# project-specific version handling.
#
# Usage:
#   ./build.sh t   # increment explicit dev/test build segment, then push :dev
#   ./build.sh r   # validate release version state, then run release build

# Always operate from the directory this wrapper lives in.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$script_dir"

requested="${1:-}"
if [[ "$requested" == "t" ]]; then
  ./scripts/bump-dev-build.py
elif [[ "$requested" == "r" ]]; then
  ./scripts/check-release-version.py
else
  echo "usage: ./build.sh {t|r}" >&2
  exit 2
fi

# Replace this process with the shared script, forwarding all arguments.
exec ./build-and-push.sh "$@"

View File

@ -1,11 +1,33 @@
FROM python:3.12-slim
FROM python:3.12-slim-bookworm
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app/src
# Suppress PowerShell telemetry inside the container
ENV POWERSHELL_TELEMETRY_OPTOUT=1
ENV DOTNET_CLI_TELEMETRY_OPTOUT=1
WORKDIR /app
# ---------------------------------------------------------------------------
# PowerShell 7 + ExchangeOnlineManagement module
# Required for Exchange Online mailbox permission scanning.
#
# Everything happens in a single RUN instruction:
#   1. Install ca-certificates and curl.
#   2. Download and install Microsoft's Debian 12 repository package, then
#      refresh the package index so the `powershell` package can be installed.
#   3. Mark PSGallery as trusted and install ExchangeOnlineManagement for all
#      users.
#   4. Purge curl (only needed for the download), autoremove, and delete the
#      apt package lists.
# ---------------------------------------------------------------------------
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates curl \
&& curl -fsSL https://packages.microsoft.com/config/debian/12/packages-microsoft-prod.deb \
-o /tmp/packages-microsoft-prod.deb \
&& dpkg -i /tmp/packages-microsoft-prod.deb \
&& rm /tmp/packages-microsoft-prod.deb \
&& apt-get update \
&& apt-get install -y --no-install-recommends powershell \
&& pwsh -NoProfile -NonInteractive -Command \
"Set-PSRepository -Name PSGallery -InstallationPolicy Trusted; \
Install-Module -Name ExchangeOnlineManagement -Scope AllUsers -Force -AllowClobber" \
&& apt-get purge -y curl \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

View File

@ -4,6 +4,18 @@
selectedJobData: null,
refreshTimer: null,
tenants: [],
sharingLinkSelectionByJob: {},
currentRoute: 'dashboard',
};
// Human-readable title for each route key (the same keys that are stored in
// state.currentRoute and passed to navigateTo()).
// NOTE(review): presumably rendered into els.contentTitle on navigation —
// confirm against navigateTo().
const ROUTE_TITLES = {
'dashboard': 'Dashboard',
'jobs': 'Scan Jobs',
'scan-sharepoint': 'New SharePoint Scan',
'scan-mailbox': 'New Mailbox Scan',
'scan-entra': 'New Entra Group Scan',
'tenants': 'Tenants',
'settings': 'Settings',
};
const els = {
@ -24,6 +36,7 @@
scanAppDisplayName: document.getElementById('scanAppDisplayName'),
newTenantName: document.getElementById('newTenantName'),
newTenantTenantId: document.getElementById('newTenantTenantId'),
newTenantPrimaryDomain: document.getElementById('newTenantPrimaryDomain'),
newTenantClientId: document.getElementById('newTenantClientId'),
newTenantClientSecret: document.getElementById('newTenantClientSecret'),
saveTenantBtn: document.getElementById('saveTenantBtn'),
@ -43,11 +56,35 @@
csvFile: document.getElementById('csvFile'),
csvSkipDefaults: document.getElementById('csvSkipDefaults'),
submitFeedback: document.getElementById('submitFeedback'),
sharepointScanMode: document.getElementById('sharepointScanMode'),
// Mailbox scan panel
entraScanTenantSelect: document.getElementById('entraScanTenantSelect'),
manualEntraForm: document.getElementById('manualEntraForm'),
csvEntraForm: document.getElementById('csvEntraForm'),
allEntraForm: document.getElementById('allEntraForm'),
manualEntraIds: document.getElementById('manualEntraIds'),
csvEntraFile: document.getElementById('csvEntraFile'),
entraSubmitFeedback: document.getElementById('entraSubmitFeedback'),
mailboxScanTenantSelect: document.getElementById('mailboxScanTenantSelect'),
manualMailboxForm: document.getElementById('manualMailboxForm'),
csvMailboxForm: document.getElementById('csvMailboxForm'),
allMailboxesForm: document.getElementById('allMailboxesForm'),
allMailboxesOrg: document.getElementById('allMailboxesOrg'),
manualMailboxes: document.getElementById('manualMailboxes'),
csvMailboxFile: document.getElementById('csvMailboxFile'),
mailboxSubmitFeedback: document.getElementById('mailboxSubmitFeedback'),
// Jobs panel
refreshJobsBtn: document.getElementById('refreshJobsBtn'),
jobTenantFilter: document.getElementById('jobTenantFilter'),
jobTypeFilter: document.getElementById('jobTypeFilter'),
jobsTableBody: document.getElementById('jobsTableBody'),
jobAutoRefresh: document.getElementById('jobAutoRefresh'),
// Sidebar / routing
contentTitle: document.getElementById('contentTitle'),
appVersion: document.getElementById('appVersion'),
targetsTableHead: document.getElementById('targetsTableHead'),
targetsHeading: document.getElementById('targetsHeading'),
deviationsTableHead: document.getElementById('deviationsTableHead'),
// Job detail panel
targetsTableBody: document.getElementById('targetsTableBody'),
deviationsTableBody: document.getElementById('deviationsTableBody'),
@ -60,6 +97,9 @@
sharingLinksTypes: document.getElementById('sharingLinksTypes'),
resolveSharingLinksBtn: document.getElementById('resolveSharingLinksBtn'),
resolveFeedback: document.getElementById('resolveFeedback'),
resolveGroupsBlock: document.getElementById('resolveGroupsBlock'),
resolveGroupsBtn: document.getElementById('resolveGroupsBtn'),
resolveGroupsFeedback: document.getElementById('resolveGroupsFeedback'),
// Hero stats
statTenants: document.getElementById('statTenants'),
statJobs: document.getElementById('statJobs'),
@ -97,6 +137,43 @@
return date.toLocaleString();
}
/**
 * Build the HTML snippet describing a target's most recent connection probe.
 *
 * Reads `last_probe_at`, `last_probe_ok` and `last_probe_message` from the
 * target. All dynamic text is passed through escHtml before being embedded.
 *
 * @param {object} target - Scan target record.
 * @returns {string} HTML: a "Not tested yet" badge, an OK badge with the
 *   probe time, or a Failed badge with the probe time and message.
 */
function renderProbeStatus(target) {
  if (!target.last_probe_at) {
    return '<span class="risk info">Not tested yet</span>';
  }

  const probedAt = formatDate(target.last_probe_at);
  const message = target.last_probe_message || '';
  const titleAttr = ' title="' + escHtml(message) + '"';
  const timeCell = ' <span class="cell-members">' + escHtml(probedAt) + '</span>';

  if (target.last_probe_ok) {
    return '<span class="risk ok"' + titleAttr + '>OK</span>' + timeCell;
  }

  // Failures additionally show the message inline, not only in the tooltip.
  const messageCell = '<br><span class="cell-members">' + escHtml(message) + '</span>';
  return '<span class="risk critical"' + titleAttr + '>Failed</span>' + timeCell + messageCell;
}
/**
 * Trigger a connection test for one target of the currently selected job and
 * reload the job details afterwards.
 *
 * The button is disabled and relabelled while the request is in flight and is
 * always restored afterwards. Failures are reported via window.alert.
 * Does nothing when no job is selected.
 *
 * @param {string} targetId - Identifier of the target to test.
 * @param {HTMLButtonElement} button - Button that triggered the test.
 * @returns {Promise<void>}
 */
async function testTargetConnection(targetId, button) {
  if (!state.selectedJobId) {
    return;
  }

  const previousLabel = button.textContent;
  button.disabled = true;
  button.textContent = 'Testing…';

  try {
    const endpoint =
      '/api/scan-jobs/' + encodeURIComponent(state.selectedJobId) +
      '/targets/' + encodeURIComponent(targetId) + '/test-connection';
    const response = await fetch(endpoint, { method: 'POST' });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error('HTTP ' + response.status + ': ' + detail);
    }
    // The payload itself is not used; the refreshed job carries the result.
    await response.json();
    await refreshSelectedJob();
  } catch (failure) {
    window.alert('Test failed: ' + failure.message);
  } finally {
    button.disabled = false;
    button.textContent = previousLabel;
  }
}
function statusBadge(status) {
const cls = status === 'completed' ? 'ok'
: status === 'running' ? 'warn'
@ -176,11 +253,9 @@
els.tenantsTableBody.querySelectorAll('[data-tenant-scan]').forEach(function (btn) {
btn.addEventListener('click', function () {
const id = btn.getAttribute('data-tenant-scan');
// Pre-select this tenant in the scan form
els.scanTenantSelect.value = id;
onScanTenantChange();
// Scroll to scan panel
els.manualScanForm.closest('.panel').scrollIntoView({ behavior: 'smooth' });
navigateTo('scan-sharepoint');
});
});
@ -198,7 +273,7 @@
}
function populateTenantDropdowns() {
// Scan tenant select
// SharePoint scan tenant select (supports manual creds)
const scanVal = els.scanTenantSelect.value;
els.scanTenantSelect.innerHTML =
'<option value="">-- Select a tenant --</option>' +
@ -210,6 +285,36 @@
els.scanTenantSelect.value = scanVal;
}
// Entra scan tenant select (cert required for Graph)
if (els.entraScanTenantSelect) {
const ev = els.entraScanTenantSelect.value;
els.entraScanTenantSelect.innerHTML =
'<option value="">-- Select a tenant --</option>' +
state.tenants.map(function (t) {
var label = escHtml(t.name);
if (!t.has_certificate) label += ' (no certificate)';
return '<option value="' + escHtml(t.id) + '"' + (t.has_certificate ? '' : ' disabled') + '>' + label + '</option>';
}).join('');
if (ev) els.entraScanTenantSelect.value = ev;
}
// Mailbox scan tenant select (cert only, no manual creds)
if (els.mailboxScanTenantSelect) {
const mbVal = els.mailboxScanTenantSelect.value;
els.mailboxScanTenantSelect.innerHTML =
'<option value="">-- Select a tenant --</option>' +
state.tenants.map(function (t) {
var label = escHtml(t.name);
if (!t.has_certificate) {
label += ' (no certificate)';
}
return '<option value="' + escHtml(t.id) + '"' + (t.has_certificate ? '' : ' disabled') + '>' + label + '</option>';
}).join('');
if (mbVal) {
els.mailboxScanTenantSelect.value = mbVal;
}
}
// Job tenant filter select
const filterVal = els.jobTenantFilter.value;
els.jobTenantFilter.innerHTML =
@ -244,6 +349,7 @@
async function saveTenant() {
const name = (els.newTenantName.value || '').trim();
const tenantId = (els.newTenantTenantId.value || '').trim();
const primaryDomain = (els.newTenantPrimaryDomain ? els.newTenantPrimaryDomain.value : '').trim().toLowerCase();
const clientId = (els.newTenantClientId.value || '').trim();
const clientSecret = (els.newTenantClientSecret.value || '').trim();
@ -256,7 +362,13 @@
await requestJson('/api/tenants', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name: name, tenant_id: tenantId, client_id: clientId, client_secret: clientSecret }),
body: JSON.stringify({
name: name,
tenant_id: tenantId,
primary_domain: primaryDomain || null,
client_id: clientId,
client_secret: clientSecret,
}),
});
showFeedback(els.tenantFeedback, 'Tenant "' + name + '" saved.', 'ok');
closeTenantForm();
@ -287,6 +399,7 @@
els.addTenantBtn.removeAttribute('hidden');
els.newTenantName.value = '';
els.newTenantTenantId.value = '';
if (els.newTenantPrimaryDomain) els.newTenantPrimaryDomain.value = '';
els.newTenantClientId.value = '';
els.newTenantClientSecret.value = '';
if (els.connectedTenantId) els.connectedTenantId.value = '';
@ -371,6 +484,7 @@
if (status === 'connected') {
const tenantId = params.get('tenant_id') || '';
navigateTo('tenants');
openTenantForm();
if (tenantId && els.newTenantTenantId) {
els.newTenantTenantId.value = tenantId;
@ -422,7 +536,8 @@
return;
}
try {
const payload = Object.assign({ site_urls: urls, skip_default_sites: !!els.manualSkipDefaults.checked }, auth);
const mode = (els.sharepointScanMode && els.sharepointScanMode.value) || 'sharepoint';
const payload = Object.assign({ scan_type: mode, site_urls: urls, skip_default_sites: !!els.manualSkipDefaults.checked }, auth);
const result = await requestJson('/api/scan-jobs', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
@ -459,8 +574,10 @@
return;
}
const skipDefaults = els.csvSkipDefaults.checked ? 'true' : 'false';
const mode = (els.sharepointScanMode && els.sharepointScanMode.value) || 'sharepoint';
const formData = new FormData();
formData.append('file', file);
formData.append('scan_type', mode);
if (auth.tenant_profile_id) {
formData.append('tenant_profile_id', auth.tenant_profile_id);
} else {
@ -495,10 +612,14 @@
async function refreshJobs() {
const filterTenant = els.jobTenantFilter.value;
const filterType = els.jobTypeFilter ? els.jobTypeFilter.value : '';
let url = '/api/scan-jobs?limit=50';
if (filterTenant) {
url += '&tenant_profile_id=' + encodeURIComponent(filterTenant);
}
if (filterType) {
url += '&scan_type=' + encodeURIComponent(filterType);
}
const jobs = await requestJson(url);
els.statJobs.textContent = String(jobs.length);
@ -507,7 +628,7 @@
}).length);
if (!jobs.length) {
els.jobsTableBody.innerHTML = '<tr><td colspan="8">No jobs yet.</td></tr>';
els.jobsTableBody.innerHTML = '<tr><td colspan="9">No jobs yet.</td></tr>';
return;
}
@ -522,9 +643,21 @@
const tenantLabel = job.tenant_name
? '<span class="tenant-tag">' + escHtml(job.tenant_name) + '</span>'
: '<span style="color:var(--cv-text-secondary);font-size:0.82rem">manual</span>';
const scanType = job.scan_type || 'sharepoint';
var typeLabel;
if (scanType === 'mailbox') {
typeLabel = '<span class="risk info">Mailbox</span>';
} else if (scanType === 'sharepoint_root') {
typeLabel = '<span class="risk warn">SP Root</span>';
} else if (scanType === 'entra_groups') {
typeLabel = '<span class="risk high">Entra</span>';
} else {
typeLabel = '<span class="risk ok">SharePoint</span>';
}
return (
'<tr>' +
'<td><code>' + job.id + '</code></td>' +
'<td>' + typeLabel + '</td>' +
'<td>' + tenantLabel + '</td>' +
'<td>' + job.source_type + '</td>' +
'<td>' + statusBadge(job.status) + '</td>' +
@ -546,6 +679,7 @@
els.jobsTableBody.querySelectorAll('[data-job-inspect]').forEach(function (button) {
button.addEventListener('click', function () {
state.selectedJobId = button.getAttribute('data-job-inspect');
navigateTo('jobs');
refreshSelectedJob().catch(function () {
showFeedback(els.submitFeedback, 'Failed to load selected job details.', 'error');
});
@ -586,7 +720,7 @@
state.selectedJobData = null;
els.selectedJobId.textContent = 'No selection';
els.jobSummary.textContent = 'Select a job to inspect targets and deviations.';
els.targetsTableBody.innerHTML = '<tr><td colspan="4">No job selected.</td></tr>';
els.targetsTableBody.innerHTML = '<tr><td colspan="6">No job selected.</td></tr>';
els.deviationsTableBody.innerHTML = '<tr><td colspan="6">No deviation data yet.</td></tr>';
els.jobSiteFilter.innerHTML = '<option value="">All sites</option>';
els.exportJobBtn.setAttribute('hidden', '');
@ -599,6 +733,9 @@
}
function renderJobTables(job) {
const scanTypeNow = job.scan_type || 'sharepoint';
const isMailbox = scanTypeNow === 'mailbox';
const isEntra = scanTypeNow === 'entra_groups';
const siteFilter = els.jobSiteFilter.value;
const filteredTargets = siteFilter
@ -609,18 +746,91 @@
? job.deviations.filter(function (d) { return d.site_url === siteFilter; })
: job.deviations;
// Header swap based on scan type
if (els.targetsHeading) {
els.targetsHeading.textContent = isMailbox ? 'Mailboxes' : isEntra ? 'Groups' : 'Targets';
}
if (els.targetsTableHead) {
var targetsHead;
if (isMailbox) {
targetsHead = '<tr><th>Mailbox</th><th>Status</th><th>Attempts</th><th>Error</th><th>Connection test</th><th></th></tr>';
} else if (isEntra) {
targetsHead = '<tr><th>Group</th><th>Status</th><th>Attempts</th><th>Error</th><th>Connection test</th><th></th></tr>';
} else {
targetsHead = '<tr><th>URL</th><th>Status</th><th>Attempts</th><th>Error</th><th>Connection test</th><th></th></tr>';
}
els.targetsTableHead.innerHTML = targetsHead;
}
if (els.deviationsTableHead) {
var devHead;
if (isMailbox) {
devHead = '<tr><th>Mailbox</th><th>Object</th><th>Permission Type</th><th>Principal</th><th>Access Rights</th><th></th></tr>';
} else if (isEntra) {
devHead = '<tr><th>Group</th><th>Group Type</th><th>User</th><th>Role</th><th></th><th></th></tr>';
} else {
devHead = '<tr><th>Site</th><th>Object</th><th>Type</th><th>Principal</th><th>Role</th><th>Delta</th></tr>';
}
els.deviationsTableHead.innerHTML = devHead;
}
// Hide SharingLinks/Resolve Groups for non-SharePoint jobs
if (isMailbox || isEntra) {
if (els.sharingLinksResolveBlock) els.sharingLinksResolveBlock.setAttribute('hidden', '');
if (els.resolveGroupsBlock) els.resolveGroupsBlock.setAttribute('hidden', '');
} else if (els.resolveGroupsBlock) {
els.resolveGroupsBlock.removeAttribute('hidden');
}
els.targetsTableBody.innerHTML = filteredTargets.length
? filteredTargets.map(function (target) {
return (
'<tr>' +
'<tr data-target-id="' + target.id + '">' +
'<td>' + escHtml(target.site_url) + '</td>' +
'<td>' + statusBadge(target.status) + '</td>' +
'<td>' + target.attempts + '</td>' +
'<td>' + escHtml(target.error_message || '-') + '</td>' +
'<td class="probe-cell">' + renderProbeStatus(target) + '</td>' +
'<td><button type="button" class="btn btn-outline btn-small probe-btn" data-target-id="' + target.id + '">Test</button></td>' +
'</tr>'
);
}).join('')
: '<tr><td colspan="4">No targets.</td></tr>';
: '<tr><td colspan="6">No targets.</td></tr>';
if (isMailbox) {
els.deviationsTableBody.innerHTML = filteredDeviations.length
? filteredDeviations.map(function (d) {
return (
'<tr>' +
'<td class="col-site">' + escHtml(d.site_url) + '</td>' +
'<td class="col-object">' + escHtml(d.object_url) + '</td>' +
'<td class="col-type">' + escHtml(d.permission_type || d.object_type) + '</td>' +
'<td class="col-principal">' + escHtml(d.principal) + '</td>' +
'<td class="col-role">' + escHtml(d.role_name) + '</td>' +
'<td></td>' +
'</tr>'
);
}).join('')
: '<tr><td colspan="6">No mailbox permissions found for this job.</td></tr>';
return;
}
if (isEntra) {
els.deviationsTableBody.innerHTML = filteredDeviations.length
? filteredDeviations.map(function (d) {
return (
'<tr>' +
'<td class="col-site">' + escHtml(d.object_url) + '</td>' +
'<td class="col-type">' + escHtml(d.permission_type || '') + '</td>' +
'<td class="col-principal">' + escHtml(d.principal) + '</td>' +
'<td class="col-role">' + escHtml(d.role_name) + '</td>' +
'<td></td>' +
'<td></td>' +
'</tr>'
);
}).join('')
: '<tr><td colspan="6">No group memberships found for this job.</td></tr>';
return;
}
els.deviationsTableBody.innerHTML = filteredDeviations.length
? filteredDeviations.map(function (deviation) {
@ -639,7 +849,11 @@
principalCell = '<td class="col-principal" title="' + escHtml(deviation.principal) + '">' + badge + members + '</td>';
} else {
const principalShort = shortPrincipal(deviation.principal);
principalCell = '<td class="col-principal"><span class="cell-truncate" title="' + escHtml(deviation.principal) + '">' + escHtml(principalShort) + '</span></td>';
var membersBlock = '';
if (deviation.resolved_members) {
membersBlock = '<br><span class="cell-members">' + escHtml(deviation.resolved_members) + '</span>';
}
principalCell = '<td class="col-principal"><span class="cell-truncate" title="' + escHtml(deviation.principal) + '">' + escHtml(principalShort) + '</span>' + membersBlock + '</td>';
}
return (
'<tr>' +
@ -700,25 +914,43 @@
els.exportJobBtn.removeAttribute('hidden');
// Build resolve sharing links section
_renderResolveBlock(job);
await _renderResolveBlock(job);
renderJobTables(job);
}
function _renderResolveBlock(job) {
async function _renderResolveBlock(job) {
// Preserve current selection before re-render (auto-refresh runs every few seconds).
if (state.selectedJobId === job.id) {
var currentSelected = Array.from(
els.sharingLinksTypes.querySelectorAll('.sharing-link-type-check:checked')
).map(function (cb) { return cb.value; });
state.sharingLinkSelectionByJob[job.id] = currentSelected;
}
if (job.status === 'queued' || job.status === 'running') {
els.sharingLinksResolveBlock.setAttribute('hidden', '');
return;
}
// Collect unique link types present in this job's deviations
if ((job.scan_type || 'sharepoint') === 'mailbox') {
els.sharingLinksResolveBlock.setAttribute('hidden', '');
return;
}
var typeCounts = {};
job.deviations.forEach(function (dev) {
var lt = sharingLinkType(dev.principal);
if (lt) {
typeCounts[lt] = (typeCounts[lt] || 0) + 1;
}
});
try {
var typeData = await requestJson('/api/scan-jobs/' + encodeURIComponent(job.id) + '/sharing-link-types');
typeCounts = typeData.type_counts || {};
} catch (_err) {
// Fallback to currently loaded deviations when aggregate endpoint fails.
job.deviations.forEach(function (dev) {
var lt = sharingLinkType(dev.principal);
if (lt) {
typeCounts[lt] = (typeCounts[lt] || 0) + 1;
}
});
}
var types = Object.keys(typeCounts);
if (!types.length) {
@ -727,8 +959,15 @@
}
types.sort();
var rememberedSelection = state.sharingLinkSelectionByJob[job.id];
els.sharingLinksTypes.innerHTML = types.map(function (lt) {
var checked = SHARING_LINK_DEFAULT_CHECKED.indexOf(lt) !== -1 ? 'checked' : '';
var isChecked;
if (Array.isArray(rememberedSelection)) {
isChecked = rememberedSelection.indexOf(lt) !== -1;
} else {
isChecked = SHARING_LINK_DEFAULT_CHECKED.indexOf(lt) !== -1;
}
var checked = isChecked ? 'checked' : '';
var riskCls = sharingLinkRiskClass(lt);
return (
'<label class="checkline">' +
@ -739,6 +978,14 @@
);
}).join('');
els.sharingLinksTypes.querySelectorAll('.sharing-link-type-check').forEach(function (cb) {
cb.addEventListener('change', function () {
state.sharingLinkSelectionByJob[job.id] = Array.from(
els.sharingLinksTypes.querySelectorAll('.sharing-link-type-check:checked')
).map(function (x) { return x.value; });
});
});
els.sharingLinksResolveBlock.removeAttribute('hidden');
showFeedback(els.resolveFeedback, '', '');
}
@ -774,9 +1021,26 @@
// Link types that are resolved by default (checked in the UI)
var SHARING_LINK_DEFAULT_CHECKED = ['AnonymousEdit', 'AnonymousView', 'Flexible'];
/**
 * Pull the sharing-link group name out of a principal string.
 *
 * The principal may be the bare group name or a pipe-delimited value that
 * carries the group name as one of its segments. Matching is
 * case-insensitive on the "SharingLinks." prefix, and surrounding
 * whitespace on each segment is ignored.
 *
 * @param {*} principal Principal value; coerced to a string when truthy.
 * @returns {?string} The trimmed segment that starts with "SharingLinks.",
 *   preferring the right-most match, or null when there is none.
 */
function extractSharingLinkGroupName(principal) {
  if (!principal) return null;
  var prefixPattern = /^sharinglinks\./i;
  var segments = String(principal).split('|');
  // Walk right-to-left so the last matching segment wins. A separate
  // whole-string check is unnecessary: a string that starts with the prefix
  // always matches through its first segment.
  for (var i = segments.length - 1; i >= 0; i -= 1) {
    var candidate = segments[i].trim();
    if (prefixPattern.test(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Return the link-type token of a sharing-link principal.
 *
 * Detection is delegated entirely to extractSharingLinkGroupName so that
 * pipe-delimited and differently-cased principals are recognised. A
 * stricter startsWith('SharingLinks.') pre-check here would reject those
 * before the extractor runs and would throw on non-string input.
 *
 * @param {*} principal Principal value from a deviation row.
 * @returns {?string} The third dot-separated token of the group name
 *   (e.g. "AnonymousView"), or null when the principal is not a sharing-link
 *   group or has fewer than three tokens.
 */
function sharingLinkType(principal) {
  var groupName = extractSharingLinkGroupName(principal);
  if (!groupName) return null;
  var parts = groupName.split('.');
  return parts.length >= 3 ? parts[2] : null;
}
@ -872,6 +1136,14 @@
els.manualScanForm.addEventListener('submit', createManualJob);
els.csvScanForm.addEventListener('submit', createCsvJob);
// Delegated click handler for the targets table: rows are re-rendered via
// innerHTML, so the listener lives on the table body and reacts only to
// clicks that land on (or inside) a ".probe-btn" carrying a data-target-id.
els.targetsTableBody.addEventListener('click', function (clickEvent) {
  var probeButton = clickEvent.target.closest('.probe-btn');
  var probeTargetId = probeButton ? probeButton.getAttribute('data-target-id') : null;
  if (probeTargetId) {
    testTargetConnection(probeTargetId, probeButton);
  }
});
els.refreshJobsBtn.addEventListener('click', function () {
tick().catch(function () {
showFeedback(els.submitFeedback, 'Refresh failed.', 'error');
@ -929,6 +1201,29 @@
});
});
// "Resolve groups" button (optional element): POSTs to the selected job's
// resolve-groups endpoint, reports the returned counters, then reloads the
// selected job. The button is disabled while the request is in flight.
if (els.resolveGroupsBtn) {
  els.resolveGroupsBtn.addEventListener('click', function () {
    if (!state.selectedJobId) return;

    function reportSuccess(result) {
      var summary =
        result.resolved_groups + ' groups resolved, ' +
        result.skipped_groups + ' skipped (no readable members), ' +
        result.updated_deviations + ' deviations updated.';
      showFeedback(els.resolveGroupsFeedback, summary, 'ok');
      // Returning the promise lets a refresh failure reach reportFailure.
      return refreshSelectedJob();
    }

    function reportFailure(err) {
      showFeedback(els.resolveGroupsFeedback, 'Resolve failed: ' + err.message, 'error');
    }

    function reEnableButton() {
      els.resolveGroupsBtn.disabled = false;
    }

    els.resolveGroupsBtn.disabled = true;
    showFeedback(els.resolveGroupsFeedback, 'Resolving SharePoint groups…', '');

    var endpoint = '/api/scan-jobs/' + encodeURIComponent(state.selectedJobId) + '/resolve-groups';
    requestJson(endpoint, { method: 'POST' })
      .then(reportSuccess)
      .catch(reportFailure)
      .finally(reEnableButton);
  });
}
els.exportJobBtn.addEventListener('click', function () {
if (!state.selectedJobId) return;
const siteFilter = els.jobSiteFilter.value;
@ -939,10 +1234,356 @@
window.location.href = url;
});
// -------------------------------------------------------------------------
// Mailbox scan creation
// -------------------------------------------------------------------------
/**
 * Read the tenant profile chosen for a mailbox scan.
 *
 * @returns {{tenant_profile_id: string}} Auth fragment merged into the
 *   scan-job payload.
 * @throws {Error} When the select element is missing or has no value.
 */
function readMailboxScanAuth() {
  var tenantSelect = els.mailboxScanTenantSelect;
  var tenantProfileId = tenantSelect ? tenantSelect.value : '';
  if (tenantProfileId) {
    return { tenant_profile_id: tenantProfileId };
  }
  throw new Error('Select a tenant profile with a certificate.');
}
/**
 * Submit handler for the manual mailbox form.
 *
 * Reads one UPN per line (trimmed, lower-cased, blanks dropped), POSTs a
 * "mailbox" scan job to /api/scan-jobs and, on success, selects the new job
 * and switches to the jobs route. All outcomes are reported through
 * els.mailboxSubmitFeedback.
 *
 * @param {Event} event Form submit event; its default action is cancelled.
 */
async function createManualMailboxJob(event) {
  event.preventDefault();

  const mailboxes = [];
  (els.manualMailboxes.value || '').split(/\r?\n/).forEach(function (rawLine) {
    const upn = rawLine.trim().toLowerCase();
    if (upn) {
      mailboxes.push(upn);
    }
  });
  if (mailboxes.length === 0) {
    showFeedback(els.mailboxSubmitFeedback, 'Enter at least one UPN.', 'error');
    return;
  }

  let auth;
  try {
    auth = readMailboxScanAuth();
  } catch (authError) {
    showFeedback(els.mailboxSubmitFeedback, authError.message, 'error');
    return;
  }

  try {
    const requestBody = Object.assign(
      { scan_type: 'mailbox', mailboxes: mailboxes, skip_default_sites: false },
      auth
    );
    const response = await requestJson('/api/scan-jobs', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    showFeedback(
      els.mailboxSubmitFeedback,
      'Mailbox job queued: ' + response.job.id +
        ' | accepted=' + response.accepted_urls.length +
        ', invalid=' + response.invalid_urls.length,
      'ok'
    );
    // Clear the textarea only after the job was accepted.
    els.manualMailboxes.value = '';
    state.selectedJobId = response.job.id;
    navigateTo('jobs');
    await refreshJobs();
    await refreshSelectedJob();
  } catch (requestError) {
    showFeedback(els.mailboxSubmitFeedback, 'Mailbox scan failed: ' + requestError.message, 'error');
  }
}
/**
 * Submit handler for the CSV mailbox form.
 *
 * Uploads the selected file as multipart form data (fields: file,
 * scan_type=mailbox, tenant_profile_id) to the CSV import endpoint and, on
 * success, selects the new job and switches to the jobs route.
 *
 * @param {Event} event Form submit event; its default action is cancelled.
 */
async function createCsvMailboxJob(event) {
  event.preventDefault();

  const selectedFiles = els.csvMailboxFile.files;
  const csvFile = selectedFiles && selectedFiles[0];
  if (!csvFile) {
    showFeedback(els.mailboxSubmitFeedback, 'Select a CSV file first.', 'error');
    return;
  }

  let auth;
  try {
    auth = readMailboxScanAuth();
  } catch (authError) {
    showFeedback(els.mailboxSubmitFeedback, authError.message, 'error');
    return;
  }

  const upload = new FormData();
  upload.append('file', csvFile);
  upload.append('scan_type', 'mailbox');
  upload.append('tenant_profile_id', auth.tenant_profile_id);

  try {
    const response = await requestJson('/api/scan-jobs/import-csv?skip_default_sites=false', {
      method: 'POST',
      body: upload,
    });
    showFeedback(
      els.mailboxSubmitFeedback,
      'CSV mailbox job queued: ' + response.job.id +
        ' | accepted=' + response.accepted_urls.length +
        ', invalid=' + response.invalid_urls.length,
      'ok'
    );
    // Reset the file input so the same file can be chosen again.
    els.csvMailboxFile.value = '';
    state.selectedJobId = response.job.id;
    navigateTo('jobs');
    await refreshJobs();
    await refreshSelectedJob();
  } catch (requestError) {
    showFeedback(els.mailboxSubmitFeedback, 'CSV import failed: ' + requestError.message, 'error');
  }
}
/**
 * Submit handler for the "scan all mailboxes" form.
 *
 * Requires an organization value containing at least one dot, then POSTs a
 * mailbox job with scan_all_mailboxes=true. The form's submit button is
 * disabled for the duration of the request and always re-enabled afterwards.
 *
 * @param {Event} event Form submit event; its default action is cancelled.
 */
async function createAllMailboxesJob(event) {
  event.preventDefault();

  const organization = (els.allMailboxesOrg.value || '').trim().toLowerCase();
  const looksLikeDomain = organization && organization.indexOf('.') !== -1;
  if (!looksLikeDomain) {
    showFeedback(els.mailboxSubmitFeedback, 'Enter the tenant primary domain (e.g. contoso.onmicrosoft.com).', 'error');
    return;
  }

  let auth;
  try {
    auth = readMailboxScanAuth();
  } catch (authError) {
    showFeedback(els.mailboxSubmitFeedback, authError.message, 'error');
    return;
  }

  const submitButton = els.allMailboxesForm.querySelector('button[type="submit"]');
  if (submitButton) {
    submitButton.disabled = true;
  }
  showFeedback(els.mailboxSubmitFeedback, 'Enumerating all mailboxes — this can take up to a minute…', '');

  try {
    const requestBody = Object.assign(
      {
        scan_type: 'mailbox',
        scan_all_mailboxes: true,
        organization: organization,
        skip_default_sites: false,
      },
      auth
    );
    const response = await requestJson('/api/scan-jobs', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    showFeedback(
      els.mailboxSubmitFeedback,
      'All-mailboxes job queued: ' + response.job.id + ' | accepted=' + response.accepted_urls.length,
      'ok'
    );
    state.selectedJobId = response.job.id;
    navigateTo('jobs');
    await refreshJobs();
    await refreshSelectedJob();
  } catch (requestError) {
    showFeedback(els.mailboxSubmitFeedback, 'Scan-all failed: ' + requestError.message, 'error');
  } finally {
    if (submitButton) {
      submitButton.disabled = false;
    }
  }
}
// Wire up the mailbox scan forms. Each element is looked up defensively, so
// a page without one of these forms simply skips that listener.
if (els.manualMailboxForm) els.manualMailboxForm.addEventListener('submit', createManualMailboxJob);
if (els.csvMailboxForm) els.csvMailboxForm.addEventListener('submit', createCsvMailboxJob);
if (els.allMailboxesForm) els.allMailboxesForm.addEventListener('submit', createAllMailboxesJob);

// Choosing a tenant pre-fills the organization field with that tenant's
// primary domain, when the tenant is known and has one.
if (els.mailboxScanTenantSelect) {
  els.mailboxScanTenantSelect.addEventListener('change', function () {
    var selectedId = els.mailboxScanTenantSelect.value;
    var match = state.tenants.find(function (candidate) {
      return candidate.id === selectedId;
    });
    if (!match || !match.primary_domain || !els.allMailboxesOrg) return;
    els.allMailboxesOrg.value = match.primary_domain;
  });
}

// Changing the job-type filter triggers a refresh tick; failures are
// deliberately ignored here.
if (els.jobTypeFilter) {
  els.jobTypeFilter.addEventListener('change', function () {
    tick().catch(function () { /* ignore */ });
  });
}
// -------------------------------------------------------------------------
// Entra group scan creation
// -------------------------------------------------------------------------
/**
 * Read the tenant profile chosen for an Entra group scan.
 *
 * @returns {{tenant_profile_id: string}} Auth fragment merged into the
 *   scan-job payload.
 * @throws {Error} When the select element is missing or has no value.
 */
function readEntraScanAuth() {
  var tenantSelect = els.entraScanTenantSelect;
  var tenantProfileId = tenantSelect ? tenantSelect.value : '';
  if (tenantProfileId) {
    return { tenant_profile_id: tenantProfileId };
  }
  throw new Error('Select a tenant profile with a certificate.');
}
/**
 * Submit handler for the manual Entra group form.
 *
 * Reads one identifier per line (trimmed, blanks dropped), POSTs an
 * "entra_groups" scan job to /api/scan-jobs and, on success, selects the new
 * job and switches to the jobs route. All outcomes are reported through
 * els.entraSubmitFeedback.
 *
 * @param {Event} event Form submit event; its default action is cancelled.
 */
async function createManualEntraJob(event) {
  event.preventDefault();

  const groupIds = [];
  (els.manualEntraIds.value || '').split(/\r?\n/).forEach(function (rawLine) {
    const identifier = rawLine.trim();
    if (identifier) {
      groupIds.push(identifier);
    }
  });
  if (groupIds.length === 0) {
    showFeedback(els.entraSubmitFeedback, 'Enter at least one Object ID, mail, or display name.', 'error');
    return;
  }

  let auth;
  try {
    auth = readEntraScanAuth();
  } catch (authError) {
    showFeedback(els.entraSubmitFeedback, authError.message, 'error');
    return;
  }

  try {
    const requestBody = Object.assign(
      { scan_type: 'entra_groups', group_ids: groupIds, skip_default_sites: false },
      auth
    );
    const response = await requestJson('/api/scan-jobs', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    showFeedback(
      els.entraSubmitFeedback,
      'Entra job queued: ' + response.job.id +
        ' | accepted=' + response.accepted_urls.length +
        ', invalid=' + response.invalid_urls.length,
      'ok'
    );
    // Clear the textarea only after the job was accepted.
    els.manualEntraIds.value = '';
    state.selectedJobId = response.job.id;
    navigateTo('jobs');
    await refreshJobs();
    await refreshSelectedJob();
  } catch (requestError) {
    showFeedback(els.entraSubmitFeedback, 'Entra scan failed: ' + requestError.message, 'error');
  }
}
/**
 * Submit handler for the CSV Entra group form.
 *
 * Uploads the selected file as multipart form data (fields: file,
 * scan_type=entra_groups, tenant_profile_id) to the CSV import endpoint and,
 * on success, selects the new job and switches to the jobs route.
 *
 * @param {Event} event Form submit event; its default action is cancelled.
 */
async function createCsvEntraJob(event) {
  event.preventDefault();

  const selectedFiles = els.csvEntraFile.files;
  const csvFile = selectedFiles && selectedFiles[0];
  if (!csvFile) {
    showFeedback(els.entraSubmitFeedback, 'Select a CSV file first.', 'error');
    return;
  }

  let auth;
  try {
    auth = readEntraScanAuth();
  } catch (authError) {
    showFeedback(els.entraSubmitFeedback, authError.message, 'error');
    return;
  }

  const upload = new FormData();
  upload.append('file', csvFile);
  upload.append('scan_type', 'entra_groups');
  upload.append('tenant_profile_id', auth.tenant_profile_id);

  try {
    const response = await requestJson('/api/scan-jobs/import-csv?skip_default_sites=false', {
      method: 'POST',
      body: upload,
    });
    showFeedback(
      els.entraSubmitFeedback,
      'CSV Entra job queued: ' + response.job.id + ' | accepted=' + response.accepted_urls.length,
      'ok'
    );
    // Reset the file input so the same file can be chosen again.
    els.csvEntraFile.value = '';
    state.selectedJobId = response.job.id;
    navigateTo('jobs');
    await refreshJobs();
    await refreshSelectedJob();
  } catch (requestError) {
    showFeedback(els.entraSubmitFeedback, 'CSV import failed: ' + requestError.message, 'error');
  }
}
/**
 * Submit handler for the "scan all Entra groups" form.
 *
 * POSTs an "entra_groups" job with scan_all_groups=true for the selected
 * tenant profile. The form's submit button is disabled for the duration of
 * the request and always re-enabled afterwards.
 *
 * @param {Event} event Form submit event; its default action is cancelled.
 */
async function createAllEntraJob(event) {
  event.preventDefault();

  let auth;
  try {
    auth = readEntraScanAuth();
  } catch (authError) {
    showFeedback(els.entraSubmitFeedback, authError.message, 'error');
    return;
  }

  const submitButton = els.allEntraForm.querySelector('button[type="submit"]');
  if (submitButton) {
    submitButton.disabled = true;
  }
  showFeedback(els.entraSubmitFeedback, 'Enumerating all groups in tenant — this can take up to two minutes…', '');

  try {
    const requestBody = Object.assign(
      {
        scan_type: 'entra_groups',
        scan_all_groups: true,
        skip_default_sites: false,
      },
      auth
    );
    const response = await requestJson('/api/scan-jobs', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    showFeedback(
      els.entraSubmitFeedback,
      'All-groups job queued: ' + response.job.id + ' | accepted=' + response.accepted_urls.length,
      'ok'
    );
    state.selectedJobId = response.job.id;
    navigateTo('jobs');
    await refreshJobs();
    await refreshSelectedJob();
  } catch (requestError) {
    showFeedback(els.entraSubmitFeedback, 'Scan-all failed: ' + requestError.message, 'error');
  } finally {
    if (submitButton) {
      submitButton.disabled = false;
    }
  }
}
// Wire up the Entra scan forms; each form element is optional.
if (els.manualEntraForm) {
  els.manualEntraForm.addEventListener('submit', createManualEntraJob);
}
if (els.csvEntraForm) {
  els.csvEntraForm.addEventListener('submit', createCsvEntraJob);
}
if (els.allEntraForm) {
  els.allEntraForm.addEventListener('submit', createAllEntraJob);
}
// -------------------------------------------------------------------------
// Hash router
// -------------------------------------------------------------------------
/**
 * Translate the current URL fragment into a route name.
 *
 * A leading "#" or "#/" is stripped. An empty fragment maps to "dashboard";
 * "scan/<kind>" maps to "scan-<kind>"; otherwise the first path segment is
 * the route.
 *
 * @returns {string} Route name (not validated against the known routes).
 */
function parseRoute() {
  var fragment = (window.location.hash || '').replace(/^#\/?/, '');
  if (fragment === '') return 'dashboard';

  // Splitting a fragment without "/" yields a single segment, so one code
  // path covers both the flat and the nested form.
  var segments = fragment.split('/');
  var head = segments[0];
  var sub = segments[1];
  return head === 'scan' && sub ? 'scan-' + sub : head;
}
/**
 * Show the page for `route`, highlight its sidebar link and set the title.
 *
 * Unknown routes fall back to "dashboard". A route counts as known only when
 * it is an own key of ROUTE_TITLES with a truthy title; a plain
 * `ROUTE_TITLES[route]` lookup would also accept inherited keys such as
 * "constructor" or "toString" (reachable via the URL fragment), which would
 * hide every page and write a function's source text into the title.
 *
 * @param {string} route Route name, typically produced by parseRoute().
 */
function applyRoute(route) {
  var isKnownRoute =
    Object.prototype.hasOwnProperty.call(ROUTE_TITLES, route) && Boolean(ROUTE_TITLES[route]);
  if (!isKnownRoute) {
    route = 'dashboard';
  }
  state.currentRoute = route;

  // Exactly the page whose data-route-page matches stays visible.
  document.querySelectorAll('.route-page').forEach(function (page) {
    if (page.getAttribute('data-route-page') === route) {
      page.removeAttribute('hidden');
    } else {
      page.setAttribute('hidden', '');
    }
  });

  // Mirror the selection in the sidebar.
  document.querySelectorAll('.sidebar-nav .nav-link').forEach(function (link) {
    if (link.getAttribute('data-route') === route) {
      link.classList.add('active');
    } else {
      link.classList.remove('active');
    }
  });

  if (els.contentTitle) {
    els.contentTitle.textContent = ROUTE_TITLES[route];
  }
}
/**
 * Navigate to `route` by updating the URL fragment.
 *
 * The three scan routes map to nested "#/scan/<kind>" fragments; every other
 * route maps to "#/<route>". When the fragment already matches, assigning it
 * would not change anything, so the route is applied directly instead.
 *
 * @param {string} route Route name.
 */
function navigateTo(route) {
  var targetHash;
  switch (route) {
    case 'scan-sharepoint':
      targetHash = '#/scan/sharepoint';
      break;
    case 'scan-mailbox':
      targetHash = '#/scan/mailbox';
      break;
    case 'scan-entra':
      targetHash = '#/scan/entra';
      break;
    default:
      targetHash = '#/' + route;
  }

  if (window.location.hash === targetHash) {
    applyRoute(route);
    return;
  }
  window.location.hash = targetHash;
}
// Keep the visible page in sync with the URL fragment: once right now for
// the initial load, and again on every later fragment change.
function syncRouteFromHash() {
  applyRoute(parseRoute());
}
window.addEventListener('hashchange', syncRouteFromHash);
syncRouteFromHash();
// -------------------------------------------------------------------------
// Init
// -------------------------------------------------------------------------
/**
 * Fetch /api/version and show the reported version in els.appVersion.
 *
 * Does nothing when the element is absent. A failed request, or a response
 * without a truthy `version`, leaves the element's current text untouched.
 */
function loadVersion() {
  if (els.appVersion) {
    requestJson('/api/version')
      .then(function (payload) {
        var version = payload && payload.version;
        if (version) {
          els.appVersion.textContent = version;
        }
      })
      .catch(function () { /* leave placeholder on failure */ });
  }
}
loadVersion();
consumeOnboardingQueryState();
initOnboardingSection().catch(function () {
els.tenantSetupManual.removeAttribute('hidden');

View File

@ -0,0 +1,16 @@
<svg width="286" height="72" viewBox="0 0 286 72" fill="none" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="logoTitleDark logoDescDark">
<title id="logoTitleDark">Clearview</title>
<desc id="logoDescDark">Clearview logo for dark backgrounds</desc>
<g transform="translate(0 2)">
<ellipse cx="34" cy="34" rx="34" ry="20" fill="#0EA5E9" fill-opacity="0.20"/>
<ellipse cx="34" cy="34" rx="34" ry="20" stroke="#38BDF8" stroke-width="2.4"/>
<circle cx="34" cy="34" r="12" fill="#0EA5E9" fill-opacity="0.30"/>
<circle cx="34" cy="34" r="12" stroke="#38BDF8" stroke-width="2"/>
<circle cx="34" cy="31" r="4" fill="#38BDF8"/>
<rect x="32" y="34" width="4" height="8" rx="2" fill="#38BDF8"/>
<path d="M8 22C16 14 25 10 34 10C43 10 52 14 60 22" stroke="#38BDF8" stroke-opacity="0.55" stroke-width="2"/>
</g>
<text x="80" y="44" font-size="36" font-weight="600" font-family="'Space Grotesk', 'Avenir Next', 'Segoe UI', sans-serif">
<tspan fill="#38BDF8">Clear</tspan><tspan fill="#F4F7FB">view</tspan>
</text>
</svg>

After

Width:  |  Height:  |  Size: 1.0 KiB

View File

@ -3,331 +3,552 @@
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Clearview | SharePoint Permission Deviations</title>
<meta name="description" content="Clearview scans SharePoint sites and reports only permission deviations from root level.">
<title>Clearview | Permission Deviations</title>
<meta name="description" content="Clearview scans Microsoft 365 SharePoint sites and Exchange Online mailboxes for permission deviations.">
<link rel="icon" href="assets/favicon.svg" type="image/svg+xml">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=IBM+Plex+Sans:wght@400;500;600&display=swap" rel="stylesheet">
<link rel="stylesheet" href="styles.css">
</head>
<body>
<body class="app-shell">
<div class="bg-orb orb-one" aria-hidden="true"></div>
<div class="bg-orb orb-two" aria-hidden="true"></div>
<header class="topbar slide-in">
<a href="#" class="brand" aria-label="Clearview home">
<img src="assets/clearview-logo.svg" alt="Clearview logo" class="brand-logo">
</a>
<div class="topbar-actions">
<button id="refreshJobsBtn" class="btn btn-outline" type="button">Refresh</button>
<aside class="sidebar">
<div class="sidebar-brand">
<img src="assets/clearview-logo-dark.svg" alt="Clearview" class="brand-logo">
</div>
</header>
<nav class="sidebar-nav">
<a href="#/dashboard" class="nav-link" data-route="dashboard">Dashboard</a>
<a href="#/jobs" class="nav-link" data-route="jobs">Scan Jobs</a>
<main class="layout">
<section class="hero fade-up" style="--delay: 0.05s">
<p class="eyebrow">Root Permission Drift Detection</p>
<h1>Monitor SharePoint permissions across all your customers</h1>
<p class="lede">
Clearview scans down to folder and file level and reports only rights that deviate from the
root permissions of each site.
</p>
<div class="hero-stats" id="heroStats">
<article>
<span class="kpi" id="statTenants">0</span>
<span class="label">Tenants</span>
</article>
<article>
<span class="kpi" id="statJobs">0</span>
<span class="label">Jobs</span>
</article>
<article>
<span class="kpi" id="statRunning">0</span>
<span class="label">Active Jobs</span>
</article>
<div class="nav-section">SharePoint</div>
<a href="#/scan/sharepoint" class="nav-link" data-route="scan-sharepoint">New SP Scan</a>
<div class="nav-section">Mailboxes</div>
<a href="#/scan/mailbox" class="nav-link" data-route="scan-mailbox">New Mailbox Scan</a>
<div class="nav-section">Entra</div>
<a href="#/scan/entra" class="nav-link" data-route="scan-entra">New Entra Scan</a>
<div class="nav-spacer"></div>
<a href="#/tenants" class="nav-link" data-route="tenants">Tenants</a>
<a href="#/settings" class="nav-link" data-route="settings">Settings</a>
</nav>
<div class="sidebar-foot">
<span class="sidebar-version" id="appVersion" title="Running Clearview build"></span>
</div>
</aside>
<main class="content">
<header class="content-topbar">
<div class="content-title" id="contentTitle">Dashboard</div>
<div class="content-actions">
<button id="refreshJobsBtn" class="btn btn-outline" type="button">Refresh</button>
</div>
</header>
<!-- =================================================================== -->
<!-- Route: Dashboard -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="dashboard">
<div class="hero fade-up">
<p class="eyebrow">Permission Drift Detection</p>
<h1>Monitor Microsoft 365 permissions across all customers</h1>
<p class="lede">
Scan SharePoint sites for deviations from root permissions, and Exchange Online
mailboxes for delegated access (Full Access, Send As, Send on Behalf, folder delegations).
</p>
<div class="hero-stats">
<article>
<span class="kpi" id="statTenants">0</span>
<span class="label">Tenants</span>
</article>
<article>
<span class="kpi" id="statJobs">0</span>
<span class="label">Jobs</span>
</article>
<article>
<span class="kpi" id="statRunning">0</span>
<span class="label">Active Jobs</span>
</article>
</div>
</div>
</section>
<!-- ------------------------------------------------------------------ -->
<!-- Tenants panel -->
<!-- ------------------------------------------------------------------ -->
<section class="panel fade-up" style="--delay: 0.11s">
<div class="panel-header split">
<h2>Tenants</h2>
<button id="addTenantBtn" class="btn btn-outline" type="button">Add Tenant</button>
</div>
<!-- =================================================================== -->
<!-- Route: Tenants -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="tenants" hidden>
<div class="panel">
<div class="panel-header split">
<h2>Tenants</h2>
<button id="addTenantBtn" class="btn btn-outline" type="button">Add Tenant</button>
</div>
<!-- Add / Edit tenant form (hidden by default) -->
<div id="addTenantForm" class="scan-form" hidden>
<h3>New Tenant</h3>
<div id="addTenantForm" class="scan-form" hidden>
<h3>New Tenant</h3>
<!-- Automated onboarding -->
<div id="tenantSetupAutomated" class="setup-note" hidden>
<h3>Azure App Setup (automated)</h3>
<p>Connect to the customer's Microsoft tenant, then create a dedicated scan app automatically.</p>
<ul>
<li>Click <strong>Connect Microsoft</strong> and approve admin consent for the customer tenant</li>
<li>Created scan app receives SharePoint application permission: <code>Sites.FullControl.All</code></li>
</ul>
<form id="onboardingForm" class="onboarding-form" action="#" method="post">
<div class="onboarding-grid">
<div class="onboarding-wide">
<button id="connectMicrosoftBtn" class="btn btn-outline" type="button">Connect Microsoft</button>
<div id="tenantSetupAutomated" class="setup-note" hidden>
<h3>Azure App Setup (automated)</h3>
<p>Connect to the customer's Microsoft tenant, then create a dedicated scan app automatically.</p>
<ul>
<li>Click <strong>Connect Microsoft</strong> and approve admin consent.</li>
<li>Created scan app receives SharePoint <code>Sites.FullControl.All</code> with admin consent.</li>
<li>For mailbox scanning, the <strong>Exchange.ManageAsApp</strong> permission and <strong>Exchange Administrator</strong> Entra role must be added manually after creation — see the <em>Enable mailbox scanning</em> section below.</li>
</ul>
<form id="onboardingForm" class="onboarding-form" action="#" method="post">
<div class="onboarding-grid">
<div class="onboarding-wide">
<button id="connectMicrosoftBtn" class="btn btn-outline" type="button">Connect Microsoft</button>
</div>
<label class="onboarding-wide">
Connected Tenant ID
<input id="connectedTenantId" type="text" placeholder="Connect first to populate tenant id">
</label>
<label class="onboarding-wide">
New Scan App Display Name
<input id="scanAppDisplayName" type="text" value="Clearview Scan App">
</label>
</div>
<label class="onboarding-wide">
Connected Tenant ID
<input id="connectedTenantId" type="text" placeholder="Connect first to populate tenant id">
</label>
<label class="onboarding-wide">
New Scan App Display Name
<input id="scanAppDisplayName" type="text" value="Clearview Scan App">
</label>
</div>
<button class="btn btn-outline" type="submit">Create Scan App Automatically</button>
<button class="btn btn-outline" type="submit">Create Scan App Automatically</button>
</form>
</div>
<div id="tenantSetupManual" class="setup-note" hidden>
<h3>Azure App Setup (manual)</h3>
<p>Create a dedicated Azure app registration in the customer's tenant.</p>
<ol class="setup-steps">
<li>Open <strong>Azure Portal</strong> → <strong>Entra ID → App registrations → New registration</strong>.</li>
<li>Pick a name (e.g. <em>Clearview Scan App</em>), select <strong>Single tenant</strong>, click <strong>Register</strong>.</li>
<li>Copy <strong>Directory (tenant) ID</strong> and <strong>Application (client) ID</strong>.</li>
<li>For SharePoint: <strong>API permissions → Add a permission → SharePoint → Application permissions</strong>, select <code>Sites.FullControl.All</code>, then click <strong>Grant admin consent</strong>.</li>
<li>For group resolution (recommended): also add <strong>Microsoft Graph → Application permissions → <code>Group.Read.All</code></strong> and grant admin consent. This lets Clearview expand Microsoft 365 / Azure AD security groups to their members and owners during the <em>Resolve groups</em> action. Without it, M365 group entries are kept as a single line.</li>
<li>The primary domain is the tenant's default Microsoft 365 domain — typically <code>&lt;tenantname&gt;.onmicrosoft.com</code>. Find it in <strong>Microsoft 365 admin center → Settings → Domains</strong> (the <em>Default</em> entry).</li>
</ol>
</div>
<div id="tenantSetupMailbox" class="setup-note">
<h3>Enable mailbox scanning (Exchange Online)</h3>
<p>Mailbox scanning needs additional permissions on the scan app, on top of the SharePoint setup. Skip this section if the tenant only needs SharePoint scans.</p>
<ol class="setup-steps">
<li><strong>Add the API permission.</strong> Azure Portal → <strong>Entra ID → App registrations → [your scan app] → API permissions → Add a permission → APIs my organization uses</strong>. Search for <em>Office 365 Exchange Online</em>, choose <strong>Application permissions</strong> and tick <code>Exchange.ManageAsApp</code>. Click <strong>Add permissions</strong>.</li>
<li><strong>Grant admin consent.</strong> Still on the API permissions page, click <strong>Grant admin consent for &lt;tenant&gt;</strong>. Verify the status column shows <em>Granted for &lt;tenant&gt;</em>.</li>
<li><strong>Assign the Exchange Administrator role.</strong> Entra ID → <strong>Roles and administrators</strong> → search <em>Exchange Administrator</em> → click the role → <strong>Add assignments</strong> → search the scan app by name (you'll need to switch the picker to include <em>Service principals / Apps</em>) → select it and confirm. This role grants the app the right to read mailbox permissions; it cannot be granted via Microsoft Graph and must be done in the portal.</li>
<li><strong>Generate a certificate.</strong> Save the tenant first (this section's form), then use the <strong>Certificate</strong> button in the Tenants table to generate a self-signed RSA-2048 key. The public PEM appears in a panel — click <strong>Download .cer</strong>.</li>
<li><strong>Upload the certificate to Azure.</strong> Back in the scan app, go to <strong>Certificates &amp; secrets → Certificates → Upload certificate</strong>, pick the downloaded <code>.cer</code> file, and confirm. Azure shows the SHA-1 thumbprint — it must match the one shown in the Tenants table.</li>
<li><strong>Fill in the Primary Domain field</strong> on the tenant form (e.g. <code>contoso.onmicrosoft.com</code>). Clearview uses this for <code>Connect-ExchangeOnline -Organization</code> and to auto-fill the Mailbox scan form.</li>
<li><strong>Test the connection.</strong> Run a <em>Scan all mailboxes</em> job for this tenant; preflight on the first target validates that authentication works end-to-end.</li>
</ol>
<p class="setup-hint">Exchange Online does <strong>not</strong> support client-secret app-only authentication. Mailbox scans require a certificate. The same certificate is reused for SharePoint scans, so generating it once is enough.</p>
</div>
<div class="auth-grid">
<label class="onboarding-wide">
Tenant Name (label for your reference)
<input id="newTenantName" type="text" placeholder="Contoso">
</label>
<label>
Tenant ID
<input id="newTenantTenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label>
Primary Domain <span style="font-weight:400;font-size:0.82rem">(used by mailbox scanning, e.g. contoso.onmicrosoft.com)</span>
<input id="newTenantPrimaryDomain" type="text" placeholder="contoso.onmicrosoft.com">
</label>
<label>
Client ID
<input id="newTenantClientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret <span style="font-weight:400;font-size:0.82rem">(optional — not needed when using a certificate; not supported for mailbox scans)</span>
<input id="newTenantClientSecret" type="password" placeholder="Leave empty if you will generate a certificate">
</label>
</div>
<div class="form-actions">
<button id="saveTenantBtn" class="btn btn-solid" type="button">Save Tenant</button>
<button id="cancelTenantBtn" class="btn btn-outline" type="button">Cancel</button>
</div>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Name</th>
<th>Tenant ID</th>
<th>Client ID</th>
<th>Auth</th>
<th>Added</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="tenantsTableBody">
<tr><td colspan="6">No tenants configured yet.</td></tr>
</tbody>
</table>
</div>
<div id="tenantFeedback" class="feedback" aria-live="polite"></div>
<div id="certBlock" class="cert-block" hidden>
<h3>Public Certificate</h3>
<p>Upload this certificate in <strong>Azure Portal → App registrations → [your app] → Certificates &amp; secrets → Certificates → Upload certificate</strong>.</p>
<textarea id="certPem" class="cert-pem" rows="10" readonly></textarea>
<div class="form-actions">
<button id="downloadCertBtn" class="btn btn-solid" type="button">Download .cer</button>
<button id="copyCertBtn" class="btn btn-outline" type="button">Copy to clipboard</button>
<button id="closeCertBtn" class="btn btn-outline" type="button">Close</button>
</div>
</div>
</div>
</section>
<!-- =================================================================== -->
<!-- Route: Scan SharePoint -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="scan-sharepoint" hidden>
<div class="panel">
<div class="panel-header split">
<h2>New SharePoint Scan</h2>
<span class="badge">SharePoint</span>
</div>
<div class="scan-form auth-block">
<h3>Scan mode</h3>
<label>
What to collect
<select id="sharepointScanMode">
<option value="sharepoint">Deviations from root (libraries, folders, files)</option>
<option value="sharepoint_root">Root permissions only (site-level role assignments)</option>
</select>
</label>
<p class="setup-hint">
<strong>Deviations from root</strong> traverses every document library and reports only permissions that
differ from the site root baseline. <strong>Root permissions only</strong> lists the role assignments
on the site root itself — much faster, useful for an inventory of who has site-level access.
</p>
</div>
<div class="scan-form auth-block">
<h3>Tenant</h3>
<label>
Select Tenant Profile
<select id="scanTenantSelect" data-shared-tenant-select>
<option value="">-- Select a tenant --</option>
<option value="__manual__">Manual credentials...</option>
</select>
</label>
</div>
<div id="manualCredentialsBlock" class="scan-form auth-block" hidden>
<h3>Microsoft App Credentials</h3>
<div class="auth-grid">
<label>
Tenant ID
<input id="tenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label>
Client ID
<input id="clientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret
<input id="clientSecret" type="password" placeholder="Client secret">
</label>
</div>
</div>
<div class="form-grid">
<form id="manualScanForm" class="scan-form" action="#" method="post">
<h3>Manual URLs</h3>
<label>
Site URLs (one per line)
<textarea id="manualUrls" rows="6" placeholder="https://contoso.sharepoint.com/sites/finance&#10;https://contoso.sharepoint.com/sites/hr"></textarea>
</label>
<label class="checkline">
<input id="manualSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue manual scan</button>
</form>
<form id="csvScanForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import</h3>
<label>
Microsoft Sites export (CSV)
<input id="csvFile" type="file" accept=".csv,text/csv">
</label>
<label class="checkline">
<input id="csvSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form>
</div>
<!-- Manual onboarding -->
<div id="tenantSetupManual" class="setup-note" hidden>
<h3>Azure App Setup (manual)</h3>
<p>Create a dedicated Azure app registration in the customer's tenant and grant it SharePoint access.</p>
<ol class="setup-steps">
<li>Open <strong>Azure Portal</strong> and go to <strong>Entra ID &rarr; App registrations &rarr; New registration</strong>.</li>
<li>Fill in a name (e.g. <em>Clearview Scan App</em>), select <strong>Single tenant</strong>, click <strong>Register</strong>.</li>
<li>Copy the <strong>Directory (tenant) ID</strong> and <strong>Application (client) ID</strong> from the Overview page.</li>
<li>Go to <strong>API permissions &rarr; Add &rarr; SharePoint &rarr; Application permissions</strong>, add <code>Sites.FullControl.All</code>.</li>
<li>Click <strong>Grant admin consent</strong>.</li>
<li>Go to <strong>Certificates &amp; secrets &rarr; New client secret</strong>, copy the <strong>Value</strong> immediately.</li>
</ol>
<div id="submitFeedback" class="feedback" aria-live="polite"></div>
</div>
</section>
<!-- =================================================================== -->
<!-- Route: Scan Mailbox -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="scan-mailbox" hidden>
<div class="panel">
<div class="panel-header split">
<h2>New Mailbox Scan</h2>
<span class="badge">Exchange Online</span>
</div>
<!-- Tenant fields -->
<div class="auth-grid">
<label class="onboarding-wide">
Tenant Name (label for your reference)
<input id="newTenantName" type="text" placeholder="Contoso">
</label>
<div class="scan-form auth-block">
<h3>Tenant</h3>
<label>
Tenant ID
<input id="newTenantTenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label>
Client ID
<input id="newTenantClientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret <span style="font-weight:400;font-size:0.82rem">(optional — not needed when using a certificate)</span>
<input id="newTenantClientSecret" type="password" placeholder="Leave empty if you will generate a certificate">
Select Tenant Profile
<select id="mailboxScanTenantSelect" data-shared-tenant-select>
<option value="">-- Select a tenant --</option>
</select>
</label>
<p class="setup-hint">
Mailbox scanning requires a certificate on the tenant profile and the
<code>Exchange.ManageAsApp</code> permission with the Exchange Administrator role.
Client-secret authentication is not supported for Exchange Online.
</p>
</div>
<div class="form-actions">
<button id="saveTenantBtn" class="btn btn-solid" type="button">Save Tenant</button>
<button id="cancelTenantBtn" class="btn btn-outline" type="button">Cancel</button>
<div class="form-grid">
<form id="manualMailboxForm" class="scan-form" action="#" method="post">
<h3>Manual UPNs</h3>
<label>
User Principal Names (one per line)
<textarea id="manualMailboxes" rows="6" placeholder="alice@contoso.com&#10;bob@contoso.com"></textarea>
</label>
<button class="btn btn-solid" type="submit">Queue mailbox scan</button>
</form>
<form id="csvMailboxForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import</h3>
<label>
CSV with <code>UserPrincipalName</code> / <code>Email</code> column
<input id="csvMailboxFile" type="file" accept=".csv,text/csv">
</label>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form>
<form id="allMailboxesForm" class="scan-form" action="#" method="post">
<h3>All mailboxes in tenant</h3>
<label>
Organization (primary tenant domain)
<input id="allMailboxesOrg" type="text" placeholder="contoso.onmicrosoft.com">
</label>
<p class="setup-hint">
Clearview enumerates every mailbox in the tenant via <code>Get-EXOMailbox -ResultSize Unlimited</code>
and queues one target per mailbox. Can take 10–60 seconds for large tenants.
</p>
<button class="btn btn-solid" type="submit">Queue scan for all mailboxes</button>
</form>
</div>
<div id="mailboxSubmitFeedback" class="feedback" aria-live="polite"></div>
</div>
</section>
<!-- =================================================================== -->
<!-- Route: Scan Entra Groups -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="scan-entra" hidden>
<div class="panel">
<div class="panel-header split">
<h2>New Entra Group Scan</h2>
<span class="badge">Microsoft Graph</span>
</div>
<div class="scan-form auth-block">
<h3>Tenant</h3>
<label>
Select Tenant Profile
<select id="entraScanTenantSelect">
<option value="">-- Select a tenant --</option>
</select>
</label>
<p class="setup-hint">
Entra group scans use the <strong>Microsoft Graph</strong> API. The scan app needs the
Application permission <code>Group.Read.All</code> with admin consent. Authentication
uses the same tenant certificate as SharePoint and Mailbox scans.
</p>
</div>
<div class="form-grid">
<form id="manualEntraForm" class="scan-form" action="#" method="post">
<h3>Manual Object IDs</h3>
<label>
Group identifiers (one per line — Object ID, mail address, or display name)
<textarea id="manualEntraIds" rows="6" placeholder="00000000-0000-0000-0000-000000000000&#10;Pharmacology@contoso.onmicrosoft.com"></textarea>
</label>
<button class="btn btn-solid" type="submit">Queue Entra scan</button>
</form>
<form id="csvEntraForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import (Entra export)</h3>
<label>
CSV with <code>Object ID</code> column (Entra "Groups" export)
<input id="csvEntraFile" type="file" accept=".csv,text/csv">
</label>
<p class="setup-hint">
Export from Entra portal → Groups → All groups → Download. Clearview reads the
<code>Object ID</code> / <code>id</code> column; other columns are ignored.
</p>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form>
<form id="allEntraForm" class="scan-form" action="#" method="post">
<h3>All groups in tenant</h3>
<p class="setup-hint">
Enumerates every group in the tenant (any type) via Microsoft Graph and queues one
target per group. Can take 30–120 seconds for large tenants.
</p>
<button class="btn btn-solid" type="submit">Queue scan for all groups</button>
</form>
</div>
<div id="entraSubmitFeedback" class="feedback" aria-live="polite"></div>
</div>
</section>
<!-- =================================================================== -->
<!-- Route: Jobs (list + selected job details) -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="jobs" hidden>
<div class="panel">
<div class="panel-header split">
<h2>Scan Jobs</h2>
<div class="panel-header-right">
<select id="jobTypeFilter" class="filter-select">
<option value="">All types</option>
<option value="sharepoint">SharePoint deviations</option>
<option value="sharepoint_root">SharePoint root</option>
<option value="mailbox">Mailbox</option>
<option value="entra_groups">Entra groups</option>
</select>
<select id="jobTenantFilter" class="filter-select">
<option value="">All tenants</option>
</select>
<span id="jobAutoRefresh" class="badge">Auto refresh: on</span>
</div>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Job ID</th>
<th>Type</th>
<th>Tenant</th>
<th>Source</th>
<th>Status</th>
<th>Targets</th>
<th>Items</th>
<th>Updated</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="jobsTableBody">
<tr><td colspan="9">No jobs yet.</td></tr>
</tbody>
</table>
</div>
</div>
<!-- Tenants table -->
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Name</th>
<th>Tenant ID</th>
<th>Client ID</th>
<th>Auth</th>
<th>Added</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="tenantsTableBody">
<tr><td colspan="6">No tenants configured yet.</td></tr>
</tbody>
</table>
</div>
<div class="panel">
<div class="panel-header split">
<h2>Selected Job Details</h2>
<div class="panel-header-right">
<select id="jobSiteFilter" class="filter-select">
<option value="">All targets</option>
</select>
<button id="exportJobBtn" class="btn btn-outline" type="button" hidden>Export Excel</button>
<span id="selectedJobId" class="badge">No selection</span>
</div>
</div>
<div id="tenantFeedback" class="feedback" aria-live="polite"></div>
<div id="jobSummary" class="job-summary">Select a job to inspect targets and deviations.</div>
<div id="jobActivity" class="job-activity" hidden></div>
<!-- Certificate display block (shown after generation) -->
<div id="certBlock" class="cert-block" hidden>
<h3>Public Certificate</h3>
<p>Upload this certificate in <strong>Azure Portal &rarr; App registrations &rarr; [your app] &rarr; Certificates &amp; secrets &rarr; Certificates &rarr; Upload certificate</strong>.</p>
<textarea id="certPem" class="cert-pem" rows="10" readonly></textarea>
<div class="form-actions">
<button id="downloadCertBtn" class="btn btn-solid" type="button">Download .cer</button>
<button id="copyCertBtn" class="btn btn-outline" type="button">Copy to clipboard</button>
<button id="closeCertBtn" class="btn btn-outline" type="button">Close</button>
<h3 class="subheading" id="targetsHeading">Targets</h3>
<div class="table-wrap compact-wrap">
<table>
<thead id="targetsTableHead">
<tr>
<th>URL</th>
<th>Status</th>
<th>Attempts</th>
<th>Error</th>
<th>Connection test</th>
<th></th>
</tr>
</thead>
<tbody id="targetsTableBody">
<tr><td colspan="6">No job selected.</td></tr>
</tbody>
</table>
</div>
<div id="sharingLinksResolveBlock" hidden>
<h3 class="subheading">Resolve Sharing Links</h3>
<p class="resolve-hint">Fetch the actual recipients for the selected link types. Anonymous links have no resolvable members.</p>
<div id="sharingLinksTypes" class="sharing-link-types"></div>
<div class="form-actions" style="margin-top:0.6rem">
<button id="resolveSharingLinksBtn" class="btn btn-outline" type="button">Resolve</button>
</div>
<div id="resolveFeedback" class="feedback" aria-live="polite"></div>
</div>
<div id="resolveGroupsBlock" hidden>
<h3 class="subheading">Resolve SharePoint Groups</h3>
<p class="resolve-hint">
Expand SharePoint groups (Owners / Members / Visitors / custom site groups) to the underlying
user list. When a member is itself a Microsoft 365 / Azure AD group, Clearview recursively
expands it via Microsoft Graph (members + owners, depth 3) — requires
<code>Group.Read.All</code> on Microsoft Graph for that tenant. Without that permission the
M365 group lines stay collapsed. Members are written to the deviation rows and Excel export.
</p>
<div class="form-actions" style="margin-top:0.6rem">
<button id="resolveGroupsBtn" class="btn btn-outline" type="button">Resolve groups</button>
</div>
<div id="resolveGroupsFeedback" class="feedback" aria-live="polite"></div>
</div>
<h3 class="subheading">Permission Deviations</h3>
<div class="table-wrap deviations-wrap">
<table>
<thead id="deviationsTableHead">
<tr>
<th>Site</th>
<th>Object</th>
<th>Type</th>
<th>Principal</th>
<th>Role</th>
<th>Delta</th>
</tr>
</thead>
<tbody id="deviationsTableBody">
<tr><td colspan="6">No deviation data yet.</td></tr>
</tbody>
</table>
</div>
</div>
</section>
<!-- ------------------------------------------------------------------ -->
<!-- Start New Scan panel -->
<!-- ------------------------------------------------------------------ -->
<section class="panel fade-up" style="--delay: 0.17s">
<div class="panel-header split">
<h2>Start New Scan</h2>
<span class="badge">Async job queue</span>
</div>
<!-- Tenant selector -->
<div class="scan-form auth-block">
<h3>Tenant</h3>
<label>
Select Tenant Profile
<select id="scanTenantSelect">
<option value="">-- Select a tenant --</option>
<option value="__manual__">Manual credentials...</option>
</select>
</label>
</div>
<!-- Manual credentials (only shown when __manual__ selected) -->
<div id="manualCredentialsBlock" class="scan-form auth-block" hidden>
<h3>Microsoft App Credentials</h3>
<div class="auth-grid">
<label>
Tenant ID
<input id="tenantId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label>
Client ID
<input id="clientId" type="text" placeholder="00000000-0000-0000-0000-000000000000">
</label>
<label class="auth-secret">
Client Secret
<input id="clientSecret" type="password" placeholder="Client secret">
</label>
<!-- =================================================================== -->
<!-- Route: Settings (placeholder) -->
<!-- =================================================================== -->
<section class="route-page" data-route-page="settings" hidden>
<div class="panel">
<div class="panel-header split">
<h2>Settings</h2>
</div>
</div>
<div class="form-grid">
<form id="manualScanForm" class="scan-form" action="#" method="post">
<h3>Manual URLs</h3>
<label>
Site URLs (one per line)
<textarea id="manualUrls" rows="6" placeholder="https://contoso.sharepoint.com/sites/finance&#10;https://contoso.sharepoint.com/sites/hr"></textarea>
</label>
<label class="checkline">
<input id="manualSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue manual scan</button>
</form>
<form id="csvScanForm" class="scan-form" action="#" method="post" enctype="multipart/form-data">
<h3>CSV Import</h3>
<label>
Microsoft Sites export (CSV)
<input id="csvFile" type="file" accept=".csv,text/csv">
</label>
<label class="checkline">
<input id="csvSkipDefaults" type="checkbox" checked>
<span>Skip default sites (tenant root, app catalog)</span>
</label>
<button class="btn btn-solid" type="submit">Queue CSV scan</button>
</form>
</div>
<div id="submitFeedback" class="feedback" aria-live="polite"></div>
</section>
<!-- ------------------------------------------------------------------ -->
<!-- Scan Jobs panel -->
<!-- ------------------------------------------------------------------ -->
<section class="panel fade-up" style="--delay: 0.23s">
<div class="panel-header split">
<h2>Scan Jobs</h2>
<div class="panel-header-right">
<select id="jobTenantFilter" class="filter-select">
<option value="">All tenants</option>
</select>
<span id="jobAutoRefresh" class="badge">Auto refresh: on</span>
</div>
</div>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Job ID</th>
<th>Tenant</th>
<th>Source</th>
<th>Status</th>
<th>Targets</th>
<th>Items</th>
<th>Updated</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="jobsTableBody">
<tr><td colspan="8">No jobs yet.</td></tr>
</tbody>
</table>
</div>
</section>
<!-- ------------------------------------------------------------------ -->
<!-- Selected Job Details panel -->
<!-- ------------------------------------------------------------------ -->
<section class="panel fade-up" style="--delay: 0.29s">
<div class="panel-header split">
<h2>Selected Job Details</h2>
<div class="panel-header-right">
<select id="jobSiteFilter" class="filter-select">
<option value="">All sites</option>
</select>
<button id="exportJobBtn" class="btn btn-outline" type="button" hidden>Export Excel</button>
<span id="selectedJobId" class="badge">No selection</span>
</div>
</div>
<div id="jobSummary" class="job-summary">Select a job to inspect targets and deviations.</div>
<div id="jobActivity" class="job-activity" hidden></div>
<h3 class="subheading">Targets</h3>
<div class="table-wrap compact-wrap">
<table>
<thead>
<tr>
<th>URL</th>
<th>Status</th>
<th>Attempts</th>
<th>Error</th>
</tr>
</thead>
<tbody id="targetsTableBody">
<tr><td colspan="4">No job selected.</td></tr>
</tbody>
</table>
</div>
<div id="sharingLinksResolveBlock" hidden>
<h3 class="subheading">Resolve Sharing Links</h3>
<p class="resolve-hint">Fetch the actual recipients for the selected link types. Anonymous links have no resolvable members.</p>
<div id="sharingLinksTypes" class="sharing-link-types"></div>
<div class="form-actions" style="margin-top:0.6rem">
<button id="resolveSharingLinksBtn" class="btn btn-outline" type="button">Resolve</button>
</div>
<div id="resolveFeedback" class="feedback" aria-live="polite"></div>
</div>
<h3 class="subheading">Permission Deviations</h3>
<div class="table-wrap deviations-wrap">
<table>
<thead>
<tr>
<th>Site</th>
<th>Object</th>
<th>Type</th>
<th>Principal</th>
<th>Role</th>
<th>Delta</th>
</tr>
</thead>
<tbody id="deviationsTableBody">
<tr><td colspan="6">No deviation data yet.</td></tr>
</tbody>
</table>
<p class="setup-hint">Runtime configuration is currently controlled via environment variables in <code>stack/.env</code>. See the <strong>TECHNICAL.md</strong> document for the full list (timeouts, retries, scan caps, onboarding).</p>
</div>
</section>
</main>

View File

@ -642,3 +642,157 @@ strong {
flex: 1;
}
}
/* ===========================================================================
Sidebar layout (added in mailbox-scanning refactor)
=========================================================================== */
.app-shell {
display: grid;
grid-template-columns: 220px 1fr;
min-height: 100vh;
}
.sidebar {
background: linear-gradient(180deg, #0f1d33 0%, #0b1424 100%);
color: #e6edf7;
display: flex;
flex-direction: column;
padding: 0;
position: sticky;
top: 0;
height: 100vh;
border-right: 1px solid rgba(255, 255, 255, 0.06);
}
.sidebar-brand {
height: 64px;
display: flex;
align-items: center;
padding: 0 1rem;
border-bottom: 1px solid rgba(255, 255, 255, 0.08);
}
.sidebar-brand .brand-logo {
height: 36px;
filter: brightness(1.05) saturate(1.1);
}
.sidebar-nav {
flex: 1;
display: flex;
flex-direction: column;
gap: 0.15rem;
padding: 0.75rem 0.5rem;
overflow-y: auto;
}
.sidebar-nav .nav-link {
display: block;
padding: 0.5rem 0.75rem;
border-radius: 8px;
color: rgba(230, 237, 247, 0.85);
text-decoration: none;
font-size: 0.9rem;
font-weight: 500;
transition: background-color 0.12s ease, color 0.12s ease;
}
.sidebar-nav .nav-link:hover {
background: rgba(255, 255, 255, 0.06);
color: #ffffff;
}
.sidebar-nav .nav-link.active {
background: rgba(14, 165, 233, 0.18);
color: #ffffff;
box-shadow: inset 2px 0 0 var(--cv-accent);
}
.sidebar-nav .nav-section {
padding: 0.65rem 0.75rem 0.25rem;
font-size: 0.7rem;
font-weight: 700;
letter-spacing: 0.08em;
text-transform: uppercase;
color: rgba(230, 237, 247, 0.45);
}
.sidebar-nav .nav-spacer {
flex: 1;
min-height: 1rem;
}
.sidebar-foot {
padding: 0.75rem 1rem;
border-top: 1px solid rgba(255, 255, 255, 0.08);
font-size: 0.78rem;
color: rgba(230, 237, 247, 0.55);
}
.content {
display: flex;
flex-direction: column;
min-width: 0;
padding: 0 1.25rem 2rem;
}
.content-topbar {
display: flex;
align-items: center;
justify-content: space-between;
height: 64px;
border-bottom: 1px solid var(--cv-border);
margin-bottom: 1rem;
}
.content-title {
font-family: "Space Grotesk", sans-serif;
font-size: 1.15rem;
font-weight: 600;
}
.content-actions {
display: flex;
gap: 0.5rem;
}
.route-page {
display: flex;
flex-direction: column;
gap: 1rem;
}
.route-page[hidden] {
display: none !important;
}
.setup-hint {
font-size: 0.85rem;
color: var(--cv-text-secondary);
margin: 0.5rem 0 0;
}
@media (max-width: 900px) {
.app-shell {
grid-template-columns: 1fr;
}
.sidebar {
position: static;
height: auto;
flex-direction: row;
flex-wrap: wrap;
}
.sidebar-nav {
flex-direction: row;
flex-wrap: wrap;
overflow-x: auto;
}
.sidebar-nav .nav-section,
.sidebar-nav .nav-spacer {
display: none;
}
.sidebar-foot {
display: none;
}
}

View File

@ -2,13 +2,18 @@ from __future__ import annotations
import csv
import io
import re
from .default_sites import normalize_site_url
_EMAIL_RE = re.compile(r"^[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}$")
class CsvImportResult:
def __init__(self) -> None:
self.urls: list[str] = []
self.mailboxes: list[str] = []
self.invalid_rows: list[str] = []
self.total_rows: int = 0
@ -22,7 +27,7 @@ def parse_sites_csv(content: bytes) -> CsvImportResult:
if not reader.fieldnames:
return result
url_key = _resolve_url_column(reader.fieldnames)
url_key = _resolve_column(reader.fieldnames, ("url", "site url", "siteurl"))
if not url_key:
return result
@ -49,9 +54,80 @@ def parse_sites_csv(content: bytes) -> CsvImportResult:
return result
def _resolve_url_column(fieldnames: list[str]) -> str | None:
def parse_entra_groups_csv(content: bytes) -> CsvImportResult:
    """Collect unique group identifiers from an Entra "Groups" CSV export.

    The bytes are decoded as UTF-8 (a leading BOM is dropped, undecodable
    bytes are replaced) and read with ``csv.DictReader``. The identifier
    column is located through ``_resolve_column`` using the accepted header
    spellings listed below.

    Returns a ``CsvImportResult`` where:
    - ``total_rows`` counts every data row read,
    - ``urls`` holds the lower-cased identifiers, first occurrence only,
    - ``invalid_rows`` holds one message per row with an empty identifier.

    An empty result is returned when the file has no header row or no
    recognised identifier column.
    """
    outcome = CsvImportResult()
    rows = csv.DictReader(io.StringIO(content.decode("utf-8-sig", errors="replace")))
    headers = rows.fieldnames
    if not headers:
        return outcome

    column = _resolve_column(
        headers,
        ("object id", "objectid", "id", "objectguid", "object_id"),
    )
    if not column:
        return outcome

    known_ids: set[str] = set()
    # Row numbers start at 2 — presumably so messages line up with the
    # spreadsheet view, where the header occupies row 1.
    for line_no, record in enumerate(rows, start=2):
        outcome.total_rows += 1
        object_id = (record.get(column) or "").strip().lower()
        if not object_id:
            outcome.invalid_rows.append(f"row {line_no}: empty Object ID")
        elif object_id not in known_ids:
            # Duplicates are dropped silently; they are not reported as invalid.
            known_ids.add(object_id)
            outcome.urls.append(object_id)
    return outcome
def parse_mailboxes_csv(content: bytes) -> CsvImportResult:
    """Collect unique, well-formed mailbox addresses from a CSV upload.

    The bytes are decoded as UTF-8 (a leading BOM is dropped, undecodable
    bytes are replaced) and read with ``csv.DictReader``. The address column
    is located through ``_resolve_column`` using the accepted header
    spellings listed below.

    Returns a ``CsvImportResult`` where:
    - ``total_rows`` counts every data row read,
    - ``mailboxes`` holds the lower-cased addresses that match ``_EMAIL_RE``,
      first occurrence only,
    - ``invalid_rows`` holds one message per empty or malformed value.

    An empty result is returned when the file has no header row or no
    recognised address column.
    """
    outcome = CsvImportResult()
    rows = csv.DictReader(io.StringIO(content.decode("utf-8-sig", errors="replace")))
    headers = rows.fieldnames
    if not headers:
        return outcome

    column = _resolve_column(
        headers,
        ("userprincipalname", "upn", "email", "emailaddress", "mail", "mailbox", "primary smtp address"),
    )
    if not column:
        return outcome

    known_addresses: set[str] = set()
    # Row numbers start at 2 — presumably so messages line up with the
    # spreadsheet view, where the header occupies row 1.
    for line_no, record in enumerate(rows, start=2):
        outcome.total_rows += 1
        raw = (record.get(column) or "").strip()
        if not raw:
            outcome.invalid_rows.append(f"row {line_no}: empty mailbox")
            continue

        address = raw.lower()
        if _EMAIL_RE.match(address) is None:
            # Report the value as typed (original casing) so it is easy to find.
            outcome.invalid_rows.append(f"row {line_no}: invalid mailbox '{raw}'")
        elif address not in known_addresses:
            # Duplicates are dropped silently; they are not reported as invalid.
            known_addresses.add(address)
            outcome.mailboxes.append(address)
    return outcome
def _resolve_column(fieldnames: list[str], candidates: tuple[str, ...]) -> str | None:
mapping = {name.strip().lower(): name for name in fieldnames}
for candidate in ("url", "site url", "siteurl"):
for candidate in candidates:
if candidate in mapping:
return mapping[candidate]
return None

View File

@ -1,18 +1,18 @@
from __future__ import annotations
import io
import re
import uuid
from datetime import datetime
from pathlib import Path
import io
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse, RedirectResponse, Response, StreamingResponse
from fastapi.staticfiles import StaticFiles
from sqlalchemy import select, text
from sqlalchemy.orm import joinedload
from .csv_import import parse_sites_csv
from .csv_import import parse_entra_groups_csv, parse_mailboxes_csv, parse_sites_csv
from .db import SessionLocal, engine
from .default_sites import is_default_site, normalize_site_url
from .models import Base, PermissionDeviation, ScanJob, ScanTarget, TenantProfile
@ -25,8 +25,11 @@ from .schemas import (
CreateScanJobRequest,
CreateTenantProfileRequest,
PermissionDeviationItem,
ProbeResultResponse,
ResolveGroupsResponse,
ResolveSharingLinksRequest,
ResolveSharingLinksResponse,
SharingLinkTypesResponse,
ScanJobCreateResponse,
ScanJobDetail,
ScanJobSummary,
@ -34,15 +37,44 @@ from .schemas import (
TenantCertificateResponse,
TenantProfileItem,
)
from .scanner import AuthConfig
from .scanners import AuthConfig, probe
from .version import display_version
from .worker import ScanWorker
app = FastAPI(title="Clearview API", version="0.1.0")
app = FastAPI(title="Clearview API", version=display_version().lstrip("v"))
worker = ScanWorker()
SITE_DIR = Path(__file__).resolve().parents[2] / "site"
def _extract_sharing_link_group_and_type(principal: str) -> tuple[str, str] | None:
"""
Extract (group_name, link_type) from principal values such as:
- SharingLinks.<guid>.<LinkType>.<guid>
- c:0o.c|federateddirectoryclaimprovider|SharingLinks.<guid>.<LinkType>.<guid>
"""
if not principal:
return None
text = principal.strip()
segments = [s.strip() for s in text.split("|") if s.strip()]
candidate = ""
for segment in reversed(segments):
if segment.lower().startswith("sharinglinks."):
candidate = segment
break
if not candidate and text.lower().startswith("sharinglinks."):
candidate = text
if not candidate:
return None
parts = candidate.split(".")
if len(parts) < 3:
return None
return candidate, parts[2]
@app.on_event("startup")
def on_startup() -> None:
Base.metadata.create_all(bind=engine)
@ -60,6 +92,12 @@ def healthz() -> dict[str, str]:
return {"status": "ok"}
@app.get("/api/version")
def version() -> dict[str, str]:
    """Report the user-visible version of the running build (e.g. v0.1.0.3)."""
    running_version = display_version()
    return {"version": running_version}
# ---------------------------------------------------------------------------
# Tenant profiles
# ---------------------------------------------------------------------------
@ -81,6 +119,7 @@ def create_tenant(payload: CreateTenantProfileRequest) -> TenantProfileItem:
id=str(uuid.uuid4()),
name=payload.name.strip(),
tenant_id=payload.tenant_id.strip(),
primary_domain=payload.primary_domain.strip().lower() if payload.primary_domain else None,
client_id=payload.client_id.strip(),
client_secret=payload.client_secret.strip() if payload.client_secret else None,
created_at=now,
@ -100,6 +139,7 @@ def generate_certificate(profile_id: str) -> TenantCertificateResponse:
raise HTTPException(status_code=404, detail="Tenant profile not found")
result = generate_tenant_certificate()
profile.cert_private_key = result.private_key_pem
profile.cert_public_pem = result.public_cert_pem
profile.cert_thumbprint = result.thumbprint
profile.cert_expires_at = result.expires_at
profile.updated_at = datetime.utcnow()
@ -141,11 +181,43 @@ def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
client_id=payload.client_id,
client_secret=payload.client_secret,
)
raw_urls = [str(item) for item in payload.site_urls]
return _create_job_from_urls(
raw_urls=raw_urls,
source_type = "manual"
if payload.scan_type == "entra_groups":
if payload.scan_all_groups:
raw_targets = _enumerate_all_entra_groups(
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
profile_id=profile_id,
)
source_type = "tenant_all"
else:
raw_targets = [str(g) for g in payload.group_ids]
elif payload.scan_type == "mailbox":
if payload.scan_all_mailboxes:
organization = payload.organization
if (not organization) and profile_id:
with SessionLocal() as db:
profile = db.get(TenantProfile, profile_id)
if profile and profile.primary_domain:
organization = profile.primary_domain
raw_targets = _enumerate_all_mailboxes(
organization=organization,
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
profile_id=profile_id,
)
source_type = "tenant_all"
else:
raw_targets = [str(m) for m in payload.mailboxes]
else:
raw_targets = [str(item) for item in payload.site_urls]
return _create_job_from_targets(
raw_targets=raw_targets,
scan_type=payload.scan_type,
skip_default_sites=payload.skip_default_sites,
source_type="manual",
source_type=source_type,
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
@ -156,6 +228,7 @@ def create_scan_job(payload: CreateScanJobRequest) -> ScanJobCreateResponse:
@app.post("/api/scan-jobs/import-csv", response_model=ScanJobCreateResponse)
def create_scan_job_from_csv(
skip_default_sites: bool = True,
scan_type: str = Form("sharepoint"),
tenant_profile_id: str | None = Form(None),
tenant_id: str | None = Form(None),
client_id: str | None = Form(None),
@ -171,9 +244,18 @@ def create_scan_job_from_csv(
client_secret=client_secret,
)
content = file.file.read()
parsed = parse_sites_csv(content)
response = _create_job_from_urls(
raw_urls=parsed.urls,
if scan_type == "mailbox":
parsed = parse_mailboxes_csv(content)
targets = parsed.mailboxes
elif scan_type == "entra_groups":
parsed = parse_entra_groups_csv(content)
targets = parsed.urls
else:
parsed = parse_sites_csv(content)
targets = parsed.urls
response = _create_job_from_targets(
raw_targets=targets,
scan_type=scan_type,
skip_default_sites=skip_default_sites,
source_type="csv",
tenant_id=resolved_tenant_id,
@ -234,7 +316,11 @@ def delete_scan_job(job_id: str) -> Response:
@app.get("/api/scan-jobs", response_model=list[ScanJobSummary])
def list_scan_jobs(limit: int = 20, tenant_profile_id: str | None = None) -> list[ScanJobSummary]:
def list_scan_jobs(
limit: int = 20,
tenant_profile_id: str | None = None,
scan_type: str | None = None,
) -> list[ScanJobSummary]:
with SessionLocal() as db:
stmt = (
select(ScanJob)
@ -244,10 +330,36 @@ def list_scan_jobs(limit: int = 20, tenant_profile_id: str | None = None) -> lis
)
if tenant_profile_id:
stmt = stmt.where(ScanJob.tenant_profile_id == tenant_profile_id)
if scan_type:
stmt = stmt.where(ScanJob.scan_type == scan_type)
jobs = list(db.execute(stmt).unique().scalars())
return [_to_job_summary(job) for job in jobs]
@app.get("/api/scan-jobs/{job_id}/sharing-link-types", response_model=SharingLinkTypesResponse)
def get_sharing_link_types(job_id: str) -> SharingLinkTypesResponse:
    """
    Count the job's sharing-link principals per link type.

    Every deviation principal of the job is run through
    _extract_sharing_link_group_and_type; principals it rejects are ignored.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    with SessionLocal() as session:
        if not session.get(ScanJob, job_id):
            raise HTTPException(status_code=404, detail="Job not found")
        principal_query = select(PermissionDeviation.principal).where(PermissionDeviation.job_id == job_id)
        principals = list(session.execute(principal_query).scalars())

    type_counts: dict[str, int] = {}
    for raw_principal in principals:
        extracted = _extract_sharing_link_group_and_type(str(raw_principal or ""))
        if not extracted:
            continue
        link_type = extracted[1]
        type_counts[link_type] = type_counts.get(link_type, 0) + 1

    return SharingLinkTypesResponse(type_counts=type_counts)
@app.post("/api/scan-jobs/{job_id}/resolve-sharing-links", response_model=ResolveSharingLinksResponse)
def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequest) -> ResolveSharingLinksResponse:
from .scanner import resolve_sharing_link_members
@ -261,11 +373,13 @@ def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequ
cert_private_key: str | None = None
cert_thumbprint: str | None = None
cert_public_pem: str | None = None
if job.tenant_profile_id:
profile = db.get(TenantProfile, job.tenant_profile_id)
if profile:
cert_private_key = profile.cert_private_key
cert_thumbprint = profile.cert_thumbprint
cert_public_pem = profile.cert_public_pem
auth = AuthConfig(
tenant_id=job.auth_tenant_id or "",
@ -273,6 +387,7 @@ def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequ
client_secret=job.auth_client_secret or "",
cert_private_key=cert_private_key,
cert_thumbprint=cert_thumbprint,
cert_public_pem=cert_public_pem,
)
all_deviations = list(
@ -282,15 +397,13 @@ def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequ
# Group by (site_url, principal) so each unique group is resolved once
groups: dict[tuple[str, str], list[int]] = {}
for dev in all_deviations:
if not dev.principal.startswith("SharingLinks."):
parsed = _extract_sharing_link_group_and_type(dev.principal)
if not parsed:
continue
parts = dev.principal.split(".", 3)
if len(parts) < 3:
continue
link_type = parts[2]
group_name, link_type = parsed
if link_type not in payload.link_types:
continue
key = (dev.site_url, dev.principal)
key = (dev.site_url, group_name)
groups.setdefault(key, []).append(dev.id)
updated_deviations = 0
@ -311,6 +424,149 @@ def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequ
)
@app.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
    """
    Expand group principals on this job's deviations and write each group's
    member list to permission_deviations.resolved_members.

    Principals accepted by is_aad_group_principal are resolved with
    resolve_aad_group_members; principals accepted by
    is_sharepoint_group_principal are resolved with
    resolve_sharing_link_members against the deviation's site URL. All other
    principals are ignored. Each unique (site_url, principal) pair is
    resolved once, and the comma-joined member list is written to every
    deviation that shares the pair.

    A group whose lookup raises or returns no members is counted as skipped
    (best effort: one failing group must not abort the rest).

    Raises:
        HTTPException: 404 when the job does not exist, 409 while it is
            queued or running, 400 for mailbox and entra_groups jobs.
    """
    from .scanners.sharepoint import (
        is_aad_group_principal,
        is_sharepoint_group_principal,
        resolve_aad_group_members,
        resolve_sharing_link_members,
    )

    with SessionLocal() as db:
        job = db.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")
        # The error promises "SharePoint jobs only", so reject every
        # non-SharePoint scan type, not just mailbox jobs.
        if (job.scan_type or "sharepoint") in ("mailbox", "entra_groups"):
            raise HTTPException(status_code=400, detail="Group resolution is only available for SharePoint jobs")

        cert_private_key: str | None = None
        cert_thumbprint: str | None = None
        cert_public_pem: str | None = None
        if job.tenant_profile_id:
            profile = db.get(TenantProfile, job.tenant_profile_id)
            if profile:
                cert_private_key = profile.cert_private_key
                cert_thumbprint = profile.cert_thumbprint
                cert_public_pem = profile.cert_public_pem

        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=cert_private_key,
            cert_thumbprint=cert_thumbprint,
            cert_public_pem=cert_public_pem,
        )

        all_deviations = list(
            db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
        )

        # Group deviations by (site_url, principal) so each unique group is resolved once
        groups: dict[tuple[str, str], list[int]] = {}
        for dev in all_deviations:
            if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
                continue
            groups.setdefault((dev.site_url, dev.principal), []).append(dev.id)

    resolved = 0
    skipped = 0
    updated = 0
    for (site_url, group_name), dev_ids in groups.items():
        try:
            if is_aad_group_principal(group_name):
                members = resolve_aad_group_members(group_name, auth)
            else:
                members = resolve_sharing_link_members(site_url, group_name, auth)
        except Exception:  # noqa: BLE001
            # Best effort: an unresolvable group is reported as skipped.
            members = []

        if not members:
            skipped += 1
            continue

        resolved_text = ", ".join(members)
        # Commit per group so already-resolved groups stay persisted.
        with SessionLocal() as db:
            for dev_id in dev_ids:
                dev = db.get(PermissionDeviation, dev_id)
                if dev:
                    dev.resolved_members = resolved_text
            db.commit()
        resolved += 1
        updated += len(dev_ids)

    return ResolveGroupsResponse(
        resolved_groups=resolved,
        skipped_groups=skipped,
        updated_deviations=updated,
    )
@app.post("/api/scan-jobs/{job_id}/targets/{target_id}/test-connection", response_model=ProbeResultResponse)
def test_target_connection(job_id: str, target_id: int) -> ProbeResultResponse:
    """
    Probe a single target of a finished job and store the outcome on it.

    The probe runs with the credentials saved on the job, plus the
    certificate material of the job's tenant profile when one is linked.
    The result is written to the target's last_probe_* columns.

    Raises:
        HTTPException: 404 when the job or target is missing (or the target
            belongs to another job), 409 while the job is queued or running.
    """
    with SessionLocal() as session:
        job = session.get(ScanJob, job_id)
        if not job:
            raise HTTPException(status_code=404, detail="Job not found")
        target = session.get(ScanTarget, target_id)
        if not target or target.job_id != job_id:
            raise HTTPException(status_code=404, detail="Target not found")
        if job.status in ("queued", "running"):
            raise HTTPException(status_code=409, detail="Job is still running")

        profile = session.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=profile.cert_private_key if profile else None,
            cert_thumbprint=profile.cert_thumbprint if profile else None,
            cert_public_pem=profile.cert_public_pem if profile else None,
        )
        probe_target = target.site_url
        probe_kind = job.scan_type or "sharepoint"

    outcome = probe(probe_kind, probe_target, auth)

    with SessionLocal() as session:
        # Re-load the target: it may have been deleted while the probe ran.
        stored_target = session.get(ScanTarget, target_id)
        if not stored_target:
            raise HTTPException(status_code=404, detail="Target not found")
        probed_at = datetime.utcnow()
        stored_target.last_probe_at = probed_at
        stored_target.last_probe_ok = outcome.ok
        stored_target.last_probe_message = outcome.message
        stored_target.updated_at = probed_at
        session.commit()
        session.refresh(stored_target)
        return ProbeResultResponse(
            target_id=stored_target.id,
            ok=outcome.ok,
            message=outcome.message,
            last_probe_at=stored_target.last_probe_at,
        )
@app.get("/api/scan-jobs/{job_id}/export")
def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingResponse:
import openpyxl
@ -364,10 +620,19 @@ def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingRespon
cell.font = header_font_white
cell.fill = header_fill
scan_type = job.scan_type or "sharepoint"
target_label = {
"sharepoint": "Site URL",
"sharepoint_root": "Site URL",
"mailbox": "Mailbox",
"entra_groups": "Group",
}.get(scan_type, "Target")
# Targets sheet
ws_targets = wb.active
ws_targets.title = "Targets"
_style_header(ws_targets, ["Site URL", "Status", "Attempts", "Error", "Started", "Finished"])
_style_header(ws_targets, [target_label, "Status", "Attempts", "Error", "Started", "Finished"])
for t in targets:
ws_targets.append([
t.site_url,
@ -380,29 +645,64 @@ def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingRespon
for col in ws_targets.columns:
ws_targets.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
# Deviations sheet
ws_dev = wb.create_sheet("Deviations")
_style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
for d in deviations:
base = (d.site_url or "").rstrip("/")
obj_rel = d.object_url[len(base):] if base and d.object_url.startswith(base) else d.object_url
link_risk = _sharing_link_risk_label(d.principal)
ws_dev.append([
d.site_url,
obj_rel,
d.object_type,
d.principal,
link_risk,
d.resolved_members or "",
d.role_name,
d.delta_type,
])
if link_risk in _risk_styles:
risk_fill, risk_font = _risk_styles[link_risk]
risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
risk_cell.fill = risk_fill
risk_cell.font = risk_font
# Results sheet — name and columns depend on scan type
if scan_type == "mailbox":
ws_dev = wb.create_sheet("Mailbox Permissions")
_style_header(ws_dev, ["Mailbox", "Object", "Permission Type", "Principal", "Access Rights"])
deviations.sort(key=lambda d: (d.site_url or "", d.permission_type or "", d.principal or ""))
for d in deviations:
ws_dev.append([
d.site_url,
d.object_url,
d.permission_type or d.object_type,
d.principal,
d.role_name,
])
elif scan_type == "entra_groups":
ws_dev = wb.create_sheet("Group Memberships")
_style_header(ws_dev, ["Group", "Group Type", "User", "Role"])
deviations.sort(key=lambda d: (d.object_url or "", d.role_name or "", d.principal or ""))
for d in deviations:
ws_dev.append([
d.object_url,
d.permission_type or "",
d.principal,
d.role_name,
])
elif scan_type == "sharepoint_root":
ws_dev = wb.create_sheet("Root Permissions")
_style_header(ws_dev, ["Site URL", "Principal", "Resolved Members", "Role"])
deviations.sort(key=lambda d: (d.site_url or "", d.principal or "", d.role_name or ""))
for d in deviations:
ws_dev.append([
d.site_url,
d.principal,
d.resolved_members or "",
d.role_name,
])
else:
ws_dev = wb.create_sheet("Deviations")
_style_header(ws_dev, ["Site URL", "Object URL", "Object Type", "Principal", "Link Risk", "Resolved Members", "Role", "Delta"])
deviations.sort(key=lambda d: (d.site_url or "", d.object_url or "", d.principal or ""))
for d in deviations:
base = (d.site_url or "").rstrip("/")
obj_rel = d.object_url[len(base):] if base and d.object_url.startswith(base) else d.object_url
link_risk = _sharing_link_risk_label(d.principal)
ws_dev.append([
d.site_url,
obj_rel,
d.object_type,
d.principal,
link_risk,
d.resolved_members or "",
d.role_name,
d.delta_type,
])
if link_risk in _risk_styles:
risk_fill, risk_font = _risk_styles[link_risk]
risk_cell = ws_dev.cell(row=ws_dev.max_row, column=5)
risk_cell.fill = risk_fill
risk_cell.font = risk_font
for col in ws_dev.columns:
ws_dev.column_dimensions[col[0].column_letter].width = max(len(str(c.value or "")) for c in col) + 4
@ -410,7 +710,7 @@ def export_scan_job(job_id: str, site_url: str | None = None) -> StreamingRespon
wb.save(buf)
buf.seek(0)
filename = f"clearview_job_{job_id}.xlsx"
filename = _build_export_filename(job, job_id)
return StreamingResponse(
buf,
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
@ -452,6 +752,9 @@ def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
error_message=t.error_message,
started_at=t.started_at,
finished_at=t.finished_at,
last_probe_at=t.last_probe_at,
last_probe_ok=t.last_probe_ok,
last_probe_message=t.last_probe_message,
)
for t in targets
],
@ -464,6 +767,7 @@ def get_scan_job(job_id: str, site_url: str | None = None) -> ScanJobDetail:
principal=d.principal,
role_name=d.role_name,
delta_type=d.delta_type,
permission_type=d.permission_type,
resolved_members=d.resolved_members,
created_at=d.created_at,
)
@ -549,6 +853,98 @@ app.mount("/", StaticFiles(directory=SITE_DIR, html=True), name="site")
# Helpers
# ---------------------------------------------------------------------------
_SCAN_TYPE_LABELS = {
"sharepoint": "Deviations",
"sharepoint_root": "Root",
"mailbox": "Mailbox",
"entra_groups": "EntraGroups",
}
def _build_export_filename(job: ScanJob, job_id: str) -> str:
tenant_label = (job.tenant_profile.name if job.tenant_profile else None) or "Manual"
safe_tenant = re.sub(r"[^A-Za-z0-9_-]+", "_", tenant_label).strip("_") or "Manual"
scan_type = job.scan_type or "sharepoint"
type_label = _SCAN_TYPE_LABELS.get(scan_type, scan_type)
short_id = job_id.replace("-", "")[-12:]
return f"ClearView_{safe_tenant}_{type_label}_{short_id}.xlsx"
def _enumerate_all_entra_groups(
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """
    List every Entra group of the tenant as scan targets.

    Certificate material is taken from the tenant profile when profile_id
    points at an existing profile; otherwise only the client secret is used.

    Raises:
        HTTPException: 400 wrapping any error raised by the enumeration.
    """
    cert_fields: dict[str, str | None] = {
        "cert_private_key": None,
        "cert_thumbprint": None,
        "cert_public_pem": None,
    }
    if profile_id:
        with SessionLocal() as session:
            stored_profile = session.get(TenantProfile, profile_id)
            if stored_profile:
                cert_fields = {
                    "cert_private_key": stored_profile.cert_private_key,
                    "cert_thumbprint": stored_profile.cert_thumbprint,
                    "cert_public_pem": stored_profile.cert_public_pem,
                }

    auth = AuthConfig(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret or "",
        **cert_fields,
    )

    from .scanners import entra as _entra

    try:
        return _entra.list_all_groups(auth)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Group enumeration failed: {exc}") from exc
def _enumerate_all_mailboxes(
    organization: str | None,
    tenant_id: str,
    client_id: str,
    client_secret: str | None,
    profile_id: str | None,
) -> list[str]:
    """
    List every mailbox of the organization as scan targets.

    organization must look like a domain (it has to contain a dot); it is
    stripped and lower-cased before being handed to the mailbox scanner.
    Certificate material is taken from the tenant profile when profile_id
    points at an existing profile.

    Raises:
        HTTPException: 400 when organization is missing or has no dot, and
            400 wrapping any error raised by the enumeration.
    """
    organization_is_valid = bool(organization) and "." in organization
    if not organization_is_valid:
        raise HTTPException(
            status_code=400,
            detail="organization (e.g. contoso.onmicrosoft.com) is required when scan_all_mailboxes is true",
        )

    cert_fields: dict[str, str | None] = {
        "cert_private_key": None,
        "cert_thumbprint": None,
        "cert_public_pem": None,
    }
    if profile_id:
        with SessionLocal() as session:
            stored_profile = session.get(TenantProfile, profile_id)
            if stored_profile:
                cert_fields = {
                    "cert_private_key": stored_profile.cert_private_key,
                    "cert_thumbprint": stored_profile.cert_thumbprint,
                    "cert_public_pem": stored_profile.cert_public_pem,
                }

    auth = AuthConfig(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret or "",
        **cert_fields,
    )

    from .scanners import mailbox as _mailbox

    normalized_org = organization.strip().lower()
    try:
        return _mailbox.list_mailboxes(organization=normalized_org, auth=auth)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"Mailbox enumeration failed: {exc}") from exc
def _resolve_credentials(
db,
tenant_profile_id: str | None,
@ -574,8 +970,9 @@ def _resolve_credentials(
)
def _create_job_from_urls(
raw_urls: list[str],
def _create_job_from_targets(
raw_targets: list[str],
scan_type: str,
skip_default_sites: bool,
source_type: str,
tenant_id: str,
@ -583,59 +980,74 @@ def _create_job_from_urls(
client_secret: str,
tenant_profile_id: str | None = None,
) -> ScanJobCreateResponse:
accepted_urls: list[str] = []
accepted: list[str] = []
skipped_default_urls: list[str] = []
invalid_urls: list[str] = []
invalid: list[str] = []
seen: set[str] = set()
for raw in raw_urls:
normalized = normalize_site_url(raw)
if not normalized:
invalid_urls.append(raw)
continue
for raw in raw_targets:
if scan_type == "mailbox":
normalized = (raw or "").strip().lower()
if not normalized or "@" not in normalized:
invalid.append(raw)
continue
elif scan_type == "entra_groups":
normalized = (raw or "").strip()
if not normalized:
invalid.append(raw)
continue
else:
normalized = normalize_site_url(raw) or ""
if not normalized:
invalid.append(raw)
continue
if normalized in seen:
continue
seen.add(normalized)
if skip_default_sites and is_default_site(normalized):
if scan_type in ("sharepoint", "sharepoint_root") and skip_default_sites and is_default_site(normalized):
skipped_default_urls.append(normalized)
continue
accepted_urls.append(normalized)
accepted.append(normalized)
with SessionLocal() as db:
now = datetime.utcnow()
job = ScanJob(
id=str(uuid.uuid4()),
source_type=source_type,
status="queued" if accepted_urls else "completed",
scan_type=scan_type,
status="queued" if accepted else "completed",
skip_default_sites=skip_default_sites,
tenant_profile_id=tenant_profile_id,
auth_tenant_id=tenant_id,
auth_client_id=client_id,
auth_client_secret=client_secret,
total_targets=len(accepted_urls),
total_targets=len(accepted),
skipped_targets=len(skipped_default_urls),
warning_message=None,
error_message=None,
created_at=now,
updated_at=now,
finished_at=now if not accepted_urls else None,
finished_at=now if not accepted else None,
)
if not accepted_urls:
job.warning_message = "No scannable sites after validation and default-site filtering"
if not accepted:
if scan_type == "mailbox":
job.warning_message = "No scannable mailboxes after validation"
else:
job.warning_message = "No scannable sites after validation and default-site filtering"
db.add(job)
db.flush()
for index, site_url in enumerate(accepted_urls, start=1):
for index, target in enumerate(accepted, start=1):
db.add(
ScanTarget(
job_id=job.id,
site_url=site_url,
site_url=target,
source_row=index,
status="queued",
attempts=0,
@ -646,15 +1058,14 @@ def _create_job_from_urls(
db.commit()
# Reload with profile for summary
stmt = select(ScanJob).options(joinedload(ScanJob.tenant_profile)).where(ScanJob.id == job.id)
job = db.execute(stmt).unique().scalar_one()
return ScanJobCreateResponse(
job=_to_job_summary(job),
accepted_urls=accepted_urls,
accepted_urls=accepted,
skipped_default_urls=skipped_default_urls,
invalid_urls=invalid_urls,
invalid_urls=invalid,
)
@ -663,6 +1074,7 @@ def _to_job_summary(job: ScanJob) -> ScanJobSummary:
id=job.id,
status=job.status,
source_type=job.source_type,
scan_type=job.scan_type or "sharepoint",
skip_default_sites=job.skip_default_sites,
tenant_profile_id=job.tenant_profile_id,
tenant_name=job.tenant_profile.name if job.tenant_profile else None,
@ -687,6 +1099,7 @@ def _to_tenant_item(profile: TenantProfile) -> TenantProfileItem:
id=profile.id,
name=profile.name,
tenant_id=profile.tenant_id,
primary_domain=profile.primary_domain,
client_id=profile.client_id,
has_certificate=bool(profile.cert_thumbprint),
cert_thumbprint=profile.cert_thumbprint,
@ -720,12 +1133,19 @@ def _ensure_schema_columns() -> None:
"ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS tenant_profile_id VARCHAR(36)",
"ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS items_scanned INTEGER NOT NULL DEFAULT 0",
"ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS scan_activity TEXT",
"ALTER TABLE scan_jobs ADD COLUMN IF NOT EXISTS scan_type VARCHAR(32) NOT NULL DEFAULT 'sharepoint'",
"ALTER TABLE permission_deviations ADD COLUMN IF NOT EXISTS permission_type VARCHAR(32)",
"ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS primary_domain VARCHAR(256)",
"ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS client_secret TEXT",
"ALTER TABLE tenant_profiles ALTER COLUMN client_secret DROP NOT NULL",
"ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_private_key TEXT",
"ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_public_pem TEXT",
"ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_thumbprint VARCHAR(64)",
"ALTER TABLE tenant_profiles ADD COLUMN IF NOT EXISTS cert_expires_at TIMESTAMP",
"ALTER TABLE permission_deviations ADD COLUMN IF NOT EXISTS resolved_members TEXT",
"ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_at TIMESTAMP",
"ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_ok BOOLEAN",
"ALTER TABLE scan_targets ADD COLUMN IF NOT EXISTS last_probe_message TEXT",
]
with engine.begin() as conn:
for stmt in stmts:

View File

@ -16,9 +16,11 @@ class TenantProfile(Base):
id: Mapped[str] = mapped_column(String(36), primary_key=True)
name: Mapped[str] = mapped_column(String(256))
tenant_id: Mapped[str] = mapped_column(String(128))
primary_domain: Mapped[str | None] = mapped_column(String(256), nullable=True)
client_id: Mapped[str] = mapped_column(String(128))
client_secret: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_private_key: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_public_pem: Mapped[str | None] = mapped_column(Text, nullable=True)
cert_thumbprint: Mapped[str | None] = mapped_column(String(64), nullable=True)
cert_expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
@ -34,6 +36,7 @@ class ScanJob(Base):
status: Mapped[str] = mapped_column(String(32), default="queued", index=True)
source_type: Mapped[str] = mapped_column(String(16), default="manual")
skip_default_sites: Mapped[bool] = mapped_column(Boolean, default=True)
scan_type: Mapped[str] = mapped_column(String(32), default="sharepoint", index=True)
tenant_profile_id: Mapped[str | None] = mapped_column(
String(36), ForeignKey("tenant_profiles.id", ondelete="SET NULL"), nullable=True, index=True
)
@ -76,6 +79,10 @@ class ScanTarget(Base):
attempts: Mapped[int] = mapped_column(Integer, default=0)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
last_probe_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
last_probe_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
last_probe_message: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
@ -98,6 +105,7 @@ class PermissionDeviation(Base):
principal: Mapped[str] = mapped_column(Text)
role_name: Mapped[str] = mapped_column(Text)
delta_type: Mapped[str] = mapped_column(String(32))
permission_type: Mapped[str | None] = mapped_column(String(32), nullable=True)
resolved_members: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

View File

@ -1,467 +1,27 @@
from __future__ import annotations
"""
Backwards-compatibility shim. New code should import from clearview_app.scanners.
"""
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from urllib.parse import urlparse
import msal
import requests
from .config import (
SCAN_HTTP_BACKOFF_SEC,
SCAN_HTTP_MAX_RETRIES,
SCAN_HTTP_TIMEOUT_SEC,
SCAN_LIST_PAGE_SIZE,
SCAN_MAX_ITEMS_PER_LIST,
SHAREPOINT_SCAN_MODE,
from .scanners.common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
)
from .scanners.sharepoint import (
probe_site,
resolve_sharing_link_members,
scan_site_for_deviations,
)
@dataclass
class DeviationRecord:
    """A single permission entry reported by a site scan."""

    # Absolute URL of the library, folder or file the entry belongs to.
    object_url: str
    # Kind of object; the scanner emits "DocumentLibrary", "File" or "Folder".
    object_type: str
    # Principal holding the permission.
    principal: str
    # Name of the role bound to the principal.
    role_name: str
    # How the entry differs from the site-root assignments.
    delta_type: str
@dataclass
class ScanResult:
    """Outcome of scanning one site: the deviations plus an optional warning."""

    # Deviations found on the site (empty when nothing deviates).
    deviations: list[DeviationRecord]
    # Human-readable note about a partial or skipped scan; None when clean.
    warning: str | None = field(default=None)
@dataclass(frozen=True)
class PermissionEntry:
    """Immutable, hashable (principal, role) pair, usable as a set member."""

    # Principal the role is granted to.
    principal: str
    # Name of the granted role.
    role_name: str
@dataclass(frozen=True)
class AuthConfig:
    """Immutable Azure app credentials used to request access tokens."""

    # Directory (tenant) the app registration lives in.
    tenant_id: str
    # Application (client) id of the app registration.
    client_id: str
    # Client secret; empty when certificate authentication is used instead.
    client_secret: str = ""
    # Private key for certificate authentication.
    cert_private_key: str | None = None
    # Thumbprint of the certificate matching cert_private_key.
    cert_thumbprint: str | None = None
_TOKEN_CACHE: dict[str, str] = {}
ProgressCallback = Callable[[str, int], None]
def scan_site_for_deviations(
    site_url: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Scan SharePoint permission deviations versus site-root role assignments.

    Only SharePoint role assignments are used (site/list/folder/file scope).
    No filesystem/NTFS permission model is used.

    Every visible document library (BaseTemplate 101) is walked. Libraries
    and items flagged HasUniqueRoleAssignments have their assignments
    compared with the site root via _deviation_records_only_added.

    Args:
        site_url: Absolute URL of the site to scan.
        auth: Azure app credentials used to obtain the SharePoint token.
        progress: Optional callback receiving (activity text, item count).
            Counts are sent in batches of 50 plus one final call per library
            for the remainder — this assumes the callback adds the counts up;
            confirm against the worker.

    Returns:
        ScanResult with de-duplicated deviations and, when a library hit
        SCAN_MAX_ITEMS_PER_LIST, a " | "-joined warning. In "placeholder"
        mode an empty result with an explanatory warning is returned.

    Raises:
        RuntimeError: for an unsupported SHAREPOINT_SCAN_MODE, or incomplete
            auth settings (raised by _validate_auth_config).
    """
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ScanResult(
            deviations=[],
            warning=(
                "SharePoint scan mode is 'placeholder'. "
                "Set SHAREPOINT_SCAN_MODE=sharepoint_app_only and configure Azure app credentials."
            ),
        )

    if SHAREPOINT_SCAN_MODE != "sharepoint_app_only":
        raise RuntimeError(f"Unsupported SHAREPOINT_SCAN_MODE='{SHAREPOINT_SCAN_MODE}'")

    _validate_auth_config(auth)

    def _report(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    # Number of items between two progress callbacks.
    progress_batch = 50
    # Query string shared by every roleassignments request (site, list, item).
    role_query = (
        "?$expand=Member,RoleDefinitionBindings"
        "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name"
    )

    host = urlparse(site_url).netloc

    _report(f"Connecting to {host}")
    token = _get_token_for_host(host, auth)
    base_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }

    _report(f"Loading site permissions: {site_url}")
    root_assignments = _get_role_assignments(
        f"{site_url}/_api/web/roleassignments{role_query}",
        base_headers,
    )
    root_set = set(root_assignments)

    deviations: list[DeviationRecord] = []
    warnings: list[str] = []

    lists_url = (
        f"{site_url}/_api/web/lists"
        "?$select=Id,Title,BaseTemplate,Hidden,ItemCount,RootFolder/ServerRelativeUrl,HasUniqueRoleAssignments"
        "&$expand=RootFolder"
    )

    for lst in _iter_paged(lists_url, base_headers):
        if _to_bool(lst.get("Hidden")):
            continue
        # Only document libraries (list template 101) are scanned.
        if _to_int(lst.get("BaseTemplate")) != 101:
            continue

        list_id = str(lst.get("Id", "")).strip()
        if not list_id:
            continue

        list_title = str(lst.get("Title") or "Document Library")
        list_url = _absolute_url(host, str((lst.get("RootFolder") or {}).get("ServerRelativeUrl") or ""))

        _report(f"Library: {list_title}")

        if _to_bool(lst.get("HasUniqueRoleAssignments")):
            list_assignments = _get_role_assignments(
                f"{site_url}/_api/web/lists(guid'{list_id}')/roleassignments{role_query}",
                base_headers,
            )
            deviations.extend(
                _deviation_records_only_added(
                    object_url=list_url,
                    object_type="DocumentLibrary",
                    root_set=root_set,
                    current_set=set(list_assignments),
                )
            )

        items_processed = 0  # items whose role assignments were fetched
        items_total = 0  # every item seen, inherited permissions included
        items_url = (
            f"{site_url}/_api/web/lists(guid'{list_id}')/items"
            f"?$select=Id,FileRef,FileSystemObjectType,HasUniqueRoleAssignments&$top={SCAN_LIST_PAGE_SIZE}"
        )

        for item in _iter_paged(items_url, base_headers):
            items_total += 1
            if items_total % progress_batch == 0:
                _report(f"Library: {list_title} ({items_total} items scanned)", progress_batch)

            if not _to_bool(item.get("HasUniqueRoleAssignments")):
                continue

            if items_processed >= SCAN_MAX_ITEMS_PER_LIST:
                warnings.append(
                    f"List '{list_title}' hit SCAN_MAX_ITEMS_PER_LIST={SCAN_MAX_ITEMS_PER_LIST}; remaining unique-permission items skipped"
                )
                break

            item_id = _to_int(item.get("Id"))
            if item_id <= 0:
                continue

            file_ref = str(item.get("FileRef") or "")
            if not file_ref:
                continue

            # FileSystemObjectType 0 is treated as a file, anything else as a folder.
            item_type = "File" if _to_int(item.get("FileSystemObjectType")) == 0 else "Folder"
            item_assignments = _get_role_assignments(
                f"{site_url}/_api/web/lists(guid'{list_id}')/items({item_id})/roleassignments{role_query}",
                base_headers,
            )
            deviations.extend(
                _deviation_records_only_added(
                    object_url=_absolute_url(host, file_ref),
                    object_type=item_type,
                    root_set=root_set,
                    current_set=set(item_assignments),
                )
            )
            items_processed += 1

        # Report the items seen since the last full batch; previously the
        # tail of every library (up to 49 items) was never reported.
        leftover = items_total % progress_batch
        if leftover:
            _report(f"Library: {list_title} ({items_total} items scanned)", leftover)

    _report("Scan complete", 0)
    warning = " | ".join(warnings) if warnings else None
    return ScanResult(deviations=_deduplicate_hierarchical(deviations), warning=warning)
def resolve_sharing_link_members(
    site_url: str,
    group_name: str,
    auth: AuthConfig,
) -> list[str]:
    """
    Return the members of a SharePoint SharingLinks group.

    The group is looked up by name on the site
    (sitegroups/getbyname('<name>')/users). For each member the e-mail is
    preferred, then the title, then the login name. Built-in SharePoint
    system accounts are left out.

    Returns an empty list for anonymous links (no resolvable members) and
    whenever the lookup request fails.

    Raises:
        RuntimeError: when the auth settings are incomplete or the token
            request fails (both happen before the best-effort lookup).
    """
    # Local import: the module only imports urlparse at file level.
    from urllib.parse import quote

    _validate_auth_config(auth)

    host = urlparse(site_url).netloc
    token = _get_token_for_host(host, auth)
    headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }

    # OData string literal: double any single quote, then percent-encode the
    # rest so characters such as '#', '?', '%' or '/' cannot cut or corrupt
    # the URL path. Quotes stay literal, as before.
    escaped = group_name.replace("'", "''")
    encoded = quote(escaped, safe="'")
    url = (
        f"{site_url}/_api/web/sitegroups/getbyname('{encoded}')/users"
        "?$select=LoginName,Email,Title"
    )

    try:
        data = _request_json(url, headers)
    except Exception:  # noqa: BLE001
        # Best effort: a group that cannot be read has no resolvable members.
        return []

    members: list[str] = []
    for user in _extract_values(data):
        email = str(user.get("Email") or "").strip()
        login = str(user.get("LoginName") or "").strip()
        title = str(user.get("Title") or "").strip()

        # Skip built-in SharePoint system accounts
        if login.upper().startswith("SHAREPOINT\\") or login.startswith("c:0(.s|true"):
            continue

        display = email or title or login
        if display:
            members.append(display)

    return members
def _validate_auth_config(auth: AuthConfig) -> None:
missing = []
if not auth.tenant_id:
missing.append("tenant_id")
if not auth.client_id:
missing.append("client_id")
if not auth.client_secret and not (auth.cert_thumbprint and auth.cert_private_key):
missing.append("client_secret or certificate")
if missing:
raise RuntimeError("Missing required Azure auth settings: " + ", ".join(missing))
# Monotonic-clock deadline (seconds) after which the cached token stored under
# the same key in _TOKEN_CACHE must be refreshed.
_TOKEN_EXPIRES_AT: dict[str, float] = {}
# Refresh slightly before the real expiry so a token does not lapse mid-request.
_TOKEN_EXPIRY_MARGIN_SEC = 60


def _get_token_for_host(host: str, auth: AuthConfig) -> str:
    """
    Return an app-only access token for ``https://{host}/.default``.

    Tokens are cached per host / tenant / client / auth method. A cached
    token is reused only until the lifetime reported by the token endpoint
    (``expires_in``) has almost elapsed; after that a new token is acquired,
    so long-running processes do not keep sending an expired token.

    Certificate credentials are used when both thumbprint and private key
    are configured, otherwise the client secret.

    Raises:
        RuntimeError: when the token endpoint does not return an access token.
    """
    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
    cached = _TOKEN_CACHE.get(cache_key)
    if cached:
        deadline = _TOKEN_EXPIRES_AT.get(cache_key)
        # Entries without a recorded deadline keep the previous behaviour
        # (treated as valid) so externally seeded cache entries still work.
        if deadline is None or time.monotonic() < deadline:
            return cached

    scope = f"https://{host}/.default"
    authority = f"https://login.microsoftonline.com/{auth.tenant_id}"
    if auth_method == "cert":
        client_credential = {
            "thumbprint": auth.cert_thumbprint,
            "private_key": auth.cert_private_key,
        }
    else:
        client_credential = auth.client_secret

    app = msal.ConfidentialClientApplication(
        client_id=auth.client_id,
        authority=authority,
        client_credential=client_credential,
    )
    result = app.acquire_token_for_client(scopes=[scope])
    if "access_token" not in result:
        error = result.get("error", "unknown")
        description = result.get("error_description", "")
        raise RuntimeError(f"Token request failed ({error}): {description[:300]}")

    token = str(result["access_token"])
    _TOKEN_CACHE[cache_key] = token
    try:
        lifetime = int(result.get("expires_in"))
    except (TypeError, ValueError):
        lifetime = 0
    if lifetime > 0:
        _TOKEN_EXPIRES_AT[cache_key] = time.monotonic() + max(
            lifetime - _TOKEN_EXPIRY_MARGIN_SEC, 0
        )
    else:
        # Unknown lifetime: drop any stale deadline left by an earlier token.
        _TOKEN_EXPIRES_AT.pop(cache_key, None)
    return token
def _iter_paged(url: str, headers: dict[str, str]):
next_url = url
while next_url:
data = _request_json(next_url, headers)
for item in _extract_values(data):
yield item
next_url = _extract_next_link(data)
def _request_json(url: str, headers: dict[str, str]) -> dict:
    """
    GET ``url`` and return the decoded JSON body, retrying on failure.

    Up to SCAN_HTTP_MAX_RETRIES attempts are made. HTTP 429/503 responses
    wait for the server's ``Retry-After`` value when it is a positive integer,
    otherwise for a linearly growing backoff. Any other failure (network
    error, HTTP status >= 400, invalid JSON) is retried with the same backoff.

    Raises:
        RuntimeError: when all attempts fail; the message carries the URL and
            the last error, including the status code when the final attempt
            was throttled.
    """
    last_error: str | None = None
    for attempt in range(1, SCAN_HTTP_MAX_RETRIES + 1):
        is_last_attempt = attempt >= SCAN_HTTP_MAX_RETRIES
        try:
            response = requests.get(url, headers=headers, timeout=SCAN_HTTP_TIMEOUT_SEC)
            if response.status_code in (429, 503):
                # Record the reason so exhausting the retries while throttled
                # does not report "None" as the error.
                last_error = f"HTTP {response.status_code}: throttled or temporarily unavailable"
                if not is_last_attempt:
                    retry_after = _to_int(response.headers.get("Retry-After"))
                    delay = retry_after if retry_after > 0 else SCAN_HTTP_BACKOFF_SEC * attempt
                    time.sleep(delay)
                continue
            if response.status_code >= 400:
                raise RuntimeError(f"HTTP {response.status_code}: {response.text[:300]}")
            return response.json()
        except Exception as exc:  # noqa: BLE001
            last_error = str(exc)
            if not is_last_attempt:
                time.sleep(SCAN_HTTP_BACKOFF_SEC * attempt)
                continue
            raise RuntimeError(f"Request failed for {url}: {last_error}") from exc
    raise RuntimeError(f"Request failed for {url}: {last_error}")
def _extract_values(data: dict) -> list[dict]:
if "value" in data and isinstance(data["value"], list):
return data["value"]
d = data.get("d")
if isinstance(d, dict):
results = d.get("results")
if isinstance(results, list):
return results
return []
def _extract_next_link(data: dict) -> str | None:
for key in ("@odata.nextLink", "odata.nextLink", "__next"):
value = data.get(key)
if isinstance(value, str) and value:
return value
d = data.get("d")
if isinstance(d, dict):
value = d.get("__next")
if isinstance(value, str) and value:
return value
return None
def _get_role_assignments(url: str, headers: dict[str, str]) -> list[PermissionEntry]:
    """
    Fetch role assignments from ``url`` as (principal, role) entries.

    The principal is the member's login name, falling back to its title;
    assignments without either are skipped. One entry is produced per bound
    role, and the "Limited Access" role is left out.
    """
    payload = _request_json(url, headers)
    entries: list[PermissionEntry] = []
    for assignment in _extract_values(payload):
        member = assignment.get("Member") or {}
        principal = str(member.get("LoginName") or member.get("Title") or "").strip()
        if not principal:
            continue
        entries.extend(
            PermissionEntry(principal=principal, role_name=role)
            for role in _extract_role_names(assignment.get("RoleDefinitionBindings"))
            if role.lower() != "limited access"
        )
    return entries
# Lookup table from Dutch SharePoint role names to their English names.
# Keys are lowercase because _normalize_role_name lowercases the name before
# the lookup.
_ROLE_NAME_NL_TO_EN: dict[str, str] = {
"volledig beheer": "Full Control",
"ontwerpen": "Design",
"bewerken": "Edit",
"bijdragen": "Contribute",
"lezen": "Read",
"beperkte toegang": "Limited Access",
"goedkeuren": "Approve",
"hiërarchieën beheren": "Manage Hierarchy",
"weergeven alleen": "View Only",
"beperkt lezen": "Restricted Read",
}
def _normalize_role_name(name: str) -> str:
    """
    Translate a Dutch role name to English.

    The lookup is case-insensitive; names without a translation are
    returned unchanged.
    """
    lookup_key = name.lower()
    if lookup_key in _ROLE_NAME_NL_TO_EN:
        return _ROLE_NAME_NL_TO_EN[lookup_key]
    return name
def _extract_role_names(bindings) -> list[str]:
if isinstance(bindings, list):
return [_normalize_role_name(str(x.get("Name") or "").strip()) for x in bindings if isinstance(x, dict) and x.get("Name")]
if isinstance(bindings, dict):
results = bindings.get("results")
if isinstance(results, list):
return [_normalize_role_name(str(x.get("Name") or "").strip()) for x in results if isinstance(x, dict) and x.get("Name")]
return []
def _deduplicate_hierarchical(deviations: list[DeviationRecord]) -> list[DeviationRecord]:
"""
Remove child-level deviations that are already covered by a parent in the URL hierarchy.
A deviation for (principal, role) at /sites/X/Lib/FolderA is redundant when the same
(principal, role) was already reported at /sites/X/Lib or /sites/X/Lib/FolderA's parent.
Sorting by URL length ascending guarantees parents are evaluated before their children.
"""
sorted_devs = sorted(deviations, key=lambda d: len(d.object_url))
# Maps (principal, role_name) → list of ancestor URLs already reported
covered: dict[tuple[str, str], list[str]] = {}
result: list[DeviationRecord] = []
for dev in sorted_devs:
key = (dev.principal, dev.role_name)
ancestor_urls = covered.get(key)
if ancestor_urls:
parent = dev.object_url.rstrip("/")
already_covered = any(
parent == anc.rstrip("/") or parent.startswith(anc.rstrip("/") + "/")
for anc in ancestor_urls
)
if already_covered:
continue
else:
covered[key] = []
result.append(dev)
covered[key].append(dev.object_url)
return result
def _deviation_records_only_added(
object_url: str,
object_type: str,
root_set: set[PermissionEntry],
current_set: set[PermissionEntry],
) -> list[DeviationRecord]:
records: list[DeviationRecord] = []
for entry in sorted(current_set - root_set, key=lambda x: (x.principal.lower(), x.role_name.lower())):
records.append(
DeviationRecord(
object_url=object_url,
object_type=object_type,
principal=entry.principal,
role_name=entry.role_name,
delta_type="added",
)
)
return records
def _absolute_url(host: str, server_relative_url: str) -> str:
if not server_relative_url:
return f"https://{host}"
if server_relative_url.startswith("http://") or server_relative_url.startswith("https://"):
return server_relative_url
if not server_relative_url.startswith("/"):
server_relative_url = "/" + server_relative_url
return f"https://{host}{server_relative_url}"
def _to_int(value) -> int:
try:
if value is None:
return 0
return int(value)
except (TypeError, ValueError):
return 0
def _to_bool(value) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.strip().lower() in ("1", "true", "yes")
return bool(value)
# Names exported by `from <module> import *`: the shared result/config types
# and the public SharePoint entry points.
__all__ = [
"AuthConfig",
"DeviationRecord",
"ProbeResult",
"ProgressCallback",
"ScanResult",
"probe_site",
"resolve_sharing_link_members",
"scan_site_for_deviations",
]

View File

@ -0,0 +1,61 @@
"""
Scanner package — dispatches scan requests by scan_type.
Public API:
- AuthConfig, DeviationRecord, ScanResult, ProbeResult, ProgressCallback (common)
- scan(scan_type, target, auth, progress) — dispatcher
- probe(scan_type, target, auth) — dispatcher
- resolve_sharing_link_members — SharePoint-specific, re-exported
"""
from __future__ import annotations
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
)
from . import entra, mailbox, sharepoint
from .sharepoint import resolve_sharing_link_members
# Public names of the scanners package: the shared types re-exported from
# .common, the two dispatchers defined below, and the SharePoint helper
# re-exported from .sharepoint.
__all__ = [
"AuthConfig",
"DeviationRecord",
"ProbeResult",
"ProgressCallback",
"ScanResult",
"scan",
"probe",
"resolve_sharing_link_members",
]
def scan(
    scan_type: str,
    target: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Run the scanner registered for ``scan_type`` against ``target``.

    Supported types: "sharepoint", "sharepoint_root", "mailbox" and
    "entra_groups".

    Raises:
        RuntimeError: for any other ``scan_type``.
    """
    # Thunks keep the module attribute lookups lazy: only the scanner that is
    # actually selected gets touched.
    runners = {
        "sharepoint": lambda: sharepoint.scan_site_for_deviations(target, auth, progress),
        "sharepoint_root": lambda: sharepoint.scan_site_root_permissions(target, auth, progress),
        "mailbox": lambda: mailbox.scan_mailbox_for_deviations(target, auth, progress),
        "entra_groups": lambda: entra.scan_entra_group(target, auth, progress),
    }
    runner = runners.get(scan_type) if isinstance(scan_type, str) else None
    if runner is None:
        raise RuntimeError(f"Unknown scan_type '{scan_type}'")
    return runner()
def probe(scan_type: str, target: str, auth: AuthConfig) -> ProbeResult:
    """
    Run the preflight probe that belongs to ``scan_type``.

    Both SharePoint scan types share the site probe; "mailbox" and
    "entra_groups" use their own.

    Raises:
        RuntimeError: for any other ``scan_type``.
    """
    # Thunks keep the module attribute lookups lazy.
    probes = {
        "sharepoint": lambda: sharepoint.probe_site(target, auth),
        "sharepoint_root": lambda: sharepoint.probe_site(target, auth),
        "mailbox": lambda: mailbox.probe_mailbox(target, auth),
        "entra_groups": lambda: entra.probe_entra(target, auth),
    }
    prober = probes.get(scan_type) if isinstance(scan_type, str) else None
    if prober is None:
        raise RuntimeError(f"Unknown scan_type '{scan_type}'")
    return prober()

View File

@ -0,0 +1,52 @@
from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass, field
@dataclass(frozen=True)
class AuthConfig:
    """
    Immutable Azure app credentials passed to the scanners.

    Either ``client_secret`` or the certificate pair (``cert_thumbprint`` and
    ``cert_private_key``) must be provided; validate_auth_config enforces
    that. ``cert_public_pem`` is optional.
    """

    tenant_id: str
    client_id: str
    # Secret material is kept out of the generated repr so that logging or
    # printing a config (for example in a traceback) cannot leak it.
    client_secret: str = field(default="", repr=False)
    cert_private_key: str | None = field(default=None, repr=False)
    cert_thumbprint: str | None = None
    cert_public_pem: str | None = None
@dataclass
class DeviationRecord:
    """One permission finding produced by a scanner."""

    # Identifier of the scanned object the finding belongs to.
    object_url: str
    # Kind of object the finding belongs to.
    object_type: str
    # Identity that holds the permission.
    principal: str
    # Name of the role or access right held.
    role_name: str
    # How the finding was classified by the scanner.
    delta_type: str
    # Optional permission category; None when the scanner sets none.
    permission_type: str | None = None
@dataclass
class ScanResult:
    """Outcome of one scan: the findings plus an optional warning text."""

    # Findings collected by the scan.
    deviations: list[DeviationRecord]
    # Non-fatal problems to surface with the result; None when there are none.
    warning: str | None = None
@dataclass
class ProbeResult:
    """Outcome of a preflight probe."""

    # True when the probe succeeded.
    ok: bool
    # Human-readable status or error description.
    message: str
# Signature of the optional progress hook accepted by the scanners: it takes
# a text argument and an integer argument and returns nothing.
ProgressCallback = Callable[[str, int], None]
def validate_auth_config(auth: AuthConfig) -> None:
    """
    Ensure the auth settings required for app-only access are present.

    ``tenant_id`` and ``client_id`` are mandatory, and so is one credential:
    a client secret, or a certificate thumbprint together with its private
    key.

    Raises:
        RuntimeError: naming all missing settings at once.
    """
    problems: list[str] = []
    for attr in ("tenant_id", "client_id"):
        if not getattr(auth, attr):
            problems.append(attr)
    certificate_ready = auth.cert_thumbprint and auth.cert_private_key
    if not (auth.client_secret or certificate_ready):
        problems.append("client_secret or certificate")
    if problems:
        raise RuntimeError("Missing required Azure auth settings: " + ", ".join(problems))

View File

@ -0,0 +1,293 @@
"""
Entra (Azure AD) groups scanner.
For each target (group object-id or email/UPN-style mail) Clearview retrieves:
- The group's display name and type (Microsoft 365 / Security / Distribution / Mail-enabled security)
- Every Member (recursive across nested groups)
- Every Owner (recursive across nested groups)
Each resulting user is stored as one deviation with:
- object_url = group display label
- object_type = 'EntraGroup'
- principal = userPrincipalName / mail / displayName
- role_name = 'Member' or 'Owner' (with " (via X > Y)" chain when nested)
- delta_type = 'present'
- permission_type = group type ("Microsoft 365" / "Security" / …)
Authentication uses a Graph token obtained from MSAL via the existing tenant
certificate. Required Application permission: Group.Read.All on Microsoft Graph.
"""
from __future__ import annotations
from dataclasses import dataclass
from urllib.parse import quote
import requests
from ..config import SCAN_HTTP_BACKOFF_SEC, SCAN_HTTP_MAX_RETRIES, SCAN_HTTP_TIMEOUT_SEC
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
validate_auth_config,
)
from .sharepoint import _get_token_for_host, _request_json
@dataclass
class _ResolvedUser:
upn: str
via: list[str]
def scan_entra_group(
    target: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    List the members and owners of one Entra group as deviation records.

    ``target`` is resolved to a group first; when nothing matches, an empty
    result with a "Group not found" warning is returned. Otherwise every
    collected member becomes a "Member" record and every owner an "Owner"
    record, members first.
    """
    validate_auth_config(auth)

    def _report(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    headers = _graph_headers(auth)
    _report(f"Resolving group: {target}")
    group = _resolve_group(target, headers)
    if not group:
        return ScanResult(deviations=[], warning=f"Group not found: {target}")

    group_id = str(group.get("id") or "").strip()
    # Label preference: display name, then mail address, then the object id.
    label = group_id
    for attr in ("displayName", "mail"):
        candidate = str(group.get(attr) or "").strip()
        if candidate:
            label = candidate
            break
    group_type = _classify_group_type(group)

    _report(f"Members: {label}")
    members = _collect_users(group_id, "/members", headers, [label])
    _report(f"Owners: {label}")
    owners = _collect_users(group_id, "/owners", headers, [label])

    deviations: list[DeviationRecord] = [
        _user_to_record(user, label, group_type, "Member") for user in members
    ]
    deviations += [_user_to_record(user, label, group_type, "Owner") for user in owners]
    _report("Scan complete", 0)
    return ScanResult(deviations=deviations, warning=None)
def probe_entra(target: str, auth: AuthConfig) -> ProbeResult:
    """
    Preflight check for an Entra group scan.

    Verifies, in order: the auth settings, that ``target`` is not blank, that
    a Graph token can be obtained, and that the group can be resolved. The
    first failing step is reported in the result message; nothing is raised.
    """
    try:
        validate_auth_config(auth)
    except Exception as config_error:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"Config: {config_error}")

    if not (target or "").strip():
        return ProbeResult(ok=False, message="Empty group target")

    try:
        headers = _graph_headers(auth)
    except Exception as token_error:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"Token: {str(token_error)[:240]}")

    try:
        group = _resolve_group(target, headers)
    except Exception as lookup_error:  # noqa: BLE001
        return ProbeResult(ok=False, message=_probe_hint(str(lookup_error)))

    if group:
        return ProbeResult(ok=True, message="OK")
    return ProbeResult(ok=False, message=f"Group not found: {target}")
def list_all_groups(auth: AuthConfig, max_count: int = 50000) -> list[str]:
    """
    Enumerate every group object id in the tenant (any group type) via Graph.

    Returns a list of object IDs that can each be queued as a scan target.

    Raises:
        RuntimeError: when auth settings are missing, a request fails, or
            more than ``max_count`` groups have been collected.
    """
    validate_auth_config(auth)
    headers = _graph_headers(auth)
    page_url: str | None = (
        "https://graph.microsoft.com/v1.0/groups"
        "?$select=id,displayName,mail&$top=999"
    )
    group_ids: list[str] = []
    while page_url:
        page = _request_json(page_url, headers)
        candidates = (str(group.get("id") or "").strip() for group in page.get("value", []))
        group_ids.extend(gid for gid in candidates if gid)
        # The limit is checked once per page, after that page has been added.
        if len(group_ids) > max_count:
            raise RuntimeError(f"Group count exceeds limit {max_count}")
        link = page.get("@odata.nextLink")
        page_url = link if isinstance(link, str) and link else None
    return group_ids
def _user_to_record(user: _ResolvedUser, group_label: str, group_type: str, role: str) -> DeviationRecord:
    """
    Convert a resolved user into an "EntraGroup" deviation record.

    When the user was reached through nested groups, the role name is
    extended with the traversal chain, e.g. "Member (via A > B)"; a user
    found directly in the scanned group keeps the plain role.
    """
    reached_via_nesting = bool(user.via) and user.via != [group_label]
    if reached_via_nesting:
        role_name = f"{role} (via {' > '.join(user.via)})"
    else:
        role_name = role
    return DeviationRecord(
        object_url=group_label,
        object_type="EntraGroup",
        principal=user.upn,
        role_name=role_name,
        delta_type="present",
        permission_type=group_type,
    )
def _graph_headers(auth: AuthConfig) -> dict[str, str]:
    """Return JSON request headers carrying a bearer token for graph.microsoft.com."""
    bearer = _get_token_for_host("graph.microsoft.com", auth)
    headers = {"Accept": "application/json"}
    headers["Authorization"] = f"Bearer {bearer}"
    return headers
def _resolve_group(target: str, headers: dict[str, str]) -> dict | None:
"""Accept a GUID, an email/SMTP, or a displayName."""
cleaned = (target or "").strip()
if not cleaned:
return None
if _is_guid(cleaned):
try:
return _request_json(
f"https://graph.microsoft.com/v1.0/groups/{cleaned}"
"?$select=id,displayName,mail,groupTypes,securityEnabled,mailEnabled",
headers,
)
except Exception: # noqa: BLE001
return None
safe = cleaned.replace("'", "''")
if "@" in cleaned:
url = (
"https://graph.microsoft.com/v1.0/groups"
f"?$filter=mail eq '{safe}'"
"&$select=id,displayName,mail,groupTypes,securityEnabled,mailEnabled"
)
else:
url = (
"https://graph.microsoft.com/v1.0/groups"
f"?$filter=displayName eq '{safe}'"
"&$select=id,displayName,mail,groupTypes,securityEnabled,mailEnabled"
)
try:
data = _request_json(url, headers)
except Exception: # noqa: BLE001
return None
items = data.get("value") or []
return items[0] if items else None
def _classify_group_type(group: dict) -> str:
types = group.get("groupTypes") or []
if isinstance(types, list) and any(str(t).lower() == "unified" for t in types):
return "Microsoft 365"
mail_enabled = bool(group.get("mailEnabled"))
security_enabled = bool(group.get("securityEnabled"))
if mail_enabled and security_enabled:
return "Mail-enabled Security"
if security_enabled:
return "Security"
if mail_enabled:
return "Distribution"
return "Group"
def _collect_users(
group_id: str,
relative: str,
headers: dict[str, str],
via_chain: list[str],
seen_groups: set[str] | None = None,
depth: int = 0,
) -> list[_ResolvedUser]:
if depth > 5:
return []
if seen_groups is None:
seen_groups = set()
next_url: str | None = (
f"https://graph.microsoft.com/v1.0/groups/{group_id}{relative}"
"?$select=id,userPrincipalName,mail,displayName&$top=999"
)
out: list[_ResolvedUser] = []
while next_url:
try:
data = _request_json(next_url, headers)
except Exception: # noqa: BLE001
break
for entry in data.get("value", []):
otype = str(entry.get("@odata.type") or "")
if otype.endswith("user"):
upn = (
str(entry.get("userPrincipalName") or "").strip()
or str(entry.get("mail") or "").strip()
or str(entry.get("displayName") or "").strip()
)
if upn:
out.append(_ResolvedUser(upn=upn, via=list(via_chain)))
elif otype.endswith("group"):
nested_id = str(entry.get("id") or "").strip()
if not nested_id or nested_id in seen_groups:
continue
seen_groups.add(nested_id)
nested_label = (
str(entry.get("displayName") or "").strip()
or str(entry.get("mail") or "").strip()
or nested_id
)
# Nested groups under /members are themselves "members" — we
# recurse via /members only. For /owners, owners of the nested
# group are not themselves owners of the parent in any
# meaningful sense, so we still recurse via /members.
out.extend(
_collect_users(
nested_id,
"/members",
headers,
via_chain + [nested_label],
seen_groups,
depth + 1,
)
)
nl = data.get("@odata.nextLink")
next_url = nl if isinstance(nl, str) and nl else None
return out
def _is_guid(value: str) -> bool:
if not value or len(value) != 36:
return False
parts = value.split("-")
if len(parts) != 5:
return False
return all(all(c in "0123456789abcdefABCDEF" for c in p) for p in parts)
def _probe_hint(error: str) -> str:
low = error.lower()
if "401" in low or "unauthorized" in low or "aadsts" in low:
return f"{error[:200]} — verify Group.Read.All permission and admin consent on Microsoft Graph"
if "403" in low or "forbidden" in low:
return f"{error[:200]} — Microsoft Graph permission denied (Group.Read.All missing?)"
if "404" in low:
return f"{error[:200]} — group not found in this tenant"
return error[:240]

View File

@ -0,0 +1,135 @@
# Collects the permissions granted on one Exchange Online mailbox and writes
# them to stdout as a single compact JSON object:
#   success: { ok = $true; mailbox; entries = [...]; warnings = [...] }
#   failure: { ok = $false; error }
# Failures are reported through the JSON payload, so the script exits 0 even
# when it could not connect. The PFX password is read from the
# CLEARVIEW_PFX_PASSWORD environment variable.
[CmdletBinding()]
param(
    [Parameter(Mandatory=$true)][string]$TenantId,
    [Parameter(Mandatory=$true)][string]$ClientId,
    [Parameter(Mandatory=$true)][string]$Organization,
    [Parameter(Mandatory=$true)][string]$Mailbox,
    [Parameter(Mandatory=$true)][string]$CertPath
)

$ErrorActionPreference = 'Stop'
$ProgressPreference = 'SilentlyContinue'

# Serialize the payload as one compact JSON line.
function Write-JsonResult {
    param($Payload)
    Write-Output ($Payload | ConvertTo-Json -Depth 6 -Compress)
}

try {
    Import-Module ExchangeOnlineManagement -ErrorAction Stop
} catch {
    Write-JsonResult @{ ok = $false; error = "ExchangeOnlineManagement module not available: $($_.Exception.Message)" }
    exit 0
}

try {
    $pfxPwd = $env:CLEARVIEW_PFX_PASSWORD
    if ([string]::IsNullOrEmpty($pfxPwd)) {
        Write-JsonResult @{ ok = $false; error = "CLEARVIEW_PFX_PASSWORD not set in environment" }
        exit 0
    }
    $securePwd = ConvertTo-SecureString -String $pfxPwd -AsPlainText -Force
    Connect-ExchangeOnline `
        -AppId $ClientId `
        -Organization $Organization `
        -CertificateFilePath $CertPath `
        -CertificatePassword $securePwd `
        -ShowBanner:$false `
        -ShowProgress:$false `
        -ErrorAction Stop | Out-Null
} catch {
    Write-JsonResult @{ ok = $false; error = "Connect-ExchangeOnline failed: $($_.Exception.Message)" }
    exit 0
}

$entries = New-Object System.Collections.Generic.List[object]
$warnings = New-Object System.Collections.Generic.List[string]

try {
    $mb = Get-EXOMailbox -Identity $Mailbox -PropertySets All -ErrorAction Stop

    # 1) Full Access (and other mailbox-level permissions)
    try {
        $perms = Get-EXOMailboxPermission -Identity $mb.UserPrincipalName -ErrorAction Stop |
            Where-Object { $_.User -notlike 'NT AUTHORITY\SELF' -and $_.User -notlike 'S-1-5-*' -and -not $_.IsInherited -and $_.Deny -eq $false }
        foreach ($p in $perms) {
            $rights = @($p.AccessRights) -join ', '
            $entries.Add([pscustomobject]@{
                permission_type = 'FullAccess'
                object_type     = 'Mailbox'
                object          = $mb.UserPrincipalName
                principal       = [string]$p.User
                role_name       = $rights
            })
        }
    } catch {
        $warnings.Add("MailboxPermission: $($_.Exception.Message)")
    }

    # 2) Send As
    try {
        $sendAs = Get-EXORecipientPermission -Identity $mb.UserPrincipalName -ErrorAction Stop |
            Where-Object { $_.Trustee -notlike 'NT AUTHORITY\SELF' -and $_.Trustee -notlike 'S-1-5-*' -and $_.AccessControlType -eq 'Allow' }
        foreach ($p in $sendAs) {
            $rights = @($p.AccessRights) -join ', '
            $entries.Add([pscustomobject]@{
                permission_type = 'SendAs'
                object_type     = 'Mailbox'
                object          = $mb.UserPrincipalName
                principal       = [string]$p.Trustee
                role_name       = $rights
            })
        }
    } catch {
        $warnings.Add("RecipientPermission: $($_.Exception.Message)")
    }

    # 3) Send on Behalf — from mailbox property
    try {
        if ($mb.GrantSendOnBehalfTo) {
            foreach ($t in $mb.GrantSendOnBehalfTo) {
                $entries.Add([pscustomobject]@{
                    permission_type = 'SendOnBehalf'
                    object_type     = 'Mailbox'
                    object          = $mb.UserPrincipalName
                    principal       = [string]$t
                    role_name       = 'SendOnBehalf'
                })
            }
        }
    } catch {
        $warnings.Add("GrantSendOnBehalfTo: $($_.Exception.Message)")
    }

    # 4) Folder-level delegations on Calendar and Inbox
    foreach ($folder in 'Calendar', 'Inbox') {
        try {
            $folderPath = "{0}:\{1}" -f $mb.UserPrincipalName, $folder
            $fp = Get-EXOMailboxFolderPermission -Identity $folderPath -ErrorAction Stop
            foreach ($p in $fp) {
                # The User value may be an object exposing .DisplayName or a
                # plain string, depending on the cmdlet's transport. Accept
                # both, so entries are not emitted with an empty principal.
                $userName = [string]$p.User.DisplayName
                if ([string]::IsNullOrWhiteSpace($userName)) {
                    $userName = [string]$p.User
                }
                if ($userName -in @('Default', 'Anonymous')) { continue }
                if ($p.AccessRights -contains 'None') { continue }
                $rights = @($p.AccessRights) -join ', '
                $entries.Add([pscustomobject]@{
                    permission_type = "Folder:$folder"
                    object_type     = 'MailboxFolder'
                    object          = "$($mb.UserPrincipalName)/$folder"
                    principal       = $userName
                    role_name       = $rights
                })
            }
        } catch {
            $warnings.Add("FolderPermission ${folder}: $($_.Exception.Message)")
        }
    }

    Write-JsonResult @{
        ok       = $true
        mailbox  = $mb.UserPrincipalName
        entries  = $entries
        warnings = $warnings
    }
} catch {
    Write-JsonResult @{ ok = $false; error = $_.Exception.Message }
} finally {
    try { Disconnect-ExchangeOnline -Confirm:$false -InformationAction SilentlyContinue -ErrorAction SilentlyContinue | Out-Null } catch {}
}

View File

@ -0,0 +1,67 @@
# Enumerates the UserPrincipalName of every mailbox in the organization and
# writes one compact JSON object to stdout:
#   success: { ok = $true; count; mailboxes = [...] }
#   failure: { ok = $false; error [; count] }
# Failures are reported through the JSON payload, so the script exits 0 even
# when it could not connect. The PFX password is read from the
# CLEARVIEW_PFX_PASSWORD environment variable.
[CmdletBinding()]
param(
    [Parameter(Mandatory=$true)][string]$TenantId,
    [Parameter(Mandatory=$true)][string]$ClientId,
    [Parameter(Mandatory=$true)][string]$Organization,
    [Parameter(Mandatory=$true)][string]$CertPath,
    [Parameter(Mandatory=$false)][int]$MaxMailboxes = 50000
)

$ErrorActionPreference = 'Stop'
$ProgressPreference = 'SilentlyContinue'

# Serialize the payload as one compact JSON line.
function Write-JsonResult {
    param($Payload)
    Write-Output ($Payload | ConvertTo-Json -Depth 4 -Compress)
}

try {
    Import-Module ExchangeOnlineManagement -ErrorAction Stop
} catch {
    Write-JsonResult @{ ok = $false; error = "ExchangeOnlineManagement module not available: $($_.Exception.Message)" }
    exit 0
}

try {
    $pfxPwd = $env:CLEARVIEW_PFX_PASSWORD
    if ([string]::IsNullOrEmpty($pfxPwd)) {
        Write-JsonResult @{ ok = $false; error = "CLEARVIEW_PFX_PASSWORD not set in environment" }
        exit 0
    }
    $securePwd = ConvertTo-SecureString -String $pfxPwd -AsPlainText -Force
    Connect-ExchangeOnline `
        -AppId $ClientId `
        -Organization $Organization `
        -CertificateFilePath $CertPath `
        -CertificatePassword $securePwd `
        -ShowBanner:$false `
        -ShowProgress:$false `
        -ErrorAction Stop | Out-Null
} catch {
    Write-JsonResult @{ ok = $false; error = "Connect-ExchangeOnline failed: $($_.Exception.Message)" }
    exit 0
}

try {
    # @(...) forces an array. Without it a single mailbox comes back as a bare
    # string and no mailbox as $null, which ConvertTo-Json would emit as a
    # JSON string / null instead of a list.
    $boxes = @(
        Get-EXOMailbox -ResultSize Unlimited -PropertySets Minimum -ErrorAction Stop |
            Select-Object -ExpandProperty UserPrincipalName
    )
    if ($boxes.Count -gt $MaxMailboxes) {
        Write-JsonResult @{
            ok    = $false
            error = "Mailbox count $($boxes.Count) exceeds MaxMailboxes=$MaxMailboxes"
            count = $boxes.Count
        }
        exit 0
    }
    Write-JsonResult @{
        ok        = $true
        count     = $boxes.Count
        mailboxes = $boxes
    }
} catch {
    Write-JsonResult @{ ok = $false; error = $_.Exception.Message }
} finally {
    try { Disconnect-ExchangeOnline -Confirm:$false -InformationAction SilentlyContinue -ErrorAction SilentlyContinue | Out-Null } catch {}
}

View File

@ -0,0 +1,57 @@
# Preflight probe: connects to Exchange Online with certificate-based app-only
# auth and checks that the given mailbox can be read.
# Writes exactly one compact JSON object { ok, message } to stdout. Failures
# are reported through that object, so the script exits 0 in every handled case.
# The PFX password is read from the CLEARVIEW_PFX_PASSWORD environment variable.
[CmdletBinding()]
param(
[Parameter(Mandatory=$true)][string]$TenantId,
[Parameter(Mandatory=$true)][string]$ClientId,
[Parameter(Mandatory=$true)][string]$Organization,
[Parameter(Mandatory=$true)][string]$Mailbox,
[Parameter(Mandatory=$true)][string]$CertPath
)
# Turn every error into a terminating one so the try/catch blocks below see it.
$ErrorActionPreference = 'Stop'
$ProgressPreference = 'SilentlyContinue'
# Emit the probe outcome as a single compact JSON line.
function Write-Result {
param([bool]$Ok, [string]$Message)
$obj = [pscustomobject]@{ ok = $Ok; message = $Message }
Write-Output ($obj | ConvertTo-Json -Compress)
}
# Step 1: the ExchangeOnlineManagement module must be importable.
try {
Import-Module ExchangeOnlineManagement -ErrorAction Stop
} catch {
Write-Result -Ok $false -Message "ExchangeOnlineManagement module not available: $($_.Exception.Message)"
exit 0
}
# Step 2: connect using the PFX file and the password from the environment.
try {
$pfxPwd = $env:CLEARVIEW_PFX_PASSWORD
if ([string]::IsNullOrEmpty($pfxPwd)) {
Write-Result -Ok $false -Message "CLEARVIEW_PFX_PASSWORD not set in environment"
exit 0
}
$securePwd = ConvertTo-SecureString -String $pfxPwd -AsPlainText -Force
Connect-ExchangeOnline `
-AppId $ClientId `
-Organization $Organization `
-CertificateFilePath $CertPath `
-CertificatePassword $securePwd `
-ShowBanner:$false `
-ShowProgress:$false `
-ErrorAction Stop | Out-Null
} catch {
Write-Result -Ok $false -Message "Connect-ExchangeOnline failed: $($_.Exception.Message)"
exit 0
}
# Step 3: look the mailbox up; the session is always disconnected afterwards.
try {
$box = Get-EXOMailbox -Identity $Mailbox -ErrorAction Stop -PropertySets Minimum
if ($null -eq $box) {
Write-Result -Ok $false -Message "Mailbox '$Mailbox' not found"
} else {
Write-Result -Ok $true -Message "OK"
}
} catch {
Write-Result -Ok $false -Message "Get-EXOMailbox failed: $($_.Exception.Message)"
} finally {
try { Disconnect-ExchangeOnline -Confirm:$false -InformationAction SilentlyContinue -ErrorAction SilentlyContinue | Out-Null } catch {}
}

View File

@ -0,0 +1,257 @@
"""
Mailbox permission scanner — Exchange Online via PowerShell subprocess.
Requires `pwsh` and the `ExchangeOnlineManagement` module to be installed
in the runtime container. Authentication uses certificate-based app-only
auth, identical to the SharePoint scanner's tenant profile.
"""
from __future__ import annotations
import json
import os
import secrets
import shutil
import subprocess
import tempfile
from pathlib import Path
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.serialization import pkcs12
from cryptography import x509
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
validate_auth_config,
)
# PowerShell helper scripts live in the "exo_scripts" directory next to this module.
_SCRIPTS_DIR = Path(__file__).parent / "exo_scripts"
# One script per operation: mailbox probe, permission dump, mailbox enumeration.
_PROBE_SCRIPT = _SCRIPTS_DIR / "probe.ps1"
_GET_PERMS_SCRIPT = _SCRIPTS_DIR / "get-permissions.ps1"
_LIST_SCRIPT = _SCRIPTS_DIR / "list-mailboxes.ps1"
# pwsh subprocess timeout — connect can take ~10s, scan up to a few minutes per mailbox
# (default for _run_pwsh; individual callers may pass their own timeout)
_PWSH_TIMEOUT_SEC = 600
def scan_mailbox_for_deviations(
    upn: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Read the permissions on one mailbox and return them as deviation records.

    Runs the get-permissions PowerShell script and converts every returned
    entry that names a principal into a record with delta_type "present".
    Warnings reported by the script are joined with " | " into the result's
    warning text.

    Raises:
        RuntimeError: when auth settings or the certificate are missing, the
            script cannot be run, or the script reports a failure.
    """
    validate_auth_config(auth)
    _require_certificate(auth)

    def _report(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    organization = _resolve_organization(auth, upn)
    _report(f"Connecting to Exchange Online ({organization})")
    payload = _run_pwsh(_GET_PERMS_SCRIPT, auth, organization, upn)
    if not payload.get("ok"):
        raise RuntimeError(payload.get("error") or "Mailbox scan failed")

    entries = payload.get("entries") or []
    warnings = payload.get("warnings") or []
    mailbox_id = payload.get("mailbox") or upn
    _report(f"Mailbox: {mailbox_id} ({len(entries)} entries)", len(entries))

    def _to_record(entry, principal: str) -> DeviationRecord:
        return DeviationRecord(
            object_url=str(entry.get("object") or mailbox_id),
            object_type=str(entry.get("object_type") or "Mailbox"),
            principal=principal,
            role_name=str(entry.get("role_name") or ""),
            delta_type="present",
            permission_type=str(entry.get("permission_type") or ""),
        )

    # Entries without a principal carry no usable information and are dropped.
    named = ((entry, str(entry.get("principal") or "").strip()) for entry in entries)
    deviations: list[DeviationRecord] = [
        _to_record(entry, principal) for entry, principal in named if principal
    ]

    _report("Scan complete", 0)
    warning_text = " | ".join(str(w) for w in warnings) if warnings else None
    return ScanResult(deviations=deviations, warning=warning_text)
def list_mailboxes(organization: str, auth: AuthConfig, max_count: int = 50000) -> list[str]:
    """
    Enumerate every UserPrincipalName in the tenant via Exchange Online.

    `organization` must be the tenant's primary domain (e.g. contoso.onmicrosoft.com).
    Returns the lowercased, trimmed UPNs.

    Raises:
        RuntimeError: on missing auth settings or certificate, when pwsh is
            unavailable, on connection/enumeration failure, or when the count
            exceeds max_count.
    """
    validate_auth_config(auth)
    _require_certificate(auth)
    if not shutil.which("pwsh"):
        raise RuntimeError("pwsh not available in runtime")

    payload = _run_pwsh(_LIST_SCRIPT, auth, organization, mailbox=None, timeout_sec=300)
    if not payload.get("ok"):
        raise RuntimeError(payload.get("error") or "Mailbox enumeration failed")

    mailboxes = payload.get("mailboxes") or []
    if isinstance(mailboxes, str):
        # PowerShell unrolls a one-element pipeline result to a scalar, which
        # is then serialized as a bare JSON string. A tenant with exactly one
        # mailbox must still yield that mailbox, not an empty list.
        mailboxes = [mailboxes]
    if not isinstance(mailboxes, list):
        return []

    cleaned = [str(m).strip().lower() for m in mailboxes if isinstance(m, str) and m.strip()]
    if len(cleaned) > max_count:
        raise RuntimeError(f"Mailbox count {len(cleaned)} exceeds limit {max_count}")
    return cleaned
def probe_mailbox(upn: str, auth: AuthConfig) -> ProbeResult:
    """
    Preflight check for a mailbox scan.

    Verifies, in order: auth settings and certificate, availability of
    ``pwsh``, that ``upn`` looks like an address, and finally that the probe
    script can reach the mailbox. Nothing is raised; the first failing step
    is described in the result message.
    """
    try:
        validate_auth_config(auth)
        _require_certificate(auth)
    except Exception as config_error:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"Config: {config_error}")

    if not shutil.which("pwsh"):
        return ProbeResult(ok=False, message="pwsh not available in runtime")

    if not (upn or "").strip() or "@" not in upn:
        return ProbeResult(ok=False, message="Invalid mailbox (UPN/email)")

    organization = _resolve_organization(auth, upn)
    try:
        payload = _run_pwsh(_PROBE_SCRIPT, auth, organization, upn)
    except Exception as run_error:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"pwsh: {str(run_error)[:240]}")

    succeeded = bool(payload.get("ok"))
    fallback = "OK" if succeeded else "Unknown error"
    message = str(payload.get("message") or fallback)
    if succeeded:
        return ProbeResult(ok=succeeded, message=message)
    return ProbeResult(ok=succeeded, message=_probe_hint(message))
def _require_certificate(auth: AuthConfig) -> None:
if not (auth.cert_thumbprint and auth.cert_private_key):
raise RuntimeError(
"Mailbox scanning requires a certificate on the tenant profile "
"(client secret is not supported by Exchange Online for app-only auth)."
)
def _resolve_organization(auth: AuthConfig, upn: str) -> str:
"""
Exchange Online expects the organization as the tenant's primary domain
(e.g. contoso.onmicrosoft.com). The UPN domain is the practical default.
"""
domain = upn.split("@", 1)[-1].strip().lower()
return domain or auth.tenant_id
def _run_pwsh(
    script: Path,
    auth: AuthConfig,
    organization: str,
    mailbox: str | None = None,
    timeout_sec: int = _PWSH_TIMEOUT_SEC,
) -> dict:
    """
    Run one Exchange Online helper script with pwsh and return its JSON result.

    The certificate is written as a password-protected PFX into a temporary
    directory that is removed afterwards. The random PFX password is handed
    to the script through the CLEARVIEW_PFX_PASSWORD environment variable
    rather than on the command line. The last line of stdout is parsed as
    the JSON payload.

    Raises:
        RuntimeError: when pwsh is missing, the public certificate is not
            stored, the script times out, exits non-zero, prints nothing, or
            prints something that is not valid JSON.
    """
    if not shutil.which("pwsh"):
        raise RuntimeError("pwsh not available in runtime")

    public_pem = _resolve_public_cert_pem(auth)
    pfx_password = secrets.token_urlsafe(16)

    with tempfile.TemporaryDirectory(prefix="clearview-exo-") as workdir:
        pfx_path = Path(workdir) / "cert.pfx"
        _write_pfx(
            private_key_pem=auth.cert_private_key or "",
            public_cert_pem=public_pem,
            out_path=pfx_path,
            password=pfx_password,
        )

        mailbox_args = ["-Mailbox", mailbox] if mailbox is not None else []
        command = [
            "pwsh",
            "-NoProfile",
            "-NonInteractive",
            "-File", str(script),
            "-TenantId", auth.tenant_id,
            "-ClientId", auth.client_id,
            "-Organization", organization,
            "-CertPath", str(pfx_path),
            *mailbox_args,
        ]
        child_env = {**os.environ, "CLEARVIEW_PFX_PASSWORD": pfx_password}

        try:
            completed = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=timeout_sec,
                env=child_env,
            )
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(f"pwsh script timed out after {timeout_sec}s") from exc

    if completed.returncode != 0:
        stderr = (completed.stderr or "").strip()[:500]
        raise RuntimeError(f"pwsh exited with code {completed.returncode}: {stderr}")

    out = (completed.stdout or "").strip()
    if not out:
        raise RuntimeError("pwsh returned empty output")

    # Only the final line is treated as the payload; earlier lines are ignored.
    final_line = out.splitlines()[-1]
    try:
        return json.loads(final_line)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"Could not parse pwsh JSON output: {out[:500]}") from exc
def _resolve_public_cert_pem(auth: AuthConfig) -> str:
"""
The public cert PEM is stored on the tenant profile via the AuthConfig
extension below. This helper raises if it is missing happens for tenants
whose certificate was generated before cert_public_pem was stored.
"""
pem = getattr(auth, "cert_public_pem", None)
if not pem:
raise RuntimeError(
"Tenant certificate has no public PEM stored. "
"Regenerate the certificate to enable mailbox scanning."
)
return pem
def _write_pfx(private_key_pem: str, public_cert_pem: str, out_path: Path, password: str) -> None:
    """
    Bundle a PEM private key and PEM certificate into an encrypted PFX file.

    The private key is loaded without a passphrase. The PKCS#12 bundle is
    encrypted with ``password`` and written to ``out_path``.
    """
    key_bytes = private_key_pem.encode()
    cert_bytes = public_cert_pem.encode()
    private_key = serialization.load_pem_private_key(key_bytes, password=None)
    certificate = x509.load_pem_x509_certificate(cert_bytes)
    encryption = serialization.BestAvailableEncryption(password.encode())
    out_path.write_bytes(
        pkcs12.serialize_key_and_certificates(
            name=b"clearview",
            key=private_key,
            cert=certificate,
            cas=None,
            encryption_algorithm=encryption,
        )
    )
def _probe_hint(message: str) -> str:
    """
    Append a remediation hint to a probe error message.

    The message is matched case-insensitively against known fragments; the
    first matching rule wins. Matched messages are cut to 200 characters
    before the hint is appended, unmatched ones are cut to 240 characters.
    """
    lowered = message.lower()
    rules = (
        (
            ("unauthorized", "401", "aadsts"),
            "verify Exchange.ManageAsApp permission, admin consent, and the Exchange Administrator role assignment",
        ),
        (
            ("not found", "couldn't find object"),
            "mailbox not found in this tenant",
        ),
        (
            ("module not available",),
            "install the ExchangeOnlineManagement module in the container",
        ),
    )
    for fragments, advice in rules:
        if any(fragment in lowered for fragment in fragments):
            return f"{message[:200]} — {advice}"
    return message[:240]

View File

@ -0,0 +1,828 @@
from __future__ import annotations
import re
import time
from dataclasses import dataclass
from urllib.parse import urlparse
import msal
import requests
from ..config import (
SCAN_HTTP_BACKOFF_SEC,
SCAN_HTTP_MAX_RETRIES,
SCAN_HTTP_TIMEOUT_SEC,
SCAN_LIST_PAGE_SIZE,
SCAN_MAX_ITEMS_PER_LIST,
SHAREPOINT_SCAN_MODE,
)
from .common import (
AuthConfig,
DeviationRecord,
ProbeResult,
ProgressCallback,
ScanResult,
validate_auth_config,
)
# One (principal, role) pair taken from a SharePoint role assignment.
# frozen=True makes instances hashable; the scanner stores them in sets to
# compare an object's assignments against the site-root baseline.
@dataclass(frozen=True)
class PermissionEntry:
    # Identity the role is granted to, as chosen by _display_principal.
    principal: str
    # Role definition name after _normalize_role_name has been applied.
    role_name: str
_TOKEN_CACHE: dict[str, str] = {}
def scan_site_for_deviations(
    site_url: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Scan SharePoint permission deviations versus site-root role assignments.

    Only SharePoint role assignments are used (site/list/folder/file scope).
    No filesystem/NTFS permission model is used.

    The site-root role assignments form the baseline. Every visible document
    library (BaseTemplate 101) with unique role assignments, and every item
    with unique role assignments inside such libraries, is compared against
    the baseline. Assignments absent from the baseline are reported with
    delta_type "added"; records already covered by an ancestor URL are
    removed before returning.

    Args:
        site_url: Absolute site URL. A trailing slash is tolerated.
        auth: App credentials used to acquire the access token.
        progress: Optional callback invoked as ``progress(activity, items)``.

    Returns:
        A ScanResult with the deviations and, when a library hit
        SCAN_MAX_ITEMS_PER_LIST, a warning joined with " | ".

    Raises:
        RuntimeError: for an unsupported SHAREPOINT_SCAN_MODE, or when a
            token or HTTP request fails.
    """
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ScanResult(
            deviations=[],
            warning=(
                "SharePoint scan mode is 'placeholder'. "
                "Set SHAREPOINT_SCAN_MODE=sharepoint_app_only and configure Azure app credentials."
            ),
        )
    if SHAREPOINT_SCAN_MODE != "sharepoint_app_only":
        raise RuntimeError(f"Unsupported SHAREPOINT_SCAN_MODE='{SHAREPOINT_SCAN_MODE}'")
    validate_auth_config(auth)

    def _report(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    # Number of scanned items announced per progress callback.
    progress_batch = 50
    # Strip a trailing slash so API URLs never contain "//_api".
    api_base = site_url.rstrip("/")
    # Shared query used for every roleassignments request in this scan.
    role_query = (
        "roleassignments?$expand=Member,RoleDefinitionBindings"
        "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name"
    )

    parsed = urlparse(site_url)
    host = parsed.netloc
    _report(f"Connecting to {host}")
    token = _get_token_for_host(host, auth)
    base_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }

    _report(f"Loading site permissions: {site_url}")
    root_assignments = _get_role_assignments(f"{api_base}/_api/web/{role_query}", base_headers)
    root_set = set(root_assignments)

    deviations: list[DeviationRecord] = []
    warnings: list[str] = []
    lists_url = (
        f"{api_base}/_api/web/lists"
        "?$select=Id,Title,BaseTemplate,Hidden,ItemCount,RootFolder/ServerRelativeUrl,HasUniqueRoleAssignments"
        "&$expand=RootFolder"
    )
    for lst in _iter_paged(lists_url, base_headers):
        if _to_bool(lst.get("Hidden")):
            continue
        # 101 is the document-library template; other list types are ignored.
        if _to_int(lst.get("BaseTemplate")) != 101:
            continue
        list_id = str(lst.get("Id", "")).strip()
        if not list_id:
            continue
        list_title = str(lst.get("Title") or "Document Library")
        list_url = _absolute_url(host, str((lst.get("RootFolder") or {}).get("ServerRelativeUrl") or ""))
        list_api = f"{api_base}/_api/web/lists(guid'{list_id}')"
        _report(f"Library: {list_title}")

        if _to_bool(lst.get("HasUniqueRoleAssignments")):
            list_assignments = _get_role_assignments(f"{list_api}/{role_query}", base_headers)
            deviations.extend(
                _deviation_records_only_added(
                    object_url=list_url,
                    object_type="DocumentLibrary",
                    root_set=root_set,
                    current_set=set(list_assignments),
                )
            )

        items_processed = 0
        items_total = 0
        items_url = (
            f"{list_api}/items"
            f"?$select=Id,FileRef,FileSystemObjectType,HasUniqueRoleAssignments&$top={SCAN_LIST_PAGE_SIZE}"
        )
        for item in _iter_paged(items_url, base_headers):
            items_total += 1
            if items_total % progress_batch == 0:
                _report(f"Library: {list_title} ({items_total} items scanned)", progress_batch)
            if not _to_bool(item.get("HasUniqueRoleAssignments")):
                continue
            # The cap applies only to items that need their own permission lookup.
            if items_processed >= SCAN_MAX_ITEMS_PER_LIST:
                warnings.append(
                    f"List '{list_title}' hit SCAN_MAX_ITEMS_PER_LIST={SCAN_MAX_ITEMS_PER_LIST}; remaining unique-permission items skipped"
                )
                break
            item_id = _to_int(item.get("Id"))
            if item_id <= 0:
                continue
            file_ref = str(item.get("FileRef") or "")
            if not file_ref:
                continue
            item_type = "File" if _to_int(item.get("FileSystemObjectType")) == 0 else "Folder"
            item_assignments = _get_role_assignments(
                f"{list_api}/items({item_id})/{role_query}",
                base_headers,
            )
            deviations.extend(
                _deviation_records_only_added(
                    object_url=_absolute_url(host, file_ref),
                    object_type=item_type,
                    root_set=root_set,
                    current_set=set(item_assignments),
                )
            )
            items_processed += 1

        # Report the items seen since the last full batch so the callback's
        # running total matches the number of items actually scanned.
        unreported = items_total % progress_batch
        if unreported:
            _report(f"Library: {list_title} ({items_total} items scanned)", unreported)

    _report("Scan complete", 0)
    warning = " | ".join(warnings) if warnings else None
    return ScanResult(deviations=_deduplicate_hierarchical(deviations), warning=warning)
def scan_site_root_permissions(
    site_url: str,
    auth: AuthConfig,
    progress: ProgressCallback | None = None,
) -> ScanResult:
    """
    Collect the role assignments at the site-root level without traversing
    libraries, folders, or items. Each assignment is reported as a record
    with delta_type='root' so it is distinguishable from the deviation scan.

    Principals matched by ``_is_noise_principal`` are left out of the records
    and only counted in the returned warning.

    Args:
        site_url: Absolute site URL. A trailing slash is tolerated for the
            API call; the records keep the URL exactly as given.
        auth: App credentials used to acquire the access token.
        progress: Optional callback invoked as ``progress(activity, items)``.

    Raises:
        RuntimeError: for an unsupported SHAREPOINT_SCAN_MODE, or when a
            token or HTTP request fails.
    """
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ScanResult(
            deviations=[],
            warning="SharePoint scan mode is 'placeholder'.",
        )
    if SHAREPOINT_SCAN_MODE != "sharepoint_app_only":
        raise RuntimeError(f"Unsupported SHAREPOINT_SCAN_MODE='{SHAREPOINT_SCAN_MODE}'")
    validate_auth_config(auth)

    def _report(activity: str, items: int = 0) -> None:
        if progress:
            progress(activity, items)

    parsed = urlparse(site_url)
    host = parsed.netloc
    _report(f"Connecting to {host}")
    token = _get_token_for_host(host, auth)
    headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }

    _report(f"Loading root permissions: {site_url}")
    # Strip a trailing slash so the API URL never contains "//_api".
    api_base = site_url.rstrip("/")
    root_assignments = _get_role_assignments(
        f"{api_base}/_api/web/roleassignments?$expand=Member,RoleDefinitionBindings"
        "&$select=Member/LoginName,Member/Title,Member/PrincipalType,RoleDefinitionBindings/Name",
        headers,
    )
    filtered = [e for e in root_assignments if not _is_noise_principal(e.principal)]
    # Sorted case-insensitively so the report order is stable between scans.
    ordered = sorted(filtered, key=lambda e: (e.principal.lower(), e.role_name.lower()))
    records: list[DeviationRecord] = [
        DeviationRecord(
            object_url=site_url,
            object_type="Site",
            principal=entry.principal,
            role_name=entry.role_name,
            delta_type="root",
        )
        for entry in ordered
    ]

    _report("Scan complete", 0)
    skipped = len(root_assignments) - len(filtered)
    warning = f"{skipped} SharingLinks/system entries hidden" if skipped else None
    return ScanResult(deviations=records, warning=warning)
def is_sharepoint_group_principal(principal: str) -> bool:
    """
    Heuristically decide whether *principal* names a SharePoint group.

    A SharePoint group has a plain display-name principal: no claim-encoded
    prefix, no pipe separator, no email shape, and no SharingLinks prefix.
    The result is used to decide which entries can be resolved via
    /_api/web/sitegroups/getbyname.
    """
    name = principal.strip() if principal else ""
    if not name:
        return False
    # Claim-encoded principals: c:0o.c|..., i:0#.f|..., c:0t.c|..., c:0(.s|...
    claim_encoded = name.startswith(("c:0", "i:0")) or "|" in name
    # Email-shaped principals are individual users.
    email_shaped = "@" in name
    # SharingLinks are handled by the dedicated resolver.
    sharing_link = name.lower().startswith("sharinglinks.")
    return not (claim_encoded or email_shaped or sharing_link)
# SharePoint PrincipalType: 1=User, 2=DistributionList, 4=SecurityGroup,
# 8=SharePointGroup, 16=All.
_PRINCIPAL_TYPE_USER = 1
_GUID_RE = re.compile(
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)
def _extract_user_upn(login: str) -> str:
    """
    Extract the UPN/email from a user claim LoginName.

    The text after the last ``|`` is returned when it contains ``@``;
    otherwise an empty string is returned so the caller can fall back.

    i:0#.f|membership|jan@contoso.com -> jan@contoso.com
    i:0#.w|contoso\\jan -> "" (no UPN; caller falls back to Title)
    """
    if not login:
        return ""
    candidate = login.rpartition("|")[2].strip()
    if "@" in candidate:
        return candidate
    return ""
def _display_principal(login: str, title: str, principal_type: int) -> str:
    """
    Pick the most readable identity for a role-assignment member.

    Individual users are shown by the UPN/email embedded in their LoginName
    when one is present. Everything else (groups, system/built-in accounts,
    on-prem claims) keeps the original LoginName so claim object ids stay
    resolvable and the site-root noise filter still recognises them. The
    Title is used only when the LoginName is empty.
    """
    fallback = login or title
    if principal_type != _PRINCIPAL_TYPE_USER:
        return fallback
    return _extract_user_upn(login) or fallback
def _extract_aad_group_object_id(principal: str) -> str | None:
    """
    Return the Entra/AAD object id encoded in a claim principal, or None.

    c:0t.c|tenant|<guid> -> <guid> (security group)
    c:0o.c|federateddirectoryclaimprovider|<guid> -> <guid> (M365 group members)
    c:0o.c|federateddirectoryclaimprovider|<guid>_o -> <guid> (M365 group owners)
    """
    if not principal:
        return None
    group_prefixes = (
        "c:0t.c|tenant|",
        "c:0o.c|federateddirectoryclaimprovider|",
    )
    # The prefix check is case-insensitive; the id is taken from the original text.
    if not principal.strip().lower().startswith(group_prefixes):
        return None
    candidate = principal.rsplit("|", 1)[-1].strip()
    if candidate.endswith("_o"):
        # Owner claims carry an "_o" suffix after the group id.
        candidate = candidate[:-2]
    if _GUID_RE.match(candidate):
        return candidate
    return None
def is_aad_group_principal(principal: str) -> bool:
    """Return True when *principal* encodes an Entra/AAD or M365 group object id that can be expanded via Graph."""
    object_id = _extract_aad_group_object_id(principal)
    return object_id is not None
def resolve_aad_group_members(principal: str, auth: AuthConfig) -> list[str]:
    """
    Expand an Entra/AAD or M365 group assigned directly at root into its
    member (and owner) list via Microsoft Graph.

    Returns an empty list when *principal* does not encode such a group or
    when Graph cannot read it. The auth config is validated only after the
    principal has been recognised as a group.
    """
    group_object_id = _extract_aad_group_object_id(principal)
    if group_object_id:
        validate_auth_config(auth)
        visited: set[str] = set()
        return _expand_aad_group_by_id(group_object_id, auth, visited, depth=0)
    return []
def _is_noise_principal(principal: str) -> bool:
    """
    Tell whether a site-root principal should be hidden from the inventory.

    SharePoint surfaces several principal types at site-root level that are
    not part of a meaningful root-permission inventory:
    - SharingLinks.<guid>.<LinkType>.<guid> auto-created when a child item is shared
    - System / built-in accounts (SHAREPOINT\\system, NT AUTHORITY\\*)
    - "Limited Access System Group" SP groups

    An empty principal is treated as noise as well.
    """
    if not principal:
        return True
    lowered = principal.lower()
    return (
        "sharinglinks." in lowered
        or lowered.startswith(("sharepoint\\", "nt authority\\"))
        or "limited access system group" in lowered
    )
def probe_site(site_url: str, auth: AuthConfig) -> ProbeResult:
    """
    Lightweight preflight: validate that the configured credentials can
    reach the site and read role assignments.

    Failures are never raised; every stage (config validation, URL parsing,
    token acquisition, site read, role-assignment read) is reported as a
    ProbeResult with ``ok=False`` and a message naming the failing stage.

    Args:
        site_url: Absolute site URL. A trailing slash is tolerated.
        auth: App credentials to test.
    """
    if SHAREPOINT_SCAN_MODE == "placeholder":
        return ProbeResult(ok=False, message="SHAREPOINT_SCAN_MODE=placeholder")
    try:
        validate_auth_config(auth)
    except Exception as exc:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"Config: {exc}")

    parsed = urlparse(site_url)
    host = parsed.netloc
    if not host:
        return ProbeResult(ok=False, message="Invalid site URL")

    try:
        token = _get_token_for_host(host, auth)
    except Exception as exc:  # noqa: BLE001
        return ProbeResult(ok=False, message=f"Token: {str(exc)[:240]}")
    headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {token}",
    }

    # Strip a trailing slash so the probe URLs never contain "//_api".
    api_base = site_url.rstrip("/")
    # Ordered checks: basic site read first, then the role-assignment read.
    checks = (
        ("site", f"{api_base}/_api/web?$select=Title"),
        ("roleassignments", f"{api_base}/_api/web/roleassignments?$top=1&$select=PrincipalId"),
    )
    for stage, url in checks:
        try:
            _probe_request(url, headers)
        except Exception as exc:  # noqa: BLE001
            return ProbeResult(ok=False, message=_probe_hint(str(exc), stage=stage))
    return ProbeResult(ok=True, message="OK")
def resolve_sharing_link_members(
    site_url: str,
    group_name: str,
    auth: AuthConfig,
) -> list[str]:
    """
    Return the rendered members of the SharePoint group *group_name*.

    Members that are themselves M365/AAD groups are expanded via Microsoft
    Graph (recursion-bounded). The list is empty for anonymous links and for
    groups that cannot be read.
    """
    visited_groups: set[str] = set()
    rendered: list[str] = []
    for raw_user in _get_sp_group_users(site_url, group_name, auth):
        rendered += _render_principal(raw_user, auth, visited_groups, depth=0)
    return rendered
def _get_sp_group_users(site_url: str, group_name: str, auth: AuthConfig) -> list[dict]:
    """
    Fetch the raw user entries of a SharePoint site group by name.

    Validation and token errors propagate to the caller; a failing users
    request is swallowed and yields an empty list.
    """
    validate_auth_config(auth)
    host = urlparse(site_url).netloc
    access_token = _get_token_for_host(host, auth)
    request_headers = {
        "Accept": "application/json;odata=nometadata",
        "Authorization": f"Bearer {access_token}",
    }
    # Single quotes are doubled because the name sits inside an OData string literal.
    quoted_name = group_name.replace("'", "''")
    users_url = (
        f"{site_url}/_api/web/sitegroups/getbyname('{quoted_name}')/users"
        "?$select=LoginName,Email,Title,PrincipalType"
    )
    try:
        payload = _request_json(users_url, request_headers)
    except Exception:  # noqa: BLE001
        # Best effort: an unreadable group is reported as having no users.
        return []
    return list(_extract_values(payload))
# SharePoint PrincipalType values:
# 1 = User, 2 = DistributionList, 4 = SecurityGroup, 8 = SharePointGroup, 16 = All
_PRINCIPAL_TYPE_GROUP = {2, 4}
def _render_principal(user: dict, auth: AuthConfig, seen: set[str], depth: int) -> list[str]:
    """
    Render one SharePoint group user entry as zero or one display strings.

    SHAREPOINT\\ system accounts and the ``c:0(.s|true`` claim are dropped.
    Group-like entries that have an email are expanded through Graph while
    *depth* is below 3. All other entries are shown by email, then title,
    then login name, whichever is non-empty first.
    """
    email = str(user.get("Email") or "").strip()
    login = str(user.get("LoginName") or "").strip()
    title = str(user.get("Title") or "").strip()

    if login.upper().startswith("SHAREPOINT\\") or login.startswith("c:0(.s|true"):
        return []

    login_lower = login.lower()
    looks_like_group = (
        _to_int(user.get("PrincipalType")) in _PRINCIPAL_TYPE_GROUP
        or "federateddirectoryclaimprovider" in login_lower
        or "tenant|" in login_lower
    )
    if looks_like_group and email and depth < 3:
        members = _expand_aad_group_via_graph(email, auth, seen, depth=depth + 1)
        caption = title or email
        if not members:
            return [f"{caption} (group, no readable members)"]
        return [f"{caption} [{', '.join(members)}]"]

    for candidate in (email, title, login):
        if candidate:
            return [candidate]
    return []
def _expand_aad_group_via_graph(
    group_mail: str,
    auth: AuthConfig,
    seen: set[str],
    depth: int,
) -> list[str]:
    """
    Look up a group by mail address in Microsoft Graph and expand its members.

    Args:
        group_mail: Mail address of the group; compared case-insensitively.
        auth: App credentials used to acquire the Graph token.
        seen: Keys of groups already visited; updated in place to break cycles.
        depth: Current recursion depth; expansion stops above 3.

    Returns:
        The expanded member list. It is empty when the mail is blank, the
        group was already visited, or the token or lookup failed, and a
        single recursion-limit marker when *depth* exceeds 3.
    """
    # Local import: the module only imports urlparse from urllib.parse.
    from urllib.parse import quote

    if depth > 3:
        return ["… (recursion limit)"]
    key = group_mail.strip().lower()
    if not key or key in seen:
        return []
    seen.add(key)

    try:
        token = _get_token_for_host("graph.microsoft.com", auth)
    except Exception:  # noqa: BLE001
        return []
    headers = {"Accept": "application/json", "Authorization": f"Bearer {token}"}

    # Double single quotes for the OData string literal, then percent-encode
    # the value so characters such as "+", "&" or "#" cannot alter the query.
    safe_mail = quote(key.replace("'", "''"), safe="")
    lookup_url = (
        "https://graph.microsoft.com/v1.0/groups"
        f"?$filter=mail eq '{safe_mail}'&$select=id,displayName"
    )
    try:
        data = _request_json(lookup_url, headers)
    except Exception:  # noqa: BLE001
        return []

    groups = data.get("value") or []
    if not groups:
        return []
    group_id = str(groups[0].get("id") or "").strip()
    if not group_id:
        return []
    return _expand_aad_group_by_id(group_id, auth, seen, depth)
def _expand_aad_group_by_id(
    group_id: str,
    auth: AuthConfig,
    seen: set[str],
    depth: int,
) -> list[str]:
    """
    Expand a Graph group, addressed by object id, into members and owners.

    Returns a recursion-limit marker when *depth* exceeds 3, and an empty
    list for a blank id, an already visited group, or a token failure.
    Members come first, then owners; duplicates are removed in that order.
    """
    if depth > 3:
        return ["… (recursion limit)"]

    trimmed_id = group_id.strip()
    visit_key = f"id:{trimmed_id.lower()}"
    if not trimmed_id or visit_key in seen:
        return []
    seen.add(visit_key)

    try:
        graph_token = _get_token_for_host("graph.microsoft.com", auth)
    except Exception:  # noqa: BLE001
        return []
    graph_headers = {"Accept": "application/json", "Authorization": f"Bearer {graph_token}"}

    members = _graph_collect(f"/groups/{group_id}/members", graph_headers, auth, seen, depth, owner=False)
    owners = _graph_collect(f"/groups/{group_id}/owners", graph_headers, auth, seen, depth, owner=True)
    return _dedup_preserve_order(members + owners)
def _graph_collect(
    relative: str,
    headers: dict[str, str],
    auth: AuthConfig,
    seen: set[str],
    depth: int,
    owner: bool,
) -> list[str]:
    """
    Page through a Graph directory-object collection and render each entry.

    Users are rendered by userPrincipalName, mail, or displayName (first
    non-empty), with an " (owner)" suffix when *owner* is true. Nested groups
    are expanded recursively: by object id when present, otherwise by mail.
    Groups with neither are skipped. Entries of other types are ignored.

    Args:
        relative: Path below ``https://graph.microsoft.com/v1.0``.
        headers: Request headers including the bearer token.
        auth: Credentials passed on for nested expansion.
        seen: Visited-group keys shared across the recursion.
        depth: Current recursion depth; nested groups use ``depth + 1``.
        owner: Whether the entries are group owners.

    Returns:
        The rendered entries. A failing page request ends collection early
        and returns what was gathered so far.
    """
    next_url: str | None = (
        f"https://graph.microsoft.com/v1.0{relative}"
        "?$select=id,userPrincipalName,mail,displayName"
    )
    out: list[str] = []
    while next_url:
        try:
            data = _request_json(next_url, headers)
        except Exception:  # noqa: BLE001
            # Best effort: keep the pages that were read successfully.
            return out
        # "or []" also covers an explicit null "value".
        for entry in data.get("value") or []:
            otype = str(entry.get("@odata.type") or "")
            if otype.endswith("user"):
                upn = (
                    str(entry.get("userPrincipalName") or "").strip()
                    or str(entry.get("mail") or "").strip()
                    or str(entry.get("displayName") or "").strip()
                )
                if upn:
                    out.append(f"{upn} (owner)" if owner else upn)
            elif otype.endswith("group"):
                nested_id = str(entry.get("id") or "").strip()
                nested_mail = str(entry.get("mail") or "").strip()
                if nested_id:
                    # Expanding by id also covers groups that have no mail
                    # address and avoids an extra lookup by mail.
                    nested = _expand_aad_group_by_id(nested_id, auth, seen, depth + 1)
                elif nested_mail:
                    nested = _expand_aad_group_via_graph(nested_mail, auth, seen, depth + 1)
                else:
                    continue
                label = str(entry.get("displayName") or nested_mail or nested_id)
                if nested:
                    out.append(f"{label} [{', '.join(nested)}]")
                else:
                    out.append(f"{label} (group, no readable members)")
        nl = data.get("@odata.nextLink")
        next_url = nl if isinstance(nl, str) and nl else None
    return out
def _dedup_preserve_order(items: list[str]) -> list[str]:
    """Return *items* without duplicates, keeping the first occurrence of each value in order."""
    # Dict keys are unique and keep insertion order.
    return list(dict.fromkeys(items))
def _probe_request(url: str, headers: dict[str, str]) -> None:
    """
    Issue one GET for a preflight check.

    Raises:
        RuntimeError: when the status code is 400 or higher. The message
            holds the status code and up to 200 characters of the body
            (``{}`` when the body is empty).
    """
    response = requests.get(url, headers=headers, timeout=SCAN_HTTP_TIMEOUT_SEC)
    if response.status_code < 400:
        return
    body_excerpt = (response.text or "").strip()[:200]
    raise RuntimeError(f"HTTP {response.status_code}: {body_excerpt or '{}'}")
def _probe_hint(error: str, stage: str) -> str:
    """
    Append a likely cause to a probe error, based on the HTTP code in it.

    The codes are looked up as plain substrings of *error* in the order 401,
    403, 404. For 401 the hint depends on *stage*. Matched errors are cut to
    180 characters before the hint, unmatched ones to 220 characters.
    """
    excerpt = error[:180]
    if "401" in error:
        if stage == "roleassignments":
            cause = "likely missing admin consent or insufficient permission"
        else:
            cause = "likely certificate not uploaded in Azure, or wrong tenant/client id"
        return f"{excerpt} — {cause}"
    other_causes = (
        ("403", "app has no access to this site (Sites.Selected without per-site grant?)"),
        ("404", "site not found"),
    )
    for code, cause in other_causes:
        if code in error:
            return f"{excerpt} — {cause}"
    return error[:220]
# Expiry per _TOKEN_CACHE key, as a time.monotonic() timestamp.
_TOKEN_EXPIRES_AT: dict[str, float] = {}
# Stop reusing a cached token this many seconds before it expires.
_TOKEN_EXPIRY_SKEW_SEC = 300


def _get_token_for_host(host: str, auth: AuthConfig) -> str:
    """
    Return an app-only access token for ``https://<host>/.default``.

    Certificate credentials are used when both thumbprint and private key are
    set on *auth*; otherwise the client secret is used. Tokens are cached per
    host, tenant, client, and auth method, and reused only until shortly
    before the lifetime reported by the token endpoint ends, so an expired
    token is never handed out again.

    Raises:
        RuntimeError: when the token endpoint returns no access token.
    """
    auth_method = "cert" if auth.cert_thumbprint and auth.cert_private_key else "secret"
    cache_key = f"{host}|{auth.tenant_id}|{auth.client_id}|{auth_method}"
    cached = _TOKEN_CACHE.get(cache_key)
    # An entry without a recorded expiry counts as expired.
    if cached and time.monotonic() < _TOKEN_EXPIRES_AT.get(cache_key, 0.0):
        return cached

    scope = f"https://{host}/.default"
    authority = f"https://login.microsoftonline.com/{auth.tenant_id}"
    if auth_method == "cert":
        client_credential = {
            "thumbprint": auth.cert_thumbprint,
            "private_key": auth.cert_private_key,
        }
    else:
        client_credential = auth.client_secret

    app = msal.ConfidentialClientApplication(
        client_id=auth.client_id,
        authority=authority,
        client_credential=client_credential,
    )
    result = app.acquire_token_for_client(scopes=[scope])
    if "access_token" not in result:
        error = result.get("error", "unknown")
        description = result.get("error_description", "")
        raise RuntimeError(f"Token request failed ({error}): {description[:300]}")

    token = str(result["access_token"])
    lifetime_sec = _to_int(result.get("expires_in"))
    if lifetime_sec > _TOKEN_EXPIRY_SKEW_SEC:
        _TOKEN_CACHE[cache_key] = token
        _TOKEN_EXPIRES_AT[cache_key] = time.monotonic() + lifetime_sec - _TOKEN_EXPIRY_SKEW_SEC
    else:
        # Lifetime unknown or too short to reuse safely: do not cache.
        _TOKEN_CACHE.pop(cache_key, None)
        _TOKEN_EXPIRES_AT.pop(cache_key, None)
    return token
def _iter_paged(url: str, headers: dict[str, str]):
    """Yield every item of a paged collection, following next links until none is returned."""
    page_url = url
    while page_url:
        page = _request_json(page_url, headers)
        yield from _extract_values(page)
        page_url = _extract_next_link(page)
def _request_json(url: str, headers: dict[str, str]) -> dict:
    """
    GET *url* and return the decoded JSON body, retrying on failure.

    Up to SCAN_HTTP_MAX_RETRIES attempts are made. A 429 or 503 response
    waits for the Retry-After value when it is a positive integer, otherwise
    for a linear backoff. Any other error is retried with the linear
    backoff. No wait happens after the final attempt.

    Raises:
        RuntimeError: when all attempts fail; the message names the URL and
            the last error, including the status code when the last
            attempt was throttled.
    """
    last_error: str | None = None
    for attempt in range(1, SCAN_HTTP_MAX_RETRIES + 1):
        is_last_attempt = attempt >= SCAN_HTTP_MAX_RETRIES
        try:
            response = requests.get(url, headers=headers, timeout=SCAN_HTTP_TIMEOUT_SEC)
            if response.status_code in (429, 503):
                # Record the reason so an exhausted retry budget is not
                # reported as "None".
                last_error = f"HTTP {response.status_code}: throttled or service unavailable"
                if not is_last_attempt:
                    retry_after = _to_int(response.headers.get("Retry-After"))
                    delay = retry_after if retry_after > 0 else SCAN_HTTP_BACKOFF_SEC * attempt
                    time.sleep(delay)
                continue
            if response.status_code >= 400:
                raise RuntimeError(f"HTTP {response.status_code}: {response.text[:300]}")
            return response.json()
        except Exception as exc:  # noqa: BLE001
            last_error = str(exc)
            if not is_last_attempt:
                time.sleep(SCAN_HTTP_BACKOFF_SEC * attempt)
                continue
            raise RuntimeError(f"Request failed for {url}: {last_error}") from exc
    raise RuntimeError(f"Request failed for {url}: {last_error}")
def _extract_values(data: dict) -> list[dict]:
    """
    Return the item list of an OData response.

    Both the flat shape (``{"value": [...]}``) and the verbose shape
    (``{"d": {"results": [...]}}``) are supported; anything else gives ``[]``.
    """
    flat = data.get("value")
    if isinstance(flat, list):
        return flat
    envelope = data.get("d")
    verbose = envelope.get("results") if isinstance(envelope, dict) else None
    return verbose if isinstance(verbose, list) else []
def _extract_next_link(data: dict) -> str | None:
    """
    Return the next-page URL of an OData response, or None on the last page.

    The top-level keys ``@odata.nextLink``, ``odata.nextLink`` and ``__next``
    are checked in that order, then ``d.__next``. Only non-empty strings count.
    """
    candidates = [data.get(key) for key in ("@odata.nextLink", "odata.nextLink", "__next")]
    envelope = data.get("d")
    if isinstance(envelope, dict):
        candidates.append(envelope.get("__next"))
    for link in candidates:
        if isinstance(link, str) and link:
            return link
    return None
def _get_role_assignments(url: str, headers: dict[str, str]) -> list[PermissionEntry]:
    """
    Fetch a roleassignments collection and flatten it into permission entries.

    One entry is produced per (principal, role) pair. Members without a
    usable principal are skipped, and so is the "Limited Access" role.
    """
    entries: list[PermissionEntry] = []
    for row in _extract_values(_request_json(url, headers)):
        member = row.get("Member") or {}
        principal = _display_principal(
            str(member.get("LoginName") or "").strip(),
            str(member.get("Title") or "").strip(),
            _to_int(member.get("PrincipalType")),
        )
        if not principal:
            continue
        entries.extend(
            PermissionEntry(principal=principal, role_name=role)
            for role in _extract_role_names(row.get("RoleDefinitionBindings"))
            if role.lower() != "limited access"
        )
    return entries
# Dutch role-definition names (lower-case) mapped to their English names.
_ROLE_NAME_NL_TO_EN: dict[str, str] = {
    "volledig beheer": "Full Control",
    "ontwerpen": "Design",
    "bewerken": "Edit",
    "bijdragen": "Contribute",
    "lezen": "Read",
    "beperkte toegang": "Limited Access",
    "goedkeuren": "Approve",
    "hiërarchieën beheren": "Manage Hierarchy",
    "weergeven alleen": "View Only",
    "beperkt lezen": "Restricted Read",
}


def _normalize_role_name(name: str) -> str:
    """Translate a known Dutch role name to English (case-insensitive); return other names unchanged."""
    english = _ROLE_NAME_NL_TO_EN.get(name.lower())
    if english is None:
        return name
    return english
def _extract_role_names(bindings) -> list[str]:
    """
    Return the normalized role names of a RoleDefinitionBindings payload.

    The payload may be a plain list or a dict wrapping the list under
    ``results``. Entries that are not dicts or have no truthy ``Name`` are
    skipped; any other payload shape gives an empty list.
    """
    entries = bindings.get("results") if isinstance(bindings, dict) else bindings
    if not isinstance(entries, list):
        return []
    names: list[str] = []
    for binding in entries:
        if isinstance(binding, dict) and binding.get("Name"):
            names.append(_normalize_role_name(str(binding.get("Name") or "").strip()))
    return names
def _deduplicate_hierarchical(deviations: list[DeviationRecord]) -> list[DeviationRecord]:
    """
    Drop deviations already covered by a parent in the URL hierarchy.

    Records are visited from the shortest to the longest URL. A record is
    dropped when a kept record with the same principal and role has the same
    URL or a URL that is a path prefix of it (trailing slashes ignored).
    """
    kept: list[DeviationRecord] = []
    kept_urls_by_grant: dict[tuple[str, str], list[str]] = {}
    for record in sorted(deviations, key=lambda d: len(d.object_url)):
        current_url = record.object_url.rstrip("/")
        grant_urls = kept_urls_by_grant.setdefault((record.principal, record.role_name), [])
        is_covered = False
        for kept_url in grant_urls:
            base = kept_url.rstrip("/")
            if current_url == base or current_url.startswith(base + "/"):
                is_covered = True
                break
        if is_covered:
            continue
        kept.append(record)
        grant_urls.append(record.object_url)
    return kept
def _deviation_records_only_added(
    object_url: str,
    object_type: str,
    root_set: set[PermissionEntry],
    current_set: set[PermissionEntry],
) -> list[DeviationRecord]:
    """
    Build "added" deviation records for entries missing from the baseline.

    Every entry in *current_set* that is not in *root_set* becomes one
    record for *object_url*. Records are ordered case-insensitively by
    principal, then role name.
    """
    extra_entries = sorted(
        current_set - root_set,
        key=lambda entry: (entry.principal.lower(), entry.role_name.lower()),
    )
    return [
        DeviationRecord(
            object_url=object_url,
            object_type=object_type,
            principal=extra.principal,
            role_name=extra.role_name,
            delta_type="added",
        )
        for extra in extra_entries
    ]
def _absolute_url(host: str, server_relative_url: str) -> str:
    """
    Turn a server-relative URL into an absolute https URL on *host*.

    An empty path gives the bare host URL, an already absolute http(s) URL
    is returned unchanged, and a missing leading slash is added.
    """
    if not server_relative_url:
        return f"https://{host}"
    if server_relative_url.startswith(("http://", "https://")):
        return server_relative_url
    path = server_relative_url if server_relative_url.startswith("/") else "/" + server_relative_url
    return f"https://{host}{path}"
def _to_int(value) -> int:
    """
    Convert *value* to an int, returning 0 when that is not possible.

    None, non-numeric strings, unsupported types, and non-finite floats
    (infinity, NaN) all give 0, so callers never have to handle a
    conversion error.
    """
    if value is None:
        return 0
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is raised by int(float("inf")); NaN raises ValueError.
        return 0
def _to_bool(value) -> bool:
    """
    Interpret *value* as a boolean.

    Strings are true only when, trimmed and lower-cased, they equal "1",
    "true" or "yes". Every other value uses normal truthiness.
    """
    if isinstance(value, str):
        normalized = value.strip().lower()
        return normalized in ("1", "true", "yes")
    return bool(value)

View File

@ -10,12 +10,14 @@ class CreateTenantProfileRequest(BaseModel):
tenant_id: str
client_id: str
client_secret: str | None = None
primary_domain: str | None = None
class TenantProfileItem(BaseModel):
id: str
name: str
tenant_id: str
primary_domain: str | None = None
client_id: str
has_certificate: bool
cert_thumbprint: str | None
@ -31,7 +33,13 @@ class TenantCertificateResponse(BaseModel):
class CreateScanJobRequest(BaseModel):
scan_type: str = "sharepoint"
site_urls: list[HttpUrl] = Field(default_factory=list)
mailboxes: list[str] = Field(default_factory=list)
scan_all_mailboxes: bool = False
organization: str | None = None
group_ids: list[str] = Field(default_factory=list)
scan_all_groups: bool = False
skip_default_sites: bool = True
tenant_profile_id: str | None = None
tenant_id: str | None = None
@ -43,6 +51,7 @@ class ScanJobSummary(BaseModel):
id: str
status: str
source_type: str
scan_type: str
skip_default_sites: bool
tenant_profile_id: str | None
tenant_name: str | None
@ -72,6 +81,16 @@ class ScanTargetItem(BaseModel):
error_message: str | None
started_at: datetime | None
finished_at: datetime | None
last_probe_at: datetime | None = None
last_probe_ok: bool | None = None
last_probe_message: str | None = None
# API response model describing the outcome of a connection probe for one
# scan target. The field names mirror the last_probe_* fields on ScanTargetItem.
class ProbeResultResponse(BaseModel):
    # Identifier of the probed scan target.
    target_id: int
    # Whether the probe succeeded.
    ok: bool
    # Probe message (presumably "OK" or a failure hint; confirm against the endpoint).
    message: str
    # Timestamp of the probe.
    last_probe_at: datetime
class PermissionDeviationItem(BaseModel):
@ -82,7 +101,8 @@ class PermissionDeviationItem(BaseModel):
principal: str
role_name: str
delta_type: str
resolved_members: str | None
permission_type: str | None = None
resolved_members: str | None = None
created_at: datetime
@ -95,6 +115,16 @@ class ResolveSharingLinksResponse(BaseModel):
updated_deviations: int
# API response model with the counters of a group-resolution run.
# NOTE(review): exact counting rules are defined by the endpoint, not visible here.
class ResolveGroupsResponse(BaseModel):
    # Number of groups that were resolved.
    resolved_groups: int
    # Number of groups that were skipped.
    skipped_groups: int
    # Number of deviation rows that were updated.
    updated_deviations: int
# API response model with a count per sharing-link type.
class SharingLinkTypesResponse(BaseModel):
    # Maps a type name to its count (presumably the SharingLinks link type; confirm against the endpoint).
    type_counts: dict[str, int]
class ScanJobDetail(ScanJobSummary):
targets: list[ScanTargetItem]
deviations: list[PermissionDeviationItem]

View File

@ -0,0 +1,22 @@
"""Clearview version metadata.
The three-part VERSION is the release version. Dev/test builds append the
explicit BUILD segment. Build numbers are source state, not derived from git
history, so operators can see exactly which image build is running.
"""
from __future__ import annotations
# Three-part release version.
VERSION = "v0.1.0"
# Explicit build number appended for dev/test builds; values of 0 or less hide it.
BUILD = 1


def display_version() -> str:
    """Return the user-visible Clearview version."""
    return f"{VERSION}.{BUILD}" if BUILD > 0 else VERSION


def cache_version() -> str:
    """Return the static-asset cache-buster version."""
    shown = display_version()
    # Leading "v" characters are dropped for the cache-buster.
    return shown.lstrip("v")

View File

@ -16,7 +16,7 @@ from .config import (
)
from .db import SessionLocal
from .models import PermissionDeviation, ScanJob, ScanTarget, TenantProfile
from .scanner import AuthConfig, scan_site_for_deviations
from .scanners import AuthConfig, ProbeResult, probe, scan
log = logging.getLogger(__name__)
@ -121,6 +121,28 @@ class ScanWorker:
job.updated_at = now
db.commit()
probe = self._run_probe(target_id)
if not probe.ok:
with SessionLocal() as db:
job = db.get(ScanJob, job_id)
target = db.get(ScanTarget, target_id)
if not job or not target:
return
now = datetime.utcnow()
target.status = "failed"
target.attempts = 1
target.error_message = f"Preflight: {probe.message}"
target.finished_at = now
target.updated_at = now
job.processed_targets += 1
job.failed_targets += 1
job.heartbeat_at = now
job.updated_at = now
if not job.error_message:
job.error_message = "One or more scan targets failed preflight"
db.commit()
return
max_attempts = SCAN_TARGET_MAX_RETRIES + 1
last_error: str | None = None
latest_warning: str | None = None
@ -147,6 +169,7 @@ class ScanWorker:
principal=deviation.principal,
role_name=deviation.role_name,
delta_type=deviation.delta_type,
permission_type=deviation.permission_type,
)
)
@ -196,6 +219,48 @@ class ScanWorker:
db.commit()
def _run_probe(self, target_id: int):
    """
    Run the connection preflight for one target and persist its outcome.

    The target, its job, and the tenant profile's certificate fields are
    read in one session. The probe runs outside any session, and the result
    is written to the target's last_probe_* fields in a second session.
    A missing target or job gives a failed ProbeResult and nothing is stored.
    """
    with SessionLocal() as db:
        target = db.get(ScanTarget, target_id)
        if not target:
            return ProbeResult(ok=False, message="Target not found")
        site_url = target.site_url
        job = db.get(ScanJob, target.job_id)
        if not job:
            return ProbeResult(ok=False, message="Job not found")
        scan_type = job.scan_type or "sharepoint"

        # Certificate material exists only when the job references a tenant profile.
        profile = db.get(TenantProfile, job.tenant_profile_id) if job.tenant_profile_id else None
        auth = AuthConfig(
            tenant_id=job.auth_tenant_id or "",
            client_id=job.auth_client_id or "",
            client_secret=job.auth_client_secret or "",
            cert_private_key=profile.cert_private_key if profile else None,
            cert_thumbprint=profile.cert_thumbprint if profile else None,
            cert_public_pem=profile.cert_public_pem if profile else None,
        )

    outcome = probe(scan_type, site_url, auth)

    with SessionLocal() as db:
        stored_target = db.get(ScanTarget, target_id)
        if stored_target:
            probed_at = datetime.utcnow()
            stored_target.last_probe_at = probed_at
            stored_target.last_probe_ok = outcome.ok
            stored_target.last_probe_message = outcome.message
            stored_target.updated_at = probed_at
            db.commit()
    return outcome
def _scan_with_timeout(self, target_id: int, timeout_sec: int):
with SessionLocal() as db:
target = db.get(ScanTarget, target_id)
@ -205,25 +270,30 @@ class ScanWorker:
job = db.get(ScanJob, target.job_id)
if not job:
raise RuntimeError(f"Job {target.job_id} not found for target {target_id}")
scan_type = job.scan_type or "sharepoint"
job_id = job.id
cert_private_key: str | None = None
cert_thumbprint: str | None = None
cert_public_pem: str | None = None
if job.tenant_profile_id:
profile = db.get(TenantProfile, job.tenant_profile_id)
if profile:
cert_private_key = profile.cert_private_key
cert_thumbprint = profile.cert_thumbprint
cert_public_pem = profile.cert_public_pem
auth = AuthConfig(
tenant_id=job.auth_tenant_id or "",
client_id=job.auth_client_id or "",
client_secret=job.auth_client_secret or "",
cert_private_key=cert_private_key,
cert_thumbprint=cert_thumbprint,
cert_public_pem=cert_public_pem,
)
def progress_callback(activity: str, items: int) -> None:
try:
with SessionLocal() as db:
job = db.get(ScanJob, target.job_id)
job = db.get(ScanJob, job_id)
if job:
job.scan_activity = activity
if items > 0:
@ -235,7 +305,7 @@ class ScanWorker:
pass
with ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(scan_site_for_deviations, site_url, auth, progress_callback)
future = pool.submit(scan, scan_type, site_url, auth, progress_callback)
try:
return future.result(timeout=timeout_sec)
except FutureTimeoutError as exc:

View File

@ -2,7 +2,11 @@
## Scope
Clearview scans SharePoint sites for permission deviations from the site root permission baseline.
Clearview scans Microsoft 365 for permission deviations across two domains:
1. **SharePoint sites** — deviations relative to the site root permission baseline (libraries, folders, files).
2. **Exchange Online mailboxes** — non-default permissions: Full Access, Send As, Send on Behalf, and folder delegations (Calendar, Inbox).
Designed to monitor multiple customer tenants from a single instance.
## Runtime Architecture
@ -16,13 +20,20 @@ All services are defined in `stack/docker-compose.yml` for Portainer deployment.
## Application Layout
- `containers/clearview/site/`
- Frontend UI (tenant management, manual URL input, CSV import, jobs, deviations)
- Frontend UI: vanilla HTML/JS/CSS with a fixed sidebar and hash-based routing.
- Routes: `#/dashboard`, `#/jobs`, `#/scan/sharepoint`, `#/scan/mailbox`, `#/tenants`, `#/settings`.
- `containers/clearview/src/clearview_app/`
- FastAPI backend
- SQLAlchemy models
- CSV parser
- Default-site filtering
- CSV parser (SharePoint URLs and mailbox UPNs)
- Default-site filtering (SharePoint only)
- Background worker for long-running scans
- `containers/clearview/src/clearview_app/scanners/`
- `common.py` — `AuthConfig`, `DeviationRecord`, `ScanResult`, `ProbeResult`, shared helpers.
- `sharepoint.py` — SharePoint REST scanner, MSAL token cache, hierarchical dedup, SharingLinks helpers.
- `mailbox.py` — Exchange Online scanner; spawns `pwsh` with the EXO scripts.
- `exo_scripts/` — PowerShell scripts (`probe.ps1`, `get-permissions.ps1`).
- Dispatcher: `scanners.scan(scan_type, target, auth, progress)` and `scanners.probe(scan_type, target, auth)`.
## Multi-Tenant Model
@ -71,6 +82,7 @@ The scanner uses the certificate path when `cert_thumbprint` is present on the t
|---|---|
| `client_secret` | Azure client secret (optional when a certificate is available) |
| `cert_private_key` | PEM-encoded private key (internal, never exposed via API) |
| `cert_public_pem` | PEM-encoded public certificate (used to build a PFX for Exchange Online PowerShell) |
| `cert_thumbprint` | SHA-1 thumbprint (used by MSAL) |
| `cert_expires_at` | Certificate expiry date |
@ -85,6 +97,26 @@ Scans run asynchronously through a DB-backed job queue:
5. Background worker processes targets with retries and per-target timeout.
6. API/UI expose progress and deviations per job.
### Connection Preflight
Before the full scan of a target runs, the worker performs a lightweight probe to verify that the configured credentials can actually reach the site and read role assignments. This catches the common setup errors (missing admin consent, certificate not yet uploaded to Azure, wrong tenant/client ID) early and with a clear message, instead of producing a silent 401 during the full scan.
The probe issues two calls:
1. `GET /_api/web?$select=Title` — validates token + tenant + site URL.
2. `GET /_api/web/roleassignments?$top=1&$select=PrincipalId` — validates that the app actually has permission to read role assignments (not only basic read).
The result is persisted per target in `last_probe_at`, `last_probe_ok`, and `last_probe_message`. If the probe fails, the target is marked `failed` with `error_message = "Preflight: <hint>"` and the full scan is skipped. Hints interpret common HTTP codes:
| Code | Hint |
|---|---|
| 401 on `/_api/web` | Certificate not uploaded in Azure, or wrong tenant/client ID |
| 401 on `/roleassignments` | Admin consent missing, or granted permission too low |
| 403 | App has no access to this site (e.g. `Sites.Selected` without a per-site grant) |
| 404 | Site not found |
The same probe is exposed as an on-demand **Test connection** action on each target in the Job Details UI (see API Endpoints below). The action is blocked while the job is still queued or running.
### Timeout and Retry Controls
Configured through environment variables (defaults shown):
@ -162,6 +194,7 @@ GET /api/scan-jobs/{id} Get job detail (targets
POST /api/scan-jobs/{id}/cancel Cancel a queued or running job
DELETE /api/scan-jobs/{id} Delete a completed job and all its data
POST /api/scan-jobs/{id}/resolve-sharing-links Resolve SharingLinks group members post-scan
POST /api/scan-jobs/{id}/targets/{tid}/test-connection Re-run the connection preflight for one target
GET /api/scan-jobs/{id}/export Download deviations as .xlsx (optional ?site_url=)
```
@ -189,23 +222,77 @@ Main tables:
| Table | Key columns |
|---|---|
| `tenant_profiles` | credentials, `cert_private_key`, `cert_thumbprint`, `cert_expires_at` |
| `scan_jobs` | `status`, `tenant_profile_id`, progress counters, auth credentials |
| `scan_targets` | `job_id`, `site_url`, `status`, `attempts`, `error_message` |
| `permission_deviations` | `job_id`, `site_url`, `object_url`, `object_type`, `principal`, `role_name`, `delta_type`, `resolved_members` |
| `tenant_profiles` | credentials, `cert_private_key`, `cert_public_pem`, `cert_thumbprint`, `cert_expires_at` |
| `scan_jobs` | `status`, `scan_type` (`sharepoint`/`mailbox`), `tenant_profile_id`, progress counters, auth credentials |
| `scan_targets` | `job_id`, `site_url` (holds UPN for mailbox jobs), `status`, `attempts`, `error_message`, `last_probe_at`, `last_probe_ok`, `last_probe_message` |
| `permission_deviations` | `job_id`, `site_url`, `object_url`, `object_type`, `principal`, `role_name`, `delta_type`, `permission_type`, `resolved_members` |
Scan jobs, targets, and deviations are cascade-deleted when a job is removed via `DELETE /api/scan-jobs/{id}`. Jobs with status `queued` or `running` cannot be deleted.
Schema migrations for new columns are applied automatically on startup via `_ensure_schema_columns()` in `main.py`.
## Mailbox Scanning
Mailbox scans use Exchange Online PowerShell with certificate-based app-only auth.
### What is collected
| Permission | PowerShell source | `permission_type` value |
|---|---|---|
| Full Access (and other mailbox-level rights) | `Get-MailboxPermission` | `FullAccess` |
| Send As | `Get-RecipientPermission` (`AccessControlType=Allow`) | `SendAs` |
| Send on Behalf | mailbox property `GrantSendOnBehalfTo` | `SendOnBehalf` |
| Folder delegation — Calendar | `Get-MailboxFolderPermission "<upn>:\Calendar"` | `Folder:Calendar` |
| Folder delegation — Inbox | `Get-MailboxFolderPermission "<upn>:\Inbox"` | `Folder:Inbox` |
The scanner filters out `NT AUTHORITY\SELF`, `S-1-5-*` SIDs, inherited mailbox permissions, and the default folder principals (`Default`, `Anonymous` with `None` rights). What remains is stored as deviations on the job — there is no SharePoint-style root baseline; every non-default principal counts.
### Authentication
Mailbox scanning uses the **same tenant certificate** as SharePoint, but Exchange Online requires a `.pfx` rather than a thumbprint + raw private key. At scan time Clearview builds an in-memory PFX from `cert_private_key` + `cert_public_pem` (random password), writes it to a tempdir, and removes it immediately after the `pwsh` process exits.
### Targets
Three ways to seed a mailbox scan job:
1. **Manual UPNs** — paste one UPN per line.
2. **CSV import** — column `UserPrincipalName` / `Email` / `Mailbox` / `Primary SMTP Address` (auto-detected, case-insensitive).
3. **All mailboxes in tenant** — Clearview enumerates every mailbox via `Get-EXOMailbox -ResultSize Unlimited` and queues one target per mailbox. Requires the tenant's primary domain (e.g. `contoso.onmicrosoft.com`) so `Connect-ExchangeOnline -Organization` can authenticate. Capped at 50000 mailboxes per job.
### Required Azure permissions
In addition to the SharePoint setup the scan app needs:
- API permission: **Office 365 Exchange Online → Application permissions → `Exchange.ManageAsApp`** (admin-consented).
- Entra role assigned to the app's service principal: **Exchange Administrator** (cannot be granted via Microsoft Graph; must be assigned in Azure Portal → Entra ID → Roles and administrators).
### Runtime requirements
The container image installs:
- **PowerShell 7 (`pwsh`)** from the official Microsoft package repo.
- **`ExchangeOnlineManagement`** module from PSGallery (`Install-Module -Scope AllUsers`).
Adds roughly 150 MB to the image. Without these, mailbox probes return `pwsh not available in runtime` and scans fail.
### Probe
Mailbox preflight runs `probe.ps1` which connects to Exchange Online and calls `Get-EXOMailbox -Identity <upn> -PropertySets Minimum`. Failure hints map common errors:
| Error fragment | Hint |
|---|---|
| `Unauthorized` / `401` / `AADSTS*` | Check `Exchange.ManageAsApp` permission, admin consent, and the Exchange Administrator role assignment |
| `Couldn't find object` / `not found` | Mailbox does not exist in this tenant |
| `module not available` | `ExchangeOnlineManagement` PS module missing in the container |
## Build and Release
Use `./build-and-push.sh` from repo root.
`./build-and-push.sh` from the repo root, sourced from the shared script in `/docker/develop/shared-integrations/tooling/docker-build-and-push/`.
- `./build-and-push.sh t` for test build (`:dev` tag only)
- `./build-and-push.sh 1` patch release
- `./build-and-push.sh 2` minor release
- `./build-and-push.sh 3` major release
- `./build-and-push.sh t` — test build, push `:dev` tag only.
- `./build-and-push.sh r` — release build, parses the version from `docs/changelog.md` (first `## vX.Y.Z` heading), pushes `:<version>`, `:dev`, and `:latest`.
The script performs no git operations. After a successful release, run the `git commit` / `git tag` / `git push --tags` commands the script prints in its summary.
## Current Scan Mode

View File

@ -2,6 +2,71 @@
This file documents changes on the develop branch of this project.
## 2026-05-26 — Build/version number in the UI (Dropkeep-style)
### Added
- **Version metadata module `clearview_app/version.py`** — single source of truth mirroring Dropkeep: `VERSION = "v0.1.0"` (release) + `BUILD = 0` (explicit dev/test build segment, source state, not git-derived). `display_version()` returns `vX.Y.Z.N` when `BUILD > 0`, else `vX.Y.Z`; `cache_version()` strips the leading `v`.
- **`GET /api/version` endpoint** — returns `{"version": display_version()}`. The FastAPI app `version=` is also sourced from `version.py` (was hardcoded `"0.1.0"`).
- **Version shown in the UI** — the sidebar footer version (previously a hardcoded `v0.1.0` in `index.html`) is now populated at load time from `/api/version` via a new `loadVersion()` in `app.js` (span `id="appVersion"`). Operators see exactly which image build is running, e.g. `v0.1.0.3`.
- **Build wrapper `build.sh` + `scripts/`** — `./build.sh t` runs `scripts/bump-dev-build.py` (increments `BUILD`) then `./build-and-push.sh t`; `./build.sh r` runs `scripts/check-release-version.py` (asserts `BUILD == 0` and that `version.py` matches the top `docs/changelog.md` release heading) then `./build-and-push.sh r`. `scripts/set-release-version.py vX.Y.Z` sets a new release version and resets `BUILD = 0`. Build numbers are committed in source so the image carries the exact build with no Docker build args.
## 2026-05-26 — Root report: expand Entra/M365 groups & readable direct users
### Added
- **Entra/AAD & M365 group expansion at site root** — the "Resolve groups" action now also expands Azure AD security groups and Microsoft 365 groups that are assigned **directly** at the site root, not just classic SharePoint site groups. Previously these claim-encoded principals (`c:0t.c|tenant|<guid>`, `c:0o.c|federateddirectoryclaimprovider|<guid>`) were skipped by `is_sharepoint_group_principal`, so the root report showed only the group name and never the people inside — making the inventory incomplete. New helpers in `scanners/sharepoint.py`: `_extract_aad_group_object_id` (parses the Entra object id out of the claim, incl. the `_o` owners suffix), `is_aad_group_principal`, `resolve_aad_group_members`, and `_expand_aad_group_by_id` (extracted from `_expand_aad_group_via_graph` so both mail-based and id-based lookups share the `/groups/{id}/members` + `/owners` Graph path, depth-limited to 3 with a per-resolve `seen` set). `POST /api/scan-jobs/{id}/resolve-groups` now routes AAD/M365 group principals to the Graph resolver and SharePoint groups to the existing `getbyname` resolver. Requires `GroupMember.Read.All` (or `Group.Read.All`) on Microsoft Graph; without it the group stays visible by name and counts as "skipped" — no crash.
### Changed
- **Readable principals for directly-assigned users** — individual users granted rights directly on the site root now render as their UPN/email (e.g. `jan@contoso.com`) instead of the raw claim string `i:0#.f|membership|jan@contoso.com`. New helpers `_extract_user_upn` and `_display_principal` in `scanners/sharepoint.py`, applied in `_get_role_assignments` (so both the root scan and the deviation scan benefit, consistently on both sides of the root-vs-child set comparison). Only users with an `@`-shaped UPN are rewritten; groups, on-prem (`i:0#.w|domain\\user`) and built-in/system accounts keep their original LoginName so claim object ids stay resolvable and the site-root noise filter (`SHAREPOINT\\system`, `NT AUTHORITY\\*`, etc.) keeps matching.
## [2026-04-28]
### Changed
- **Excel export sheet name + columns adapt to scan type** — second sheet is now named `Mailbox Permissions` for mailbox jobs, `Group Memberships` for Entra-group jobs, `Root Permissions` for SharePoint-root jobs, and `Deviations` for the original SharePoint deviation scan. Column sets are tailored per type so headers like "Object URL" / "Link Risk" / "Delta" no longer appear on exports where they don't apply. Targets sheet first column label switches between Site URL / Mailbox / Group based on the job.
### Added
- **Entra Group Scan** — new scan type `entra_groups` dedicated to enumerating Microsoft 365 / Azure AD group memberships. New `scanners/entra.py` resolves a target (Object ID, mail, or display name) via Microsoft Graph and stores one deviation per user with role `Member` or `Owner` (with `(via group > nested-group)` chain when expanded recursively). Group classification (Microsoft 365 / Security / Mail-enabled Security / Distribution) is stored in `permission_type`. New helper `entra.list_all_groups` for the "All groups in tenant" option. New CSV parser `parse_entra_groups_csv` reads the `Object ID` column from the Entra portal Groups export. New sidebar route `#/scan/entra` with three forms (manual IDs, CSV import, all-tenant). New filter option in the Scan Jobs type dropdown. Job Details renders Group / Group Type / User / Role columns for these jobs. Requires `Group.Read.All` on Microsoft Graph.
- **Recursive group expansion via Microsoft Graph** — when a SharePoint group member is itself a Microsoft 365 / Azure AD group, the resolver now expands it transitively. New helpers `_expand_aad_group_via_graph` and `_graph_collect` in `scanners/sharepoint.py` call `/groups?$filter=mail eq …` to look up the group, then `/groups/{id}/members` and `/groups/{id}/owners` to enumerate users. Owners are tagged with `(owner)` in the output. Recursion is depth-limited to 3 with a per-resolve `seen` set to break cycles. Output format puts nested members in square brackets after the group name, e.g. `Pharmacology@contoso.onmicrosoft.com [alice@contoso.com, bob@contoso.com (owner)]`. Requires the new `Group.Read.All` Application permission on Microsoft Graph (added to the onboarding instructions). Without it, group lines remain collapsed and labelled `(group, no readable members)`.
- **Resolve SharePoint groups** — new "Resolve groups" action on the Job Details panel for SharePoint and SharePoint-root jobs. Expands every SharePoint group principal (Owners / Members / Visitors / custom site groups) to its underlying user list via `/_api/web/sitegroups/getbyname/<group>/users` and writes the comma-separated members to `permission_deviations.resolved_members`. Members are rendered below the principal in the Deviations table and included in the Excel export. Azure AD security groups and federated claims (principals starting with `c:0…` / `i:0…` or containing `|`) are skipped — those would need `Group.Read.All` on Microsoft Graph. New endpoint `POST /api/scan-jobs/{id}/resolve-groups`, helper `sharepoint.is_sharepoint_group_principal()`.
- **SharePoint root-permissions scan mode** — new `scan_type='sharepoint_root'` that lists role assignments on the site root only, without traversing libraries/folders/files. Much faster (~1 HTTP call per target) and useful for an inventory of who has site-level access. New scanner function `sharepoint.scan_site_root_permissions`. Records are stored with `delta_type='root'` and `object_type='Site'`. Selectable on the New SharePoint Scan page via a "Scan mode" dropdown that controls both the manual-URL and CSV-import forms. New filter option in the Scan Jobs type filter. Noise filter `_is_noise_principal` excludes SharingLinks groups, `SHAREPOINT\system`/`NT AUTHORITY\*` accounts, and "Limited Access System Group" entries — these are SharePoint plumbing surfaced at site-root by spotted-item shares and are not part of a meaningful root inventory.
- **Tenant `primary_domain` field** — new column on `tenant_profiles`, exposed in the Add Tenant form (e.g. `contoso.onmicrosoft.com`). When set, the Mailbox scan page auto-fills the Organization field on tenant selection, and the API falls back to it when `organization` is omitted on a `scan_all_mailboxes` request. SharePoint scans are unaffected.
- **Expanded mailbox-scan onboarding instructions** — new "Enable mailbox scanning" section in the Add Tenant form covers adding the `Exchange.ManageAsApp` API permission, granting admin consent, assigning the Exchange Administrator Entra role to the service principal, certificate generation/upload, and primary-domain entry. Always visible (independent of automated/manual onboarding mode).
- **Scan all mailboxes in a tenant** — third option on the Mailbox scan page next to manual UPNs and CSV import. Clearview enumerates every mailbox via `Get-EXOMailbox -ResultSize Unlimited` and queues one target per mailbox. Requires the tenant's primary domain (e.g. `contoso.onmicrosoft.com`) and a tenant certificate. New PowerShell script `exo_scripts/list-mailboxes.ps1`, new Python helper `mailbox.list_mailboxes()`, new request fields `scan_all_mailboxes` and `organization`. Job source type is recorded as `tenant_all`.
### Changed
- **Sidebar logo** — replaced with a dark-background variant (`assets/clearview-logo-dark.svg`) so the "view" wordmark stays legible on the dark sidebar (previously rendered in `#141413` and was invisible).
- **English-only UI** — replaced remaining Dutch labels in the application with English equivalents: probe status `Nog niet getest`/`Mislukt` → `Not tested yet`/`Failed`, button label `Testen…` → `Testing…`, error toast `Test mislukt:` → `Test failed:`, and probe hints in `scanners/sharepoint.py` + `scanners/mailbox.py`. The Dutch→English role-name mapping table in `sharepoint.py` is unchanged (it normalizes incoming SharePoint role names).
- **Mailbox permission scanning** — Clearview can now scan Exchange Online mailboxes for delegated access alongside SharePoint sites.
- Permission categories collected: Full Access (`Get-MailboxPermission`), Send As (`Get-RecipientPermission`), Send on Behalf (`GrantSendOnBehalfTo` mailbox property), and folder delegations on Calendar and Inbox (`Get-MailboxFolderPermission`).
- Implementation: `pwsh` subprocess invoking the `ExchangeOnlineManagement` module with certificate-based app-only authentication (same tenant profile cert as SharePoint scans).
- Default principals (`NT AUTHORITY\SELF`, `S-1-5-*`, folder `Default`/`Anonymous=None`) are filtered out at scan time; only non-default permissions become deviations.
- Mailbox scans require a tenant certificate plus the `Office 365 Exchange Online → Exchange.ManageAsApp` API permission and the **Exchange Administrator** Entra role on the scan app's service principal. Client-secret auth is not supported by Exchange Online.
- **Frontend sidebar layout** — single-page UI replaced with a fixed left sidebar (200px, dark) and routed pages, mirroring the AlertHub layout convention.
- Routes via hash-based router: `#/dashboard`, `#/jobs`, `#/scan/sharepoint`, `#/scan/mailbox`, `#/tenants`, `#/settings`. Implementation stays vanilla HTML/JS/CSS (no React introduction).
- Job Details panel adapts column labels and headers based on `scan_type`: SharePoint shows Site/Object/Type/Principal/Role/Delta; Mailbox shows Mailbox/Object/Permission Type/Principal/Access Rights. SharingLinks resolution is hidden for mailbox jobs.
- Jobs list gets a **Type** column (SharePoint / Mailbox) and a type filter.
- **Scanners package** — `clearview_app/scanner.py` split into `clearview_app/scanners/{__init__.py, common.py, sharepoint.py, mailbox.py, exo_scripts/}`. Public dispatcher `scanners.scan(scan_type, target, auth, progress)` and `scanners.probe(scan_type, target, auth)`. The original `scanner.py` remains as a thin compatibility shim re-exporting the SharePoint API.
- **Datamodel changes** (auto-migrated on startup):
- `scan_jobs.scan_type VARCHAR(32) NOT NULL DEFAULT 'sharepoint'`
- `permission_deviations.permission_type VARCHAR(32)` — populated by mailbox scans (`FullAccess`, `SendAs`, `SendOnBehalf`, `Folder:Calendar`, `Folder:Inbox`)
- `tenant_profiles.cert_public_pem TEXT` — public PEM is now stored alongside the private key so the mailbox scanner can build a `.pfx` for `Connect-ExchangeOnline -CertificateFilePath`. Existing tenants need to regenerate the certificate before mailbox scanning is available; SharePoint scans keep working with the existing key.
- **Mailbox CSV import** — `parse_mailboxes_csv` accepts `UserPrincipalName` / `UPN` / `Email` / `Mailbox` / `Primary SMTP Address` columns with case-insensitive matching, dedup, and email-shape validation.
- **API additions**:
- `POST /api/scan-jobs` payload extended with `scan_type` and `mailboxes[]` next to the existing `site_urls[]`.
- `POST /api/scan-jobs/import-csv` accepts a `scan_type` form field (`sharepoint`|`mailbox`).
- `GET /api/scan-jobs?scan_type=…` filter.
- `ScanJobSummary.scan_type` and `PermissionDeviationItem.permission_type` returned.
- **Dockerfile** now installs Microsoft PowerShell 7 from the official Microsoft repository plus the `ExchangeOnlineManagement` PowerShell module from PSGallery. Adds ~150 MB to the image.
- **Build script migration** — replaced the local `build-and-push.sh` with the shared version from `/docker/develop/shared-integrations/tooling/docker-build-and-push/`. Reads the version from `docs/changelog.md` (release-summary file) instead of `version.txt`.
- **`docs/changelog.md`** — new release-summary changelog file used by the new build script. The development log (`changelog-develop.md`) remains the append-only source of truth for individual changes.
## [2026-04-23]
### Added
- **Connection preflight per scan target** — before a target is scanned, a lightweight probe validates that the configured credentials can reach the site and read role assignments (`/_api/web` + `/_api/web/roleassignments?$top=1`). Targets that fail preflight are marked `failed` with a clear reason (401/403/404 hints) instead of attempting the full scan. Fixes the previous silent-failure behaviour when admin consent or the certificate upload was missing in Azure.
- **Manual "Test" button** — new button in the Targets table in Job Details that re-runs the probe on demand. New endpoint: `POST /api/scan-jobs/{id}/targets/{target_id}/test-connection`. Blocked while the job is still queued or running.
- **Probe status in UI** — each target row shows the last probe result (OK / Mislukt / Nog niet getest) with timestamp and error message. Fields persist until the next test, so "last known status" remains visible even after permissions are later revoked.
- `scan_targets` table extended with `last_probe_at`, `last_probe_ok`, `last_probe_message` (auto-migrated on startup).
## [2026-04-13]
### Added

15
docs/changelog.md Normal file
View File

@ -0,0 +1,15 @@
# Clearview changelog
This file is the **release-summary** changelog used by `build-and-push.sh` to determine the current version. The first heading must be the most recent release in the form `## vX.Y.Z — YYYY-MM-DD`.
For day-by-day development history see [`changelog-develop.md`](changelog-develop.md).
## v0.1.0 — 2026-04-13
### Added
- Initial Clearview release: SharePoint permission deviation scanning across multiple customer tenants.
- Tenant Profiles with certificate-based or client-secret authentication.
- Asynchronous scan job queue with per-target preflight probe and retry handling.
- Job Details panel with site filter, Excel export, and SharingLinks resolution.
- CSV import of Microsoft Sites export format.
- Two onboarding modes (automated via Graph platform app, or manual).

185
docs/code-review-todo.md Normal file
View File

@ -0,0 +1,185 @@
# Code Review TODO — Clearview
**Aangemaakt:** 2026-05-19
**Branch bij review:** `refactor/scanner-package-frontend`
**Scope:** Eerste volledige review (~7.100 regels code)
**Totaal:** 13 CRITICAL · 19 HIGH · 14 MEDIUM · 1 LOW
Werkvolgorde: alle CRITICAL eerst (P0), daarna HIGH (P1), dan MEDIUM/LOW (P2).
Per item staan severity, bestand(en):regel(s), en de gewenste fix.
---
## P0 — CRITICAL (eerst dichten)
### Auth & secrets
- [ ] **Geen authenticatie op enig API-endpoint**
- `containers/clearview/src/clearview_app/main.py` (alle `/api/` routes)
- Fix: API-key via `X-API-Key` header met FastAPI `Security()` dependency, of Bearer-token op alle `/api/` routes.
- [ ] **Client secrets staan plaintext in DB**
- `containers/clearview/src/clearview_app/models.py:21` (`TenantProfile.client_secret`)
- `containers/clearview/src/clearview_app/models.py:45` (`ScanJob.auth_client_secret`)
- Fix: encrypt-at-rest met `cryptography.fernet`; key via env var. Decrypt enkel in geheugen bij gebruik.
- [ ] **`.env` niet in `.gitignore`**
- `.gitignore`, `stack/.env`
- Fix: voeg `stack/.env` en `**/.env` toe aan `.gitignore`; lever `stack/.env.example` met placeholders; verifieer dat `.env` nog niet in git history zit (anders rotate credentials).
- [ ] **Hardcoded DB-fallback `clearview:clearview`**
- `containers/clearview/src/clearview_app/config.py:17-19`
- Fix: verwijder default; `raise RuntimeError("DATABASE_URL required")` als env ontbreekt.
- [ ] **Adminer publiek op `0.0.0.0:8081`**
- `stack/docker-compose.yml:44-46`
- Fix: bind aan `127.0.0.1:${ADMINER_PORT:-8081}:8080` of verwijder uit prod-compose.
### Injectie & exfiltratie
- [ ] **XSS via ongescapete velden in `innerHTML`** (3 vindplaatsen)
- `containers/clearview/site/app.js:658-676` (`job.id`, `job.source_type`, `job.items_scanned`)
- `containers/clearview/site/app.js:885-894` (`job.status`, `total/processed/successful/failed_targets`, `items_scanned`)
- `containers/clearview/site/app.js:175-184` (`statusBadge()` zonder escape op `status`)
- Fix: consequent `escHtml()` op alle API-velden, óók ID's en numerieke. Geen uitzonderingen.
- [ ] **Open redirect via `payload.connect_url`**
- `containers/clearview/site/app.js:472`
- Fix: valideer `new URL(payload.connect_url).protocol === 'https:'` + host-allowlist (`login.microsoftonline.com`).
- [ ] **SSRF / token-exfiltratie via `@odata.nextLink`**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:547-553`
- `containers/clearview/src/clearview_app/scanners/entra.py:227-272`
- Fix: vergelijk `urlparse(next_url).netloc == urlparse(original_url).netloc`; gooi anders een `RuntimeError`.
- [ ] **Header injection in `Content-Disposition`**
- `containers/clearview/src/clearview_app/main.py:701-705`
- Fix: type de route-parameter als `uuid.UUID` zodat FastAPI `job_id` valideert; `urllib.parse.quote(filename, safe="")`.
---
## P1 — HIGH
### Correctheid
- [ ] **Token cache zonder TTL — workers crashen na 1 uur**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:34, 512-543`
- Fix: bewaar `expires_at = time.time() + result["expires_in"] - 60`; invalideer in `_get_token_for_host`.
- [ ] **MSAL `ConfidentialClientApplication` per aanroep**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:530`
- Fix: module-level dict `(tenant_id, client_id, auth_method) -> app`. Hergebruik object.
- [ ] **`_TOKEN_CACHE` zonder lock (race in multi-thread)**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:34`
- Fix: `threading.Lock` rond check-then-write, of `functools.lru_cache` + TTL-wrapper.
- [ ] **Race condition in worker: niet-atomaire job-claim**
- `containers/clearview/src/clearview_app/worker.py:48-68`
- Fix: één `UPDATE scan_jobs SET status='running' WHERE id=:id AND status='queued' RETURNING id` met `FOR UPDATE SKIP LOCKED`.
- [ ] **Auto-refresh race in frontend**
- `containers/clearview/site/app.js:1009-1013` + alle `tick()` callsites
- Fix: `AbortController` per render; vorige request cancelen voordat nieuwe gestart wordt.
- [ ] **Event-listener accumulatie / re-render-pattern**
- `containers/clearview/site/app.js:238-270, 678-702`
- Fix: event delegation op stabiele container (`els.jobsTableBody.addEventListener('click', ...)`).
- [ ] **Sequentieel awaiten van onafhankelijke calls**
- `containers/clearview/site/app.js:554-555, 999, 1281, 1364`
- Fix: `await Promise.all([refreshJobs(), refreshSelectedJob()])`.
- [ ] **Niet-afgehandelde floating promise**
- `containers/clearview/site/app.js:1143`
- Fix: `.catch(err => showFeedback(...))` op `testTargetConnection(...)`.
- [ ] **OAuth state-store is in-memory dict (breekt bij `--workers >1`)**
- `containers/clearview/src/clearview_app/onboarding.py:31, 134-145`
- Fix: state opslaan in DB (tabel `oauth_states` met `created_at`, `consumed_at`) of Redis.
- [ ] **`scan_type` ongevalideerd**
- `containers/clearview/src/clearview_app/schemas.py:36`, `main.py:178, 207`
- Fix: `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` in Pydantic schema.
- [ ] **`datetime.utcnow()` deprecated, timezone-naive** (overal)
- `main.py`, `worker.py`, `models.py:26-27`, `cert.py:33`
- Fix: `datetime.now(timezone.utc)`; `DateTime(timezone=True)` in SQLAlchemy-kolommen.
- [ ] **`ThreadPoolExecutor(max_workers=1)` per target**
- `containers/clearview/src/clearview_app/worker.py:307`
- Fix: gedeelde executor; documenteer dat `future.cancel()` lopende scan niet onderbreekt.
- [ ] **Geen throttling-respect bij 429 in item-loop**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:603-619`
- Fix: batching via `$expand=RoleAssignments` of exponential backoff op item-niveau.
### Hardening
- [ ] **Container draait als root**
- `containers/clearview/Dockerfile`
- Fix: `RUN adduser --system --ingroup clearview clearview` + `USER clearview`.
- [ ] **`.deb` van packages.microsoft.com zonder checksum**
- `containers/clearview/Dockerfile:17-19`
- Fix: hardcoded SHA256 + `sha256sum --check`, of officiële GPG-key via `signed-by`.
- [ ] **`Install-Module ExchangeOnlineManagement` zonder versie-pin**
- `containers/clearview/Dockerfile:24-26`
- Fix: `-RequiredVersion 3.7.0` (of huidige geteste versie).
- [ ] **Graph-foutberichten gelekt richting frontend**
- `containers/clearview/src/clearview_app/onboarding.py:170, 188`
- Fix: volledig log naar server-side DEBUG; aan client alleen generieke code + HTTP-status.
- [ ] **OData-filter injection via displayName/mail**
- `containers/clearview/src/clearview_app/scanners/entra.py:178-196`
- Fix: `urllib.parse.quote(cleaned.replace("'", "''"), safe="")`.
- [ ] **PowerShell-args zonder UPN-validatie**
- `containers/clearview/src/clearview_app/scanners/mailbox.py:181-190`
- `containers/clearview/src/clearview_app/scanners/exo_scripts/get-permissions.ps1:107`
- Fix: `re.fullmatch(r"[^@\s]{1,64}@[^@\s]{1,255}", upn)` vóór `_run_pwsh`.
### Architectuur
- [ ] **`main.py` is 1139 regels — splitsen**
- Fix: `routers/{tenants,jobs,onboarding}.py`, `services/job_service.py`, `export.py`.
- [ ] **`sharepoint.py` is 722 regels — splitsen**
- Fix: `_auth.py`, `_http.py`, `sharepoint.py` (publieke scanfuncties), `sharing_links.py`.
- [ ] **`_ensure_schema_columns()` met 18 raw `ALTER TABLE`**
- `containers/clearview/src/clearview_app/main.py:1115-1139`
- Fix: vervang door Alembic; commit baseline-migratie + history.
---
## P2 — MEDIUM / LOW
- [ ] **`app.js` 1586 regels, geen build step** — splits in ES-modules + esbuild/rollup; verwijder IIFE-wrapper.
- [ ] **Focus management bij route-wissel ontbreekt** — `app.js:1529-1551` — focus naar `<h2>` van nieuwe route na navigatie.
- [ ] **Focus-outline 38% opacity voldoet niet aan WCAG 3:1** — `styles.css:292-296` — `outline: 2px solid var(--cv-accent)`.
- [ ] **Geen debouncing/abort op `jobSiteFilter`** — `app.js:1158-1172` — `AbortController` per fetch.
- [ ] **`els.submitFeedback` gebruikt voor niet-SharePoint feedback** — `app.js:682, 711` — generiek feedback-element of context-specifiek.
- [ ] **Magic string `'__manual__'`** — `app.js:412, 424` — named constant.
- [ ] **Icon-knoppen missen `aria-label`** — `app.js:229-231` — `aria-label` toevoegen.
- [ ] **`scanner.py` is shim zonder waarschuwing** — `DeprecationWarning` of verwijderen.
- [ ] **`except Exception: pass` zonder logging** (meerdere) — minimaal `log.warning(..., exc_info=True)`.
- [ ] **`_resolve_credentials(db, ...)` mist type-hint** — `main.py:935` — `db: Session`.
- [ ] **`CreateScanAppResponse` lekt secret zonder waarschuwing** — `schemas.py:149-155` — `show_once: bool` veld + log-suppression voor dit endpoint.
- [ ] **Deviations hard-capped op 1000** — `main.py:728` — voeg `total_deviations_count` + `truncated: bool` toe.
- [ ] **Geen `logging` in scanners-package** — `import logging; logger = logging.getLogger(__name__)` overal.
- [ ] **`list-mailboxes.ps1` laadt alles in geheugen** — `-ResultSize Unlimited` → paginering of cap.
- [ ] **`cert.py` private key zonder encryptie** — documenteer aanname dat caller encryptie-at-rest doet.
- [ ] **`build-and-push.sh` doet `git add -A`** — `build-and-push.sh:294` — expliciete file-lijst of bevestigingsprompt.
- [ ] **README build-instructies kloppen niet (1/2/3 vs t/r)** — `README.md:80-84`.
---
## Werkwijze
1. Werk per checkbox; verwijder geen items maar zet `[x]` als done.
2. Bij oplevering van een batch: append entry in `docs/changelog-develop.md`.
3. CRITICAL items vereisen handmatige verificatie (auth-tests, secret-rotation, XSS-payload checks).
4. Na P0 + P1: hertest met deze TODO als checklist voordat een nieuwe review wordt aangevraagd.

22
scripts/bump-dev-build.py Executable file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Increment Clearview's explicit dev/test build number.

Adds one to the integer on the ``BUILD = <n>`` line of ``version.py``,
writes the file back, and prints ``[bump-dev-build] <VERSION>.<BUILD>``.
"""
from __future__ import annotations

import re
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"


def bump_build(text: str) -> tuple[str, int]:
    """Return *text* with its BUILD number incremented, plus the new number.

    Args:
        text: Contents of the version module.

    Returns:
        A ``(new_text, next_build)`` pair.

    Raises:
        SystemExit: If *text* contains no ``BUILD = <digits>`` line.
    """
    match = re.search(r"^BUILD = (\d+)\s*$", text, flags=re.MULTILINE)
    if not match:
        raise SystemExit(f"BUILD assignment not found in {VERSION_FILE}")
    next_build = int(match.group(1)) + 1
    # Splice only the digit group so every other character of the file,
    # including line endings and blank lines, is left untouched.
    new_text = text[: match.start(1)] + str(next_build) + text[match.end(1) :]
    return new_text, next_build


def read_version(text: str) -> str:
    """Return the VERSION string found in *text*, or ``"v?.?.?"`` if absent."""
    version_match = re.search(r'^VERSION = ["\']([^"\']+)["\']\s*$', text, flags=re.MULTILINE)
    return version_match.group(1) if version_match else "v?.?.?"


def main() -> None:
    """Bump BUILD in VERSION_FILE in place and print the resulting label."""
    # Python source is UTF-8 by default, so read and write it as UTF-8
    # explicitly instead of relying on the locale's preferred encoding.
    text, next_build = bump_build(VERSION_FILE.read_text(encoding="utf-8"))
    VERSION_FILE.write_text(text, encoding="utf-8")
    print(f"[bump-dev-build] {read_version(text)}.{next_build}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""Validate Clearview release version state before a release build.

Checks that ``version.py`` has ``BUILD = 0`` and that its ``VERSION`` equals
the first ``## vX.Y.Z — YYYY-MM-DD`` heading in ``docs/changelog.md``.
Exits with an error message on the first check that fails.
"""
from __future__ import annotations

import re
from pathlib import Path
from typing import Any

ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"
CHANGELOG = ROOT / "docs" / "changelog.md"


def read_version_state(source: str) -> tuple[str, int]:
    """Return ``(VERSION, BUILD)`` as defined by the version module *source*.

    Raises:
        SystemExit: If VERSION or BUILD is not defined, or BUILD cannot be
            converted to an integer.
    """
    ns: dict[str, Any] = {}
    # NOTE(review): exec runs the version module as code. That is acceptable
    # only as long as version.py is a trusted, repository-controlled file.
    exec(source, ns)
    # Report a missing assignment as such instead of falling back to a
    # placeholder value that would surface later as a confusing mismatch.
    for name in ("VERSION", "BUILD"):
        if name not in ns:
            raise SystemExit(f"{name} assignment not found in {VERSION_FILE}")
    try:
        build = int(ns["BUILD"])
    except (TypeError, ValueError):
        raise SystemExit(f"BUILD in {VERSION_FILE} is not an integer: {ns['BUILD']!r}") from None
    return str(ns["VERSION"]), build


def top_changelog_version(changelog_text: str) -> str | None:
    """Return the version of the first release heading, or None if there is none."""
    match = re.search(r"^## (v\d+\.\d+\.\d+) — \d{4}-\d{2}-\d{2}\s*$", changelog_text, flags=re.MULTILINE)
    return match.group(1) if match else None


def main() -> None:
    """Run the release checks against VERSION_FILE and CHANGELOG."""
    # The heading pattern contains a non-ASCII em dash, so both files are
    # decoded as UTF-8 explicitly; with a different locale default encoding
    # the dash would be mis-decoded and the heading would never match.
    version, build = read_version_state(VERSION_FILE.read_text(encoding="utf-8"))
    if build != 0:
        raise SystemExit(f"Release builds require BUILD = 0 in {VERSION_FILE}; found BUILD = {build}")

    changelog_version = top_changelog_version(CHANGELOG.read_text(encoding="utf-8"))
    if changelog_version is None:
        raise SystemExit(f"No release heading found in {CHANGELOG}; expected '## vX.Y.Z — YYYY-MM-DD'")
    if changelog_version != version:
        raise SystemExit(
            f"Version mismatch: {VERSION_FILE} has {version}, but top changelog release is {changelog_version}"
        )
    print(f"[check-release-version] {version}")


if __name__ == "__main__":
    main()

24
scripts/set-release-version.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""Set Clearview's release version and reset the dev/test build segment.

Usage: ``scripts/set-release-version.py vX.Y.Z``

Rewrites the ``VERSION = "..."`` line of ``version.py`` to the given version
and its ``BUILD = <n>`` line to ``BUILD = 0``.
"""
from __future__ import annotations

import re
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"

# Trailing whitespace is matched with ``[ \t]*`` rather than ``\s*``. ``\s``
# also matches newlines, so under re.MULTILINE a ``\s*$`` tail consumes the
# line break after the assignment, and replacing the match then drops a
# following blank line or the file's final newline.
_VERSION_LINE = re.compile(r'^VERSION = ["\'][^"\']+["\'][ \t]*$', flags=re.MULTILINE)
_BUILD_LINE = re.compile(r"^BUILD = \d+[ \t]*$", flags=re.MULTILINE)


def apply_release_version(text: str, version: str) -> str:
    """Return *text* with VERSION set to *version* and BUILD reset to 0.

    Only the first VERSION line and the first BUILD line are rewritten;
    all line breaks of the input are preserved.

    Raises:
        SystemExit: If *text* lacks a matching VERSION or BUILD line.
    """
    # Callable replacements keep *version* literal (no backslash/group
    # template expansion), whatever the caller passes in.
    text, n_version = _VERSION_LINE.subn(lambda _m: f'VERSION = "{version}"', text, count=1)
    text, n_build = _BUILD_LINE.subn(lambda _m: "BUILD = 0", text, count=1)
    if n_version != 1 or n_build != 1:
        raise SystemExit(f"Could not update VERSION/BUILD in {VERSION_FILE}")
    return text


def main(argv: list[str]) -> None:
    """Validate the command line and rewrite VERSION_FILE in place."""
    if len(argv) != 2:
        raise SystemExit("usage: scripts/set-release-version.py vX.Y.Z")
    version = argv[1]
    if not re.fullmatch(r"v\d+\.\d+\.\d+", version):
        raise SystemExit("version must match vX.Y.Z, for example v0.1.1")

    # Python source is UTF-8 by default; do not depend on the locale encoding.
    updated = apply_release_version(VERSION_FILE.read_text(encoding="utf-8"), version)
    VERSION_FILE.write_text(updated, encoding="utf-8")
    print(f"[set-release-version] {version}")


if __name__ == "__main__":
    main(sys.argv)