Dev build 2026-05-26 14:09

This commit is contained in:
Ivo Oskamp 2026-05-26 14:09:27 +02:00
parent 6741190342
commit 61db7fe4a7
12 changed files with 726 additions and 23 deletions

View File

@ -6,12 +6,24 @@ set -euo pipefail
#
# Purpose:
# - Build & push Docker images for each service under ./containers/*
# - Branch model: `main` is permanent and tracked by the prod stack
# (image `:latest`). `dev` is a short-lived branch tracked by the dev
# stack (image `:dev`); it is recreated from `main` at the start of each
# cycle and deleted after each release. Tags `vX.Y.Z` cover rollback —
# no `release/<version>` snapshot branches.
# - Two modes:
# t (test) = only push :dev
# r (release) = push :<version>, :dev, :latest
# version is read from the top of changelog.md
#
# No git operations: committing and tagging is done manually.
# t (test) = build & push :dev. Switches to (or creates) the `dev`
# branch from `main` if not already on it, commits any
# pending changes, and pushes `dev` to origin so the dev
# stack picks up the matching compose/config alongside
# the :dev image.
# r (release) = MUST be run from `main`. Build & push :<version>,
# :dev, :latest. Fast-forwards `main` against
# `origin/main`, merges `dev` into `main` if it exists
# (local or remote), creates a `Release vX.Y.Z` commit,
# tags it, pushes `main` and the tag, and deletes the
# `dev` branch locally and on origin. Version is read
# from the top of changelog.md.
#
# Usage:
# ./build-and-push.sh [mode]
@ -25,14 +37,112 @@ set -euo pipefail
# ## vX.Y.Z — YYYY-MM-DD
# (the version is parsed from the first such line)
# - One Dockerfile per service under ./containers/<service>/Dockerfile
#
# ----------------------------------------------------------------------------
# Self-update notice (for humans AND for Claude):
#
# The canonical/master copy of this script lives at:
# /docker/develop/shared-integrations/tooling/docker-build-and-push/build-and-push.sh
#
# When this script is copied into a project repo, it may drift from master.
# On startup the script compares its own SCRIPT_VERSION to the master copy and
# warns if a newer version exists. It does NOT auto-overwrite (too risky during
# a release run).
#
# CLAUDE INSTRUCTION:
# If you (Claude) are about to invoke this script in a project repo, first
# check whether the master copy at the path above is newer (different
# SCRIPT_VERSION or differing file contents). If so, copy the master over
# the local copy BEFORE running, then proceed. Mention the update to the user.
# ============================================================================
SCRIPT_VERSION="1.7.0"
SCRIPT_MASTER_PATH="/docker/develop/shared-integrations/tooling/docker-build-and-push/build-and-push.sh"
DOCKER_REGISTRY="gitea.oskamp.info"
DOCKER_NAMESPACE="ivooskamp"
CHANGELOG_FILE="docs/changelog.md"
CONTAINERS_DIR="containers"
# --- Self-update check -------------------------------------------------------
# Compare this script to the canonical master copy. If it differs, offer to
# copy master over the local copy and re-exec with the same arguments so the
# build runs against the up-to-date script.
#
# Skip with: SKIP_SELF_UPDATE=1 ./build-and-push.sh ...
self_update_check() {
if [[ "${SKIP_SELF_UPDATE:-0}" == "1" ]]; then
return 0
fi
local self_path="${BASH_SOURCE[0]}"
# Resolve to absolute path so a comparison against itself is detected.
local self_abs
self_abs="$(cd "$(dirname "$self_path")" 2>/dev/null && pwd)/$(basename "$self_path")" || self_abs="$self_path"
if [[ "$self_abs" == "$SCRIPT_MASTER_PATH" ]]; then
return 0 # We ARE the master copy.
fi
if [[ ! -f "$SCRIPT_MASTER_PATH" ]]; then
return 0 # Master not reachable from this host; silently skip.
fi
local master_version reason=""
master_version="$(grep -m1 -E '^SCRIPT_VERSION=' "$SCRIPT_MASTER_PATH" | sed -E 's/.*"([^"]+)".*/\1/')"
if [[ -n "$master_version" && "$master_version" != "$SCRIPT_VERSION" ]]; then
reason="version"
elif ! cmp -s "$self_abs" "$SCRIPT_MASTER_PATH"; then
reason="contents"
else
return 0 # Identical to master.
fi
echo "[WARN] Local build-and-push.sh differs from master."
if [[ "$reason" == "version" ]]; then
echo " local : $SCRIPT_VERSION"
echo " master : $master_version ($SCRIPT_MASTER_PATH)"
else
echo " Same SCRIPT_VERSION ($SCRIPT_VERSION) but file contents differ."
echo " master : $SCRIPT_MASTER_PATH"
fi
# Prompt only when stdin is a TTY; in non-interactive runs, abort safely so
# an unattended release never silently runs against a stale script.
if [[ ! -t 0 ]]; then
echo "[ERROR] Non-interactive shell — refusing to auto-update."
echo " Re-run interactively, or set SKIP_SELF_UPDATE=1 to bypass,"
echo " or update manually: cp \"$SCRIPT_MASTER_PATH\" \"$self_abs\""
exit 1
fi
local reply
read -r -p "Update local script from master and re-run? [Y/n] " reply
reply="${reply:-Y}"
if [[ ! "$reply" =~ ^[Yy]$ ]]; then
echo "[INFO] Continuing with local version $SCRIPT_VERSION (not updated)."
echo ""
return 0
fi
if ! cp "$SCRIPT_MASTER_PATH" "$self_abs"; then
echo "[ERROR] Failed to copy master to $self_abs (read-only filesystem?)."
echo " Continuing with local version $SCRIPT_VERSION."
echo ""
return 0
fi
chmod +x "$self_abs" 2>/dev/null || true
echo "[INFO] Updated $self_abs from master. Re-executing..."
echo ""
# Re-exec with original arguments. SKIP_SELF_UPDATE=1 prevents an
# update loop if cp somehow didn't take.
export SKIP_SELF_UPDATE=1
exec "$self_abs" "$@"
}
self_update_check "$@"
# --- Input: prompt if missing ------------------------------------------------
MODE="${1:-}"
if [[ -z "${MODE}" ]]; then
@ -141,19 +251,86 @@ else
echo "[INFO] Repo: $(pwd) (not a git checkout)"
fi
# --- Determine version (release only) ----------------------------------------
# --- Release preflight (BEFORE any docker work) ------------------------------
# All git-side validation for a release happens here so a wrong-branch / dirty
# tree / stale main / conflicting dev / pre-existing tag aborts the run before
# anything is built or pushed to the registry. dev is merged into main now so
# the version we read from changelog.md reflects the merged state, not main's
# pre-merge state.
VERSION=""
DEV_MERGED=0
if [[ "$MODE" == "r" ]]; then
if [[ ! -d ".git" ]]; then
echo "[ERROR] Release mode requires a git checkout."
exit 1
fi
CURRENT_BRANCH="$(git symbolic-ref --short -q HEAD || echo)"
if [[ "$CURRENT_BRANCH" != "main" ]]; then
echo "[ERROR] Release build must run from 'main' branch. Current: ${CURRENT_BRANCH:-<detached>}."
echo " Switch with: git checkout main"
exit 1
fi
if ! git diff --quiet HEAD -- || ! git diff --cached --quiet; then
echo "[ERROR] Working tree has uncommitted changes. Commit or stash them on the appropriate branch before releasing."
git status --short
exit 1
fi
echo "[INFO] Fetching origin..."
git fetch origin main
if git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
git fetch origin dev
fi
if ! git merge --ff-only origin/main 2>/dev/null; then
echo "[ERROR] Local main has diverged from origin/main. Resolve manually before releasing."
exit 1
fi
# Merge dev into main BEFORE reading the version, so changelog.md reflects
# the bumped state that dev brings in.
if git show-ref --verify --quiet refs/heads/dev; then
echo "[INFO] Merging local dev into main..."
if ! git merge --no-ff dev -m "Release (merge dev)"; then
echo "[ERROR] Merge of dev into main failed (conflict). Resolve manually and re-run."
exit 1
fi
DEV_MERGED=1
elif git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
echo "[INFO] Fetching and merging origin/dev into main..."
git fetch origin dev:dev
if ! git merge --no-ff dev -m "Release (merge dev)"; then
echo "[ERROR] Merge of dev into main failed (conflict). Resolve manually and re-run."
exit 1
fi
DEV_MERGED=1
else
echo "[INFO] No dev branch found — releasing main as-is."
fi
VERSION="$(read_version_from_changelog)"
echo "[INFO] Release version (from $CHANGELOG_FILE): $VERSION"
echo "[INFO] Release version (from $CHANGELOG_FILE, post-merge): $VERSION"
validate_tag "$VERSION"
validate_tag "latest"
# Tag collision = abort. A re-release of an existing version with different
# content would silently move what consumers think v0.X.Y points to.
if git rev-parse -q --verify "refs/tags/${VERSION}" >/dev/null; then
echo "[ERROR] Tag ${VERSION} already exists locally. Bump $CHANGELOG_FILE to a new version before releasing."
exit 1
fi
if git ls-remote --exit-code --tags origin "refs/tags/${VERSION}" >/dev/null 2>&1; then
echo "[ERROR] Tag ${VERSION} already exists on origin. Bump $CHANGELOG_FILE to a new version before releasing."
exit 1
fi
# Ask for confirmation so you never accidentally re-push an old version or a wrong one.
read -r -p "Proceed building & pushing as ${VERSION}? [y/N] " CONFIRM
CONFIRM="${CONFIRM:-N}"
if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then
echo "[INFO] Aborted by user."
echo "[INFO] Aborted by user. Note: dev has been merged into local main; reset with 'git reset --hard origin/main' if you want to undo."
exit 0
fi
else
@ -227,10 +404,79 @@ for img in "${BUILT_IMAGES[@]}"; do
done
echo "============================================================"
echo ""
echo "[REMINDER] No git operations were performed. If this was a release,"
echo " commit and tag manually, e.g.:"
# --- Git: release commit + tag + push (release mode only) -------------------
# Preflight (branch, clean tree, ff origin/main, dev merge, tag collision,
# version parse) already ran BEFORE the build. dev is already merged into
# local main. We only need to land the Release commit, tag, and push.
if [[ "$MODE" == "r" ]]; then
echo " git add -A && git commit -m \"Release ${VERSION}\""
echo " git tag -a ${VERSION} -m \"Release ${VERSION}\""
echo " git push && git push --tags"
echo "[INFO] Finalising release: version=${VERSION}"
# Produce a clean Release commit at the tip. Preflight guarantees the working
# tree was clean at start; any post-build artefacts would be unexpected, so
# commit with --allow-empty to keep the release marker isolated.
if git diff --quiet HEAD -- && git diff --cached --quiet; then
git commit --allow-empty -m "Release ${VERSION}"
else
echo "[WARN] Working tree changed during the build — staging and including in release commit."
git add -A
git commit -m "Release ${VERSION}"
fi
git tag -a "${VERSION}" -m "Release ${VERSION}"
# Push main first (triggers prod webhook), then the tag.
git push origin main
git push origin "refs/tags/${VERSION}"
echo "[INFO] Pushed main and tag ${VERSION} to origin."
# Clean up dev branch — local and remote.
if [[ "$DEV_MERGED" == "1" ]]; then
if git show-ref --verify --quiet refs/heads/dev; then
git branch -D dev
echo "[INFO] Deleted local dev branch."
fi
if git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
git push origin --delete dev
echo "[INFO] Deleted remote dev branch."
fi
fi
fi
# --- Git: dev branch commit + push (test mode only) -------------------------
if [[ "$MODE" == "t" ]]; then
if [[ ! -d ".git" ]]; then
echo "[WARN] Not a git checkout — skipping dev branch commit/push."
exit 0
fi
CURRENT_BRANCH="$(git symbolic-ref --short -q HEAD || echo)"
# Ensure we are on the dev branch. Create it if needed.
if [[ "$CURRENT_BRANCH" != "dev" ]]; then
if git show-ref --verify --quiet refs/heads/dev; then
echo "[INFO] Switching to existing local dev branch."
git checkout dev
elif git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then
echo "[INFO] Checking out remote dev branch."
git fetch origin dev
git checkout -b dev origin/dev
else
echo "[INFO] Creating new dev branch from main."
git fetch origin main
git checkout -b dev origin/main
fi
fi
# Stage and commit if there are changes.
git add -A
if git diff --cached --quiet; then
echo "[INFO] Working tree clean — pushing current HEAD to dev."
else
git commit -m "Dev build $(date '+%Y-%m-%d %H:%M')"
fi
# Non-force push. Diverged origin/dev fails hard — resolve manually.
git push -u origin dev
echo "[INFO] Pushed dev to origin."
fi

28
build.sh Executable file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
# Clearview build wrapper. Keeps project-specific version handling out of the
# shared build-and-push.sh script.
#
# Usage:
# ./build.sh t # increment explicit dev/test build segment, then push :dev
# ./build.sh r # validate release version state, then run release build
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$repo_root"
mode="${1:-}"
case "$mode" in
t)
./scripts/bump-dev-build.py
;;
r)
./scripts/check-release-version.py
;;
*)
echo "usage: ./build.sh {t|r}" >&2
exit 2
;;
esac
exec ./build-and-push.sh "$@"

View File

@ -81,6 +81,7 @@
jobAutoRefresh: document.getElementById('jobAutoRefresh'),
// Sidebar / routing
contentTitle: document.getElementById('contentTitle'),
appVersion: document.getElementById('appVersion'),
targetsTableHead: document.getElementById('targetsTableHead'),
targetsHeading: document.getElementById('targetsHeading'),
deviationsTableHead: document.getElementById('deviationsTableHead'),
@ -1573,6 +1574,16 @@
// Init
// -------------------------------------------------------------------------
function loadVersion() {
if (!els.appVersion) return;
requestJson('/api/version')
.then(function (data) {
if (data && data.version) els.appVersion.textContent = data.version;
})
.catch(function () { /* leave placeholder on failure */ });
}
loadVersion();
consumeOnboardingQueryState();
initOnboardingSection().catch(function () {
els.tenantSetupManual.removeAttribute('hidden');

View File

@ -37,7 +37,7 @@
<a href="#/settings" class="nav-link" data-route="settings">Settings</a>
</nav>
<div class="sidebar-foot">
<span class="sidebar-version">v0.1.0</span>
<span class="sidebar-version" id="appVersion" title="Running Clearview build"></span>
</div>
</aside>

View File

@ -38,9 +38,10 @@ from .schemas import (
TenantProfileItem,
)
from .scanners import AuthConfig, probe
from .version import display_version
from .worker import ScanWorker
app = FastAPI(title="Clearview API", version="0.1.0")
app = FastAPI(title="Clearview API", version=display_version().lstrip("v"))
worker = ScanWorker()
SITE_DIR = Path(__file__).resolve().parents[2] / "site"
@ -91,6 +92,12 @@ def healthz() -> dict[str, str]:
return {"status": "ok"}
@app.get("/api/version")
def version() -> dict[str, str]:
"""Return the running build's user-visible version (e.g. v0.1.0.3)."""
return {"version": display_version()}
# ---------------------------------------------------------------------------
# Tenant profiles
# ---------------------------------------------------------------------------
@ -420,13 +427,16 @@ def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequ
@app.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse)
def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
"""
Expand SharePoint group principals on this job's deviations and write
each group's member list to permission_deviations.resolved_members.
Skips claim-encoded principals (federated/AAD), email-shape users, and
SharingLinks groups (those have their own resolver).
Expand group principals on this job's deviations and write each group's
member list to permission_deviations.resolved_members. Handles both
classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups
assigned directly at root (via Microsoft Graph). Skips email-shape users
and SharingLinks groups (those have their own resolver).
"""
from .scanners.sharepoint import (
is_aad_group_principal,
is_sharepoint_group_principal,
resolve_aad_group_members,
resolve_sharing_link_members,
)
@ -462,10 +472,10 @@ def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars()
)
# Group deviations by (site_url, principal) so each unique SP group is resolved once
# Group deviations by (site_url, principal) so each unique group is resolved once
groups: dict[tuple[str, str], list[int]] = {}
for dev in all_deviations:
if not is_sharepoint_group_principal(dev.principal):
if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)):
continue
key = (dev.site_url, dev.principal)
groups.setdefault(key, []).append(dev.id)
@ -475,6 +485,9 @@ def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse:
updated = 0
for (site_url, group_name), dev_ids in groups.items():
try:
if is_aad_group_principal(group_name):
members = resolve_aad_group_members(group_name, auth)
else:
members = resolve_sharing_link_members(site_url, group_name, auth)
except Exception: # noqa: BLE001
members = []

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import re
import time
from dataclasses import dataclass
from urllib.parse import urlparse
@ -258,6 +259,86 @@ def is_sharepoint_group_principal(principal: str) -> bool:
return True
# SharePoint PrincipalType: 1=User, 2=DistributionList, 4=SecurityGroup,
# 8=SharePointGroup, 16=All.
_PRINCIPAL_TYPE_USER = 1
_GUID_RE = re.compile(
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)
def _extract_user_upn(login: str) -> str:
"""
Pull the UPN/email out of a user claim LoginName so individually granted
users render readably instead of as a raw claim string.
i:0#.f|membership|jan@contoso.com -> jan@contoso.com
i:0#.w|contoso\\jan -> "" (no UPN; caller falls back to Title)
"""
if not login:
return ""
tail = login.rsplit("|", 1)[-1].strip()
return tail if "@" in tail else ""
def _display_principal(login: str, title: str, principal_type: int) -> str:
"""
Choose the most readable identity for a role-assignment member.
For individual users we surface the UPN/email instead of the claim-encoded
LoginName when one is present; for everything else (groups, system/built-in
accounts, on-prem claims) we keep the original LoginName so claim object ids
stay resolvable and the site-root noise filter still recognises them.
"""
if principal_type == _PRINCIPAL_TYPE_USER:
upn = _extract_user_upn(login)
if upn:
return upn
return login or title
def _extract_aad_group_object_id(principal: str) -> str | None:
"""
Return the Entra/AAD object id encoded in a claim principal, or None.
c:0t.c|tenant|<guid> -> <guid> (security group)
c:0o.c|federateddirectoryclaimprovider|<guid> -> <guid> (M365 group members)
c:0o.c|federateddirectoryclaimprovider|<guid>_o -> <guid> (M365 group owners)
"""
if not principal:
return None
lowered = principal.strip().lower()
if not (
lowered.startswith("c:0t.c|tenant|")
or lowered.startswith("c:0o.c|federateddirectoryclaimprovider|")
):
return None
tail = principal.rsplit("|", 1)[-1].strip()
if tail.endswith("_o"):
tail = tail[:-2]
return tail if _GUID_RE.match(tail) else None
def is_aad_group_principal(principal: str) -> bool:
"""True when the principal is an Entra/AAD or M365 group we can expand via Graph."""
return _extract_aad_group_object_id(principal) is not None
def resolve_aad_group_members(principal: str, auth: AuthConfig) -> list[str]:
"""
Expand an Entra/AAD or M365 group assigned directly at root into its
member (and owner) list via Microsoft Graph. Returns an empty list when
the principal is not such a group or when Graph cannot read it.
"""
object_id = _extract_aad_group_object_id(principal)
if not object_id:
return []
validate_auth_config(auth)
seen: set[str] = set()
return _expand_aad_group_by_id(object_id, auth, seen, depth=0)
def _is_noise_principal(principal: str) -> bool:
"""
SharePoint surfaces several principal types at site-root level that are
@ -432,6 +513,28 @@ def _expand_aad_group_via_graph(
if not group_id:
return []
return _expand_aad_group_by_id(group_id, auth, seen, depth)
def _expand_aad_group_by_id(
group_id: str,
auth: AuthConfig,
seen: set[str],
depth: int,
) -> list[str]:
if depth > 3:
return ["… (recursion limit)"]
key = f"id:{group_id.strip().lower()}"
if not group_id.strip() or key in seen:
return []
seen.add(key)
try:
token = _get_token_for_host("graph.microsoft.com", auth)
except Exception: # noqa: BLE001
return []
headers = {"Accept": "application/json", "Authorization": f"Bearer {token}"}
out: list[str] = []
out.extend(_graph_collect(f"/groups/{group_id}/members", headers, auth, seen, depth, owner=False))
out.extend(_graph_collect(f"/groups/{group_id}/owners", headers, auth, seen, depth, owner=True))
@ -606,7 +709,10 @@ def _get_role_assignments(url: str, headers: dict[str, str]) -> list[PermissionE
for item in _extract_values(data):
member = item.get("Member") or {}
principal = str(member.get("LoginName") or member.get("Title") or "").strip()
login = str(member.get("LoginName") or "").strip()
title = str(member.get("Title") or "").strip()
principal_type = _to_int(member.get("PrincipalType"))
principal = _display_principal(login, title, principal_type)
if not principal:
continue

View File

@ -0,0 +1,22 @@
"""Clearview version metadata.
The three-part VERSION is the release version. Dev/test builds append the
explicit BUILD segment. Build numbers are source state, not derived from git
history, so operators can see exactly which image build is running.
"""
from __future__ import annotations
VERSION = "v0.1.0"
BUILD = 1
def display_version() -> str:
"""Return the user-visible Clearview version."""
if BUILD > 0:
return f"{VERSION}.{BUILD}"
return VERSION
def cache_version() -> str:
"""Return the static-asset cache-buster version."""
return display_version().lstrip("v")

View File

@ -2,6 +2,22 @@
This file documents changes on the develop branch of this project.
## 2026-05-26 — Build/version number in the UI (Dropkeep-style)
### Added
- **Version metadata module `clearview_app/version.py`** — single source of truth mirroring Dropkeep: `VERSION = "v0.1.0"` (release) + `BUILD = 0` (explicit dev/test build segment, source state, not git-derived). `display_version()` returns `vX.Y.Z.N` when `BUILD > 0`, else `vX.Y.Z`; `cache_version()` strips the leading `v`.
- **`GET /api/version` endpoint** — returns `{"version": display_version()}`. The FastAPI app `version=` is also sourced from `version.py` (was hardcoded `"0.1.0"`).
- **Version shown in the UI** — the sidebar footer version (previously a hardcoded `v0.1.0` in `index.html`) is now populated at load time from `/api/version` via a new `loadVersion()` in `app.js` (span `id="appVersion"`). Operators see exactly which image build is running, e.g. `v0.1.0.3`.
- **Build wrapper `build.sh` + `scripts/`**`./build.sh t` runs `scripts/bump-dev-build.py` (increments `BUILD`) then `./build-and-push.sh t`; `./build.sh r` runs `scripts/check-release-version.py` (asserts `BUILD == 0` and that `version.py` matches the top `docs/changelog.md` release heading) then `./build-and-push.sh r`. `scripts/set-release-version.py vX.Y.Z` sets a new release version and resets `BUILD = 0`. Build numbers are committed in source so the image carries the exact build with no Docker build args.
## 2026-05-26 — Root report: expand Entra/M365 groups & readable direct users
### Added
- **Entra/AAD & M365 group expansion at site root** — the "Resolve groups" action now also expands Azure AD security groups and Microsoft 365 groups that are assigned **directly** at the site root, not just classic SharePoint site groups. Previously these claim-encoded principals (`c:0t.c|tenant|<guid>`, `c:0o.c|federateddirectoryclaimprovider|<guid>`) were skipped by `is_sharepoint_group_principal`, so the root report showed only the group name and never the people inside — making the inventory incomplete. New helpers in `scanners/sharepoint.py`: `_extract_aad_group_object_id` (parses the Entra object id out of the claim, incl. the `_o` owners suffix), `is_aad_group_principal`, `resolve_aad_group_members`, and `_expand_aad_group_by_id` (extracted from `_expand_aad_group_via_graph` so both mail-based and id-based lookups share the `/groups/{id}/members` + `/owners` Graph path, depth-limited to 3 with a per-resolve `seen` set). `POST /api/scan-jobs/{id}/resolve-groups` now routes AAD/M365 group principals to the Graph resolver and SharePoint groups to the existing `getbyname` resolver. Requires `GroupMember.Read.All` (or `Group.Read.All`) on Microsoft Graph; without it the group stays visible by name and counts as "skipped" — no crash.
### Changed
- **Readable principals for directly-assigned users** — individual users granted rights directly on the site root now render as their UPN/email (e.g. `jan@contoso.com`) instead of the raw claim string `i:0#.f|membership|jan@contoso.com`. New helpers `_extract_user_upn` and `_display_principal` in `scanners/sharepoint.py`, applied in `_get_role_assignments` (so both the root scan and the deviation scan benefit, consistently on both sides of the root-vs-child set comparison). Only users with an `@`-shaped UPN are rewritten; groups, on-prem (`i:0#.w|domain\\user`) and built-in/system accounts keep their original LoginName so claim object ids stay resolvable and the site-root noise filter (`SHAREPOINT\\system`, `NT AUTHORITY\\*`, etc.) keeps matching.
## [2026-04-28]
### Changed

185
docs/code-review-todo.md Normal file
View File

@ -0,0 +1,185 @@
# Code Review TODO — Clearview
**Aangemaakt:** 2026-05-19
**Branch bij review:** `refactor/scanner-package-frontend`
**Scope:** Eerste volledige review (~7.100 regels code)
**Totaal:** 13 CRITICAL · 19 HIGH · 14 MEDIUM · 1 LOW
Werkvolgorde: alle CRITICAL eerst (P0), daarna HIGH (P1), dan MEDIUM/LOW (P2).
Per item staan severity, bestand(en):regel(s), en de gewenste fix.
---
## P0 — CRITICAL (eerst dichten)
### Auth & secrets
- [ ] **Geen authenticatie op enig API-endpoint**
- `containers/clearview/src/clearview_app/main.py` (alle `/api/` routes)
- Fix: API-key via `X-API-Key` header met FastAPI `Security()` dependency, of Bearer-token op alle `/api/` routes.
- [ ] **Client secrets staan plaintext in DB**
- `containers/clearview/src/clearview_app/models.py:21` (`TenantProfile.client_secret`)
- `containers/clearview/src/clearview_app/models.py:45` (`ScanJob.auth_client_secret`)
- Fix: encrypt-at-rest met `cryptography.fernet`; key via env var. Decrypt enkel in geheugen bij gebruik.
- [ ] **`.env` niet in `.gitignore`**
- `.gitignore`, `stack/.env`
- Fix: voeg `stack/.env` en `**/.env` toe aan `.gitignore`; lever `stack/.env.example` met placeholders; verifieer dat `.env` nog niet in git history zit (anders rotate credentials).
- [ ] **Hardcoded DB-fallback `clearview:clearview`**
- `containers/clearview/src/clearview_app/config.py:17-19`
- Fix: verwijder default; `raise RuntimeError("DATABASE_URL required")` als env ontbreekt.
- [ ] **Adminer publiek op `0.0.0.0:8081`**
- `stack/docker-compose.yml:44-46`
- Fix: bind aan `127.0.0.1:${ADMINER_PORT:-8081}:8080` of verwijder uit prod-compose.
### Injectie & exfiltratie
- [ ] **XSS via ongescapete velden in `innerHTML`** (3 vindplaatsen)
- `containers/clearview/site/app.js:658-676` (`job.id`, `job.source_type`, `job.items_scanned`)
- `containers/clearview/site/app.js:885-894` (`job.status`, `total/processed/successful/failed_targets`, `items_scanned`)
- `containers/clearview/site/app.js:175-184` (`statusBadge()` zonder escape op `status`)
- Fix: consequent `escHtml()` op alle API-velden, óók ID's en numerieke. Geen uitzonderingen.
- [ ] **Open redirect via `payload.connect_url`**
- `containers/clearview/site/app.js:472`
- Fix: valideer `new URL(payload.connect_url).protocol === 'https:'` + host-allowlist (`login.microsoftonline.com`).
- [ ] **SSRF / token-exfiltratie via `@odata.nextLink`**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:547-553`
- `containers/clearview/src/clearview_app/scanners/entra.py:227-272`
- Fix: vergelijk `urlparse(next_url).netloc == urlparse(original_url).netloc`; gooi anders een `RuntimeError`.
- [ ] **Header injection in `Content-Disposition`**
- `containers/clearview/src/clearview_app/main.py:701-705`
- Fix: type de route-parameter als `uuid.UUID` zodat FastAPI `job_id` valideert; `urllib.parse.quote(filename, safe="")`.
---
## P1 — HIGH
### Correctheid
- [ ] **Token cache zonder TTL — workers crashen na 1 uur**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:34, 512-543`
- Fix: bewaar `expires_at = time.time() + result["expires_in"] - 60`; invalideer in `_get_token_for_host`.
- [ ] **MSAL `ConfidentialClientApplication` per aanroep**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:530`
- Fix: module-level dict `(tenant_id, client_id, auth_method) -> app`. Hergebruik object.
- [ ] **`_TOKEN_CACHE` zonder lock (race in multi-thread)**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:34`
- Fix: `threading.Lock` rond check-then-write, of `functools.lru_cache` + TTL-wrapper.
- [ ] **Race condition in worker: niet-atomaire job-claim**
- `containers/clearview/src/clearview_app/worker.py:48-68`
- Fix: één `UPDATE scan_jobs SET status='running' WHERE id=:id AND status='queued' RETURNING id` met `FOR UPDATE SKIP LOCKED`.
- [ ] **Auto-refresh race in frontend**
- `containers/clearview/site/app.js:1009-1013` + alle `tick()` callsites
- Fix: `AbortController` per render; vorige request cancelen voordat nieuwe gestart wordt.
- [ ] **Event-listener accumulatie / re-render-pattern**
- `containers/clearview/site/app.js:238-270, 678-702`
- Fix: event delegation op stabiele container (`els.jobsTableBody.addEventListener('click', ...)`).
- [ ] **Sequentieel awaiten van onafhankelijke calls**
- `containers/clearview/site/app.js:554-555, 999, 1281, 1364`
- Fix: `await Promise.all([refreshJobs(), refreshSelectedJob()])`.
- [ ] **Niet-afgehandelde floating promise**
- `containers/clearview/site/app.js:1143`
- Fix: `.catch(err => showFeedback(...))` op `testTargetConnection(...)`.
- [ ] **OAuth state-store is in-memory dict (breekt bij `--workers >1`)**
- `containers/clearview/src/clearview_app/onboarding.py:31, 134-145`
- Fix: state opslaan in DB (tabel `oauth_states` met `created_at`, `consumed_at`) of Redis.
- [ ] **`scan_type` ongevalideerd**
- `containers/clearview/src/clearview_app/schemas.py:36`, `main.py:178, 207`
- Fix: `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` in Pydantic schema.
- [ ] **`datetime.utcnow()` deprecated, timezone-naive** (overal)
- `main.py`, `worker.py`, `models.py:26-27`, `cert.py:33`
- Fix: `datetime.now(timezone.utc)`; `DateTime(timezone=True)` in SQLAlchemy-kolommen.
- [ ] **`ThreadPoolExecutor(max_workers=1)` per target**
- `containers/clearview/src/clearview_app/worker.py:307`
- Fix: gedeelde executor; documenteer dat `future.cancel()` lopende scan niet onderbreekt.
- [ ] **Geen throttling-respect bij 429 in item-loop**
- `containers/clearview/src/clearview_app/scanners/sharepoint.py:603-619`
- Fix: batching via `$expand=RoleAssignments` of exponential backoff op item-niveau.
### Hardening
- [ ] **Container draait als root**
- `containers/clearview/Dockerfile`
- Fix: `RUN adduser --system --ingroup clearview clearview` + `USER clearview`.
- [ ] **`.deb` van packages.microsoft.com zonder checksum**
- `containers/clearview/Dockerfile:17-19`
- Fix: hardcoded SHA256 + `sha256sum --check`, of officiële GPG-key via `signed-by`.
- [ ] **`Install-Module ExchangeOnlineManagement` zonder versie-pin**
- `containers/clearview/Dockerfile:24-26`
- Fix: `-RequiredVersion 3.7.0` (of huidige geteste versie).
- [ ] **Graph-foutberichten gelekt richting frontend**
- `containers/clearview/src/clearview_app/onboarding.py:170, 188`
- Fix: volledig log naar server-side DEBUG; aan client alleen generieke code + HTTP-status.
- [ ] **OData-filter injection via displayName/mail**
- `containers/clearview/src/clearview_app/scanners/entra.py:178-196`
- Fix: `urllib.parse.quote(cleaned.replace("'", "''"), safe="")`.
- [ ] **PowerShell-args zonder UPN-validatie**
- `containers/clearview/src/clearview_app/scanners/mailbox.py:181-190`
- `containers/clearview/src/clearview_app/scanners/exo_scripts/get-permissions.ps1:107`
- Fix: `re.fullmatch(r"[^@\s]{1,64}@[^@\s]{1,255}", upn)` vóór `_run_pwsh`.
### Architectuur
- [ ] **`main.py` is 1139 regels — splitsen**
- Fix: `routers/{tenants,jobs,onboarding}.py`, `services/job_service.py`, `export.py`.
- [ ] **`sharepoint.py` is 722 regels — splitsen**
- Fix: `_auth.py`, `_http.py`, `sharepoint.py` (publieke scanfuncties), `sharing_links.py`.
- [ ] **`_ensure_schema_columns()` met 18 raw `ALTER TABLE`**
- `containers/clearview/src/clearview_app/main.py:1115-1139`
- Fix: vervang door Alembic; commit baseline-migratie + history.
---
## P2 — MEDIUM / LOW
- [ ] **`app.js` 1586 regels, geen build step** — splits in ES-modules + esbuild/rollup; verwijder IIFE-wrapper.
- [ ] **Focus management bij route-wissel ontbreekt**`app.js:1529-1551` — focus naar `<h2>` van nieuwe route na navigatie.
- [ ] **Focus-outline 38% opacity voldoet niet aan WCAG 3:1**`styles.css:292-296``outline: 2px solid var(--cv-accent)`.
- [ ] **Geen debouncing/abort op `jobSiteFilter`**`app.js:1158-1172``AbortController` per fetch.
- [ ] **`els.submitFeedback` gebruikt voor niet-SharePoint feedback** — `app.js:682, 711` — generiek feedback-element of context-specifiek.
- [ ] **Magic string `'__manual__'`**`app.js:412, 424` — named constant.
- [ ] **Icon-knoppen missen `aria-label`**`app.js:229-231``aria-label` toevoegen.
- [ ] **`scanner.py` is shim zonder waarschuwing** — `DeprecationWarning` of verwijderen.
- [ ] **`except Exception: pass` zonder logging** (meerdere) — minimaal `log.warning(..., exc_info=True)`.
- [ ] **`_resolve_credentials(db, ...)` mist type-hint** — `main.py:935``db: Session`.
- [ ] **`CreateScanAppResponse` lekt secret zonder waarschuwing** — `schemas.py:149-155``show_once: bool` veld + log-suppression voor dit endpoint.
- [ ] **Deviations hard-capped op 1000**`main.py:728` — voeg `total_deviations_count` + `truncated: bool` toe.
- [ ] **Geen `logging` in scanners-package**`import logging; logger = logging.getLogger(__name__)` overal.
- [ ] **`list-mailboxes.ps1` laadt alles in geheugen** — `-ResultSize Unlimited` → paginering of cap.
- [ ] **`cert.py` private key zonder encryptie** — documenteer aanname dat caller encryptie-at-rest doet.
- [ ] **`build-and-push.sh` doet `git add -A`** — `build-and-push.sh:294` — expliciete file-lijst of bevestigingsprompt.
- [ ] **README build-instructies kloppen niet (1/2/3 vs t/r)**`README.md:80-84`.
---
## Werkwijze
1. Werk per checkbox; verwijder geen items maar zet `[x]` als done.
2. Bij oplevering van een batch: append entry in `docs/changelog-develop.md`.
3. CRITICAL items vereisen handmatige verificatie (auth-tests, secret-rotation, XSS-payload checks).
4. Na P0 + P1: hertest met deze TODO als checklist voordat een nieuwe review wordt aangevraagd.

22
scripts/bump-dev-build.py Executable file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Increment Clearview's explicit dev/test build number."""
from __future__ import annotations
import re
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"
text = VERSION_FILE.read_text()
match = re.search(r"^BUILD = (\d+)\s*$", text, flags=re.MULTILINE)
if not match:
raise SystemExit(f"BUILD assignment not found in {VERSION_FILE}")
next_build = int(match.group(1)) + 1
text = text[: match.start(1)] + str(next_build) + text[match.end(1) :]
VERSION_FILE.write_text(text)
version_match = re.search(r'^VERSION = ["\']([^"\']+)["\']\s*$', text, flags=re.MULTILINE)
version = version_match.group(1) if version_match else "v?.?.?"
print(f"[bump-dev-build] {version}.{next_build}")

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""Validate Clearview release version state before a release build."""
from __future__ import annotations
import re
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"
CHANGELOG = ROOT / "docs" / "changelog.md"
ns: dict[str, object] = {}
exec(VERSION_FILE.read_text(), ns)
version = str(ns.get("VERSION", ""))
build = int(ns.get("BUILD", -1))
if build != 0:
raise SystemExit(f"Release builds require BUILD = 0 in {VERSION_FILE}; found BUILD = {build}")
match = re.search(r"^## (v\d+\.\d+\.\d+) — \d{4}-\d{2}-\d{2}\s*$", CHANGELOG.read_text(), flags=re.MULTILINE)
if not match:
raise SystemExit(f"No release heading found in {CHANGELOG}; expected '## vX.Y.Z — YYYY-MM-DD'")
changelog_version = match.group(1)
if changelog_version != version:
raise SystemExit(
f"Version mismatch: {VERSION_FILE} has {version}, but top changelog release is {changelog_version}"
)
print(f"[check-release-version] {version}")

24
scripts/set-release-version.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""Set Clearview's release version and reset the dev/test build segment."""
from __future__ import annotations
import re
import sys
from pathlib import Path
if len(sys.argv) != 2:
raise SystemExit("usage: scripts/set-release-version.py vX.Y.Z")
version = sys.argv[1]
if not re.fullmatch(r"v\d+\.\d+\.\d+", version):
raise SystemExit("version must match vX.Y.Z, for example v0.1.1")
ROOT = Path(__file__).resolve().parents[1]
VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py"
text = VERSION_FILE.read_text()
text, n_version = re.subn(r'^VERSION = ["\'][^"\']+["\']\s*$', f'VERSION = "{version}"', text, count=1, flags=re.MULTILINE)
text, n_build = re.subn(r"^BUILD = \d+\s*$", "BUILD = 0", text, count=1, flags=re.MULTILINE)
if n_version != 1 or n_build != 1:
raise SystemExit(f"Could not update VERSION/BUILD in {VERSION_FILE}")
VERSION_FILE.write_text(text)
print(f"[set-release-version] {version}")