From 61db7fe4a7c8d758b856a94589a7ec2f766f7e33 Mon Sep 17 00:00:00 2001 From: Ivo Oskamp Date: Tue, 26 May 2026 14:09:27 +0200 Subject: [PATCH] Dev build 2026-05-26 14:09 --- build-and-push.sh | 272 +++++++++++++++++- build.sh | 28 ++ containers/clearview/site/app.js | 11 + containers/clearview/site/index.html | 2 +- .../clearview/src/clearview_app/main.py | 29 +- .../src/clearview_app/scanners/sharepoint.py | 108 ++++++- .../clearview/src/clearview_app/version.py | 22 ++ docs/changelog-develop.md | 16 ++ docs/code-review-todo.md | 185 ++++++++++++ scripts/bump-dev-build.py | 22 ++ scripts/check-release-version.py | 30 ++ scripts/set-release-version.py | 24 ++ 12 files changed, 726 insertions(+), 23 deletions(-) create mode 100755 build.sh create mode 100644 containers/clearview/src/clearview_app/version.py create mode 100644 docs/code-review-todo.md create mode 100755 scripts/bump-dev-build.py create mode 100755 scripts/check-release-version.py create mode 100755 scripts/set-release-version.py diff --git a/build-and-push.sh b/build-and-push.sh index 6b4be0e..130102f 100755 --- a/build-and-push.sh +++ b/build-and-push.sh @@ -6,12 +6,24 @@ set -euo pipefail # # Purpose: # - Build & push Docker images for each service under ./containers/* +# - Branch model: `main` is permanent and tracked by the prod stack +# (image `:latest`). `dev` is a short-lived branch tracked by the dev +# stack (image `:dev`); it is recreated from `main` at the start of each +# cycle and deleted after each release. Tags `vX.Y.Z` cover rollback — +# no `release/` snapshot branches. # - Two modes: -# t (test) = only push :dev -# r (release) = push :, :dev, :latest -# version is read from the top of changelog.md -# -# No git operations: committing and tagging is done manually. +# t (test) = build & push :dev. Switches to (or creates) the `dev` +# branch from `main` if not already on it, commits any +# pending changes, and pushes `dev` to origin so the dev +# stack picks up the matching compose/config alongside +# the :dev image. +# r (release) = MUST be run from `main`. Build & push :, +# :dev, :latest. Fast-forwards `main` against +# `origin/main`, merges `dev` into `main` if it exists +# (local or remote), creates a `Release vX.Y.Z` commit, +# tags it, pushes `main` and the tag, and deletes the +# `dev` branch locally and on origin. Version is read +# from the top of changelog.md. # # Usage: # ./build-and-push.sh [mode] @@ -25,14 +37,112 @@ set -euo pipefail # ## vX.Y.Z — YYYY-MM-DD # (the version is parsed from the first such line) # - One Dockerfile per service under ./containers//Dockerfile +# +# ---------------------------------------------------------------------------- +# Self-update notice (for humans AND for Claude): +# +# The canonical/master copy of this script lives at: +# /docker/develop/shared-integrations/tooling/docker-build-and-push/build-and-push.sh +# +# When this script is copied into a project repo, it may drift from master. +# On startup the script compares its own SCRIPT_VERSION to the master copy and +# warns if a newer version exists. It does NOT auto-overwrite (too risky during +# a release run). +# +# CLAUDE INSTRUCTION: +# If you (Claude) are about to invoke this script in a project repo, first +# check whether the master copy at the path above is newer (different +# SCRIPT_VERSION or differing file contents). If so, copy the master over +# the local copy BEFORE running, then proceed. Mention the update to the user. # ============================================================================ +SCRIPT_VERSION="1.7.0" +SCRIPT_MASTER_PATH="/docker/develop/shared-integrations/tooling/docker-build-and-push/build-and-push.sh" + DOCKER_REGISTRY="gitea.oskamp.info" DOCKER_NAMESPACE="ivooskamp" CHANGELOG_FILE="docs/changelog.md" CONTAINERS_DIR="containers" +# --- Self-update check ------------------------------------------------------- +# Compare this script to the canonical master copy. If it differs, offer to +# copy master over the local copy and re-exec with the same arguments so the +# build runs against the up-to-date script. +# +# Skip with: SKIP_SELF_UPDATE=1 ./build-and-push.sh ... +self_update_check() { + if [[ "${SKIP_SELF_UPDATE:-0}" == "1" ]]; then + return 0 + fi + + local self_path="${BASH_SOURCE[0]}" + # Resolve to absolute path so a comparison against itself is detected. + local self_abs + self_abs="$(cd "$(dirname "$self_path")" 2>/dev/null && pwd)/$(basename "$self_path")" || self_abs="$self_path" + + if [[ "$self_abs" == "$SCRIPT_MASTER_PATH" ]]; then + return 0 # We ARE the master copy. + fi + if [[ ! -f "$SCRIPT_MASTER_PATH" ]]; then + return 0 # Master not reachable from this host; silently skip. + fi + + local master_version reason="" + master_version="$(grep -m1 -E '^SCRIPT_VERSION=' "$SCRIPT_MASTER_PATH" | sed -E 's/.*"([^"]+)".*/\1/')" + + if [[ -n "$master_version" && "$master_version" != "$SCRIPT_VERSION" ]]; then + reason="version" + elif ! cmp -s "$self_abs" "$SCRIPT_MASTER_PATH"; then + reason="contents" + else + return 0 # Identical to master. + fi + + echo "[WARN] Local build-and-push.sh differs from master." + if [[ "$reason" == "version" ]]; then + echo " local : $SCRIPT_VERSION" + echo " master : $master_version ($SCRIPT_MASTER_PATH)" + else + echo " Same SCRIPT_VERSION ($SCRIPT_VERSION) but file contents differ." + echo " master : $SCRIPT_MASTER_PATH" + fi + + # Prompt only when stdin is a TTY; in non-interactive runs, abort safely so + # an unattended release never silently runs against a stale script. + if [[ ! -t 0 ]]; then + echo "[ERROR] Non-interactive shell — refusing to auto-update." + echo " Re-run interactively, or set SKIP_SELF_UPDATE=1 to bypass," + echo " or update manually: cp \"$SCRIPT_MASTER_PATH\" \"$self_abs\"" + exit 1 + fi + + local reply + read -r -p "Update local script from master and re-run? [Y/n] " reply + reply="${reply:-Y}" + if [[ ! "$reply" =~ ^[Yy]$ ]]; then + echo "[INFO] Continuing with local version $SCRIPT_VERSION (not updated)." + echo "" + return 0 + fi + + if ! cp "$SCRIPT_MASTER_PATH" "$self_abs"; then + echo "[ERROR] Failed to copy master to $self_abs (read-only filesystem?)." + echo " Continuing with local version $SCRIPT_VERSION." + echo "" + return 0 + fi + chmod +x "$self_abs" 2>/dev/null || true + + echo "[INFO] Updated $self_abs from master. Re-executing..." + echo "" + # Re-exec with original arguments. SKIP_SELF_UPDATE=1 prevents an + # update loop if cp somehow didn't take. + export SKIP_SELF_UPDATE=1 + exec "$self_abs" "$@" +} +self_update_check "$@" + # --- Input: prompt if missing ------------------------------------------------ MODE="${1:-}" if [[ -z "${MODE}" ]]; then @@ -141,19 +251,86 @@ else echo "[INFO] Repo: $(pwd) (not a git checkout)" fi -# --- Determine version (release only) ---------------------------------------- +# --- Release preflight (BEFORE any docker work) ------------------------------ +# All git-side validation for a release happens here so a wrong-branch / dirty +# tree / stale main / conflicting dev / pre-existing tag aborts the run before +# anything is built or pushed to the registry. dev is merged into main now so +# the version we read from changelog.md reflects the merged state, not main's +# pre-merge state. VERSION="" +DEV_MERGED=0 if [[ "$MODE" == "r" ]]; then + if [[ ! -d ".git" ]]; then + echo "[ERROR] Release mode requires a git checkout." + exit 1 + fi + + CURRENT_BRANCH="$(git symbolic-ref --short -q HEAD || echo)" + if [[ "$CURRENT_BRANCH" != "main" ]]; then + echo "[ERROR] Release build must run from 'main' branch. Current: ${CURRENT_BRANCH:-}." + echo " Switch with: git checkout main" + exit 1 + fi + + if ! git diff --quiet HEAD -- || ! git diff --cached --quiet; then + echo "[ERROR] Working tree has uncommitted changes. Commit or stash them on the appropriate branch before releasing." + git status --short + exit 1 + fi + + echo "[INFO] Fetching origin..." + git fetch origin main + if git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then + git fetch origin dev + fi + + if ! git merge --ff-only origin/main 2>/dev/null; then + echo "[ERROR] Local main has diverged from origin/main. Resolve manually before releasing." + exit 1 + fi + + # Merge dev into main BEFORE reading the version, so changelog.md reflects + # the bumped state that dev brings in. + if git show-ref --verify --quiet refs/heads/dev; then + echo "[INFO] Merging local dev into main..." + if ! git merge --no-ff dev -m "Release (merge dev)"; then + echo "[ERROR] Merge of dev into main failed (conflict). Resolve manually and re-run." + exit 1 + fi + DEV_MERGED=1 + elif git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then + echo "[INFO] Fetching and merging origin/dev into main..." + git fetch origin dev:dev + if ! git merge --no-ff dev -m "Release (merge dev)"; then + echo "[ERROR] Merge of dev into main failed (conflict). Resolve manually and re-run." + exit 1 + fi + DEV_MERGED=1 + else + echo "[INFO] No dev branch found — releasing main as-is." + fi + VERSION="$(read_version_from_changelog)" - echo "[INFO] Release version (from $CHANGELOG_FILE): $VERSION" + echo "[INFO] Release version (from $CHANGELOG_FILE, post-merge): $VERSION" validate_tag "$VERSION" validate_tag "latest" + # Tag collision = abort. A re-release of an existing version with different + # content would silently move what consumers think v0.X.Y points to. + if git rev-parse -q --verify "refs/tags/${VERSION}" >/dev/null; then + echo "[ERROR] Tag ${VERSION} already exists locally. Bump $CHANGELOG_FILE to a new version before releasing." + exit 1 + fi + if git ls-remote --exit-code --tags origin "refs/tags/${VERSION}" >/dev/null 2>&1; then + echo "[ERROR] Tag ${VERSION} already exists on origin. Bump $CHANGELOG_FILE to a new version before releasing." + exit 1 + fi + # Ask for confirmation so you never accidentally re-push an old version or a wrong one. read -r -p "Proceed building & pushing as ${VERSION}? [y/N] " CONFIRM CONFIRM="${CONFIRM:-N}" if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then - echo "[INFO] Aborted by user." + echo "[INFO] Aborted by user. Note: dev has been merged into local main; reset with 'git reset --hard origin/main' if you want to undo." exit 0 fi else @@ -227,10 +404,79 @@ for img in "${BUILT_IMAGES[@]}"; do done echo "============================================================" echo "" -echo "[REMINDER] No git operations were performed. If this was a release," -echo " commit and tag manually, e.g.:" + +# --- Git: release commit + tag + push (release mode only) ------------------- +# Preflight (branch, clean tree, ff origin/main, dev merge, tag collision, +# version parse) already ran BEFORE the build. dev is already merged into +# local main. We only need to land the Release commit, tag, and push. if [[ "$MODE" == "r" ]]; then - echo " git add -A && git commit -m \"Release ${VERSION}\"" - echo " git tag -a ${VERSION} -m \"Release ${VERSION}\"" - echo " git push && git push --tags" + echo "[INFO] Finalising release: version=${VERSION}" + + # Produce a clean Release commit at the tip. Preflight guarantees the working + # tree was clean at start; any post-build artefacts would be unexpected, so + # commit with --allow-empty to keep the release marker isolated. + if git diff --quiet HEAD -- && git diff --cached --quiet; then + git commit --allow-empty -m "Release ${VERSION}" + else + echo "[WARN] Working tree changed during the build — staging and including in release commit." + git add -A + git commit -m "Release ${VERSION}" + fi + + git tag -a "${VERSION}" -m "Release ${VERSION}" + + # Push main first (triggers prod webhook), then the tag. + git push origin main + git push origin "refs/tags/${VERSION}" + echo "[INFO] Pushed main and tag ${VERSION} to origin." + + # Clean up dev branch — local and remote. + if [[ "$DEV_MERGED" == "1" ]]; then + if git show-ref --verify --quiet refs/heads/dev; then + git branch -D dev + echo "[INFO] Deleted local dev branch." + fi + if git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then + git push origin --delete dev + echo "[INFO] Deleted remote dev branch." + fi + fi +fi + +# --- Git: dev branch commit + push (test mode only) ------------------------- +if [[ "$MODE" == "t" ]]; then + if [[ ! -d ".git" ]]; then + echo "[WARN] Not a git checkout — skipping dev branch commit/push." + exit 0 + fi + + CURRENT_BRANCH="$(git symbolic-ref --short -q HEAD || echo)" + + # Ensure we are on the dev branch. Create it if needed. + if [[ "$CURRENT_BRANCH" != "dev" ]]; then + if git show-ref --verify --quiet refs/heads/dev; then + echo "[INFO] Switching to existing local dev branch." + git checkout dev + elif git ls-remote --exit-code --heads origin dev >/dev/null 2>&1; then + echo "[INFO] Checking out remote dev branch." + git fetch origin dev + git checkout -b dev origin/dev + else + echo "[INFO] Creating new dev branch from main." + git fetch origin main + git checkout -b dev origin/main + fi + fi + + # Stage and commit if there are changes. + git add -A + if git diff --cached --quiet; then + echo "[INFO] Working tree clean — pushing current HEAD to dev." + else + git commit -m "Dev build $(date '+%Y-%m-%d %H:%M')" + fi + + # Non-force push. Diverged origin/dev fails hard — resolve manually. + git push -u origin dev + echo "[INFO] Pushed dev to origin." fi diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..c8d5b5c --- /dev/null +++ b/build.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Clearview build wrapper. Keeps project-specific version handling out of the +# shared build-and-push.sh script. +# +# Usage: +# ./build.sh t # increment explicit dev/test build segment, then push :dev +# ./build.sh r # validate release version state, then run release build + +repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$repo_root" + +mode="${1:-}" +case "$mode" in + t) + ./scripts/bump-dev-build.py + ;; + r) + ./scripts/check-release-version.py + ;; + *) + echo "usage: ./build.sh {t|r}" >&2 + exit 2 + ;; +esac + +exec ./build-and-push.sh "$@" diff --git a/containers/clearview/site/app.js b/containers/clearview/site/app.js index def074f..e5ce1ac 100644 --- a/containers/clearview/site/app.js +++ b/containers/clearview/site/app.js @@ -81,6 +81,7 @@ jobAutoRefresh: document.getElementById('jobAutoRefresh'), // Sidebar / routing contentTitle: document.getElementById('contentTitle'), + appVersion: document.getElementById('appVersion'), targetsTableHead: document.getElementById('targetsTableHead'), targetsHeading: document.getElementById('targetsHeading'), deviationsTableHead: document.getElementById('deviationsTableHead'), @@ -1573,6 +1574,16 @@ // Init // ------------------------------------------------------------------------- + function loadVersion() { + if (!els.appVersion) return; + requestJson('/api/version') + .then(function (data) { + if (data && data.version) els.appVersion.textContent = data.version; + }) + .catch(function () { /* leave placeholder on failure */ }); + } + + loadVersion(); consumeOnboardingQueryState(); initOnboardingSection().catch(function () { els.tenantSetupManual.removeAttribute('hidden'); diff --git a/containers/clearview/site/index.html b/containers/clearview/site/index.html index b3247e2..2f63b0c 100644 --- a/containers/clearview/site/index.html +++ b/containers/clearview/site/index.html @@ -37,7 +37,7 @@ Settings diff --git a/containers/clearview/src/clearview_app/main.py b/containers/clearview/src/clearview_app/main.py index 2f4d259..7a8ff30 100644 --- a/containers/clearview/src/clearview_app/main.py +++ b/containers/clearview/src/clearview_app/main.py @@ -38,9 +38,10 @@ from .schemas import ( TenantProfileItem, ) from .scanners import AuthConfig, probe +from .version import display_version from .worker import ScanWorker -app = FastAPI(title="Clearview API", version="0.1.0") +app = FastAPI(title="Clearview API", version=display_version().lstrip("v")) worker = ScanWorker() SITE_DIR = Path(__file__).resolve().parents[2] / "site" @@ -91,6 +92,12 @@ def healthz() -> dict[str, str]: return {"status": "ok"} +@app.get("/api/version") +def version() -> dict[str, str]: + """Return the running build's user-visible version (e.g. v0.1.0.3).""" + return {"version": display_version()} + + # --------------------------------------------------------------------------- # Tenant profiles # --------------------------------------------------------------------------- @@ -420,13 +427,16 @@ def resolve_sharing_links_endpoint(job_id: str, payload: ResolveSharingLinksRequ @app.post("/api/scan-jobs/{job_id}/resolve-groups", response_model=ResolveGroupsResponse) def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse: """ - Expand SharePoint group principals on this job's deviations and write - each group's member list to permission_deviations.resolved_members. - Skips claim-encoded principals (federated/AAD), email-shape users, and - SharingLinks groups (those have their own resolver). + Expand group principals on this job's deviations and write each group's + member list to permission_deviations.resolved_members. Handles both + classic SharePoint groups (via getbyname) and Entra/AAD or M365 groups + assigned directly at root (via Microsoft Graph). Skips email-shape users + and SharingLinks groups (those have their own resolver). """ from .scanners.sharepoint import ( + is_aad_group_principal, is_sharepoint_group_principal, + resolve_aad_group_members, resolve_sharing_link_members, ) @@ -462,10 +472,10 @@ def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse: db.execute(select(PermissionDeviation).where(PermissionDeviation.job_id == job_id)).scalars() ) - # Group deviations by (site_url, principal) so each unique SP group is resolved once + # Group deviations by (site_url, principal) so each unique group is resolved once groups: dict[tuple[str, str], list[int]] = {} for dev in all_deviations: - if not is_sharepoint_group_principal(dev.principal): + if not (is_sharepoint_group_principal(dev.principal) or is_aad_group_principal(dev.principal)): continue key = (dev.site_url, dev.principal) groups.setdefault(key, []).append(dev.id) @@ -475,7 +485,10 @@ def resolve_groups_endpoint(job_id: str) -> ResolveGroupsResponse: updated = 0 for (site_url, group_name), dev_ids in groups.items(): try: - members = resolve_sharing_link_members(site_url, group_name, auth) + if is_aad_group_principal(group_name): + members = resolve_aad_group_members(group_name, auth) + else: + members = resolve_sharing_link_members(site_url, group_name, auth) except Exception: # noqa: BLE001 members = [] diff --git a/containers/clearview/src/clearview_app/scanners/sharepoint.py b/containers/clearview/src/clearview_app/scanners/sharepoint.py index dc3b77f..097aefa 100644 --- a/containers/clearview/src/clearview_app/scanners/sharepoint.py +++ b/containers/clearview/src/clearview_app/scanners/sharepoint.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re import time from dataclasses import dataclass from urllib.parse import urlparse @@ -258,6 +259,86 @@ def is_sharepoint_group_principal(principal: str) -> bool: return True +# SharePoint PrincipalType: 1=User, 2=DistributionList, 4=SecurityGroup, +# 8=SharePointGroup, 16=All. +_PRINCIPAL_TYPE_USER = 1 + +_GUID_RE = re.compile( + r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" +) + + +def _extract_user_upn(login: str) -> str: + """ + Pull the UPN/email out of a user claim LoginName so individually granted + users render readably instead of as a raw claim string. + + i:0#.f|membership|jan@contoso.com -> jan@contoso.com + i:0#.w|contoso\\jan -> "" (no UPN; caller falls back to Title) + """ + if not login: + return "" + tail = login.rsplit("|", 1)[-1].strip() + return tail if "@" in tail else "" + + +def _display_principal(login: str, title: str, principal_type: int) -> str: + """ + Choose the most readable identity for a role-assignment member. + + For individual users we surface the UPN/email instead of the claim-encoded + LoginName when one is present; for everything else (groups, system/built-in + accounts, on-prem claims) we keep the original LoginName so claim object ids + stay resolvable and the site-root noise filter still recognises them. + """ + if principal_type == _PRINCIPAL_TYPE_USER: + upn = _extract_user_upn(login) + if upn: + return upn + return login or title + + +def _extract_aad_group_object_id(principal: str) -> str | None: + """ + Return the Entra/AAD object id encoded in a claim principal, or None. + + c:0t.c|tenant| -> (security group) + c:0o.c|federateddirectoryclaimprovider| -> (M365 group members) + c:0o.c|federateddirectoryclaimprovider|_o -> (M365 group owners) + """ + if not principal: + return None + lowered = principal.strip().lower() + if not ( + lowered.startswith("c:0t.c|tenant|") + or lowered.startswith("c:0o.c|federateddirectoryclaimprovider|") + ): + return None + tail = principal.rsplit("|", 1)[-1].strip() + if tail.endswith("_o"): + tail = tail[:-2] + return tail if _GUID_RE.match(tail) else None + + +def is_aad_group_principal(principal: str) -> bool: + """True when the principal is an Entra/AAD or M365 group we can expand via Graph.""" + return _extract_aad_group_object_id(principal) is not None + + +def resolve_aad_group_members(principal: str, auth: AuthConfig) -> list[str]: + """ + Expand an Entra/AAD or M365 group assigned directly at root into its + member (and owner) list via Microsoft Graph. Returns an empty list when + the principal is not such a group or when Graph cannot read it. + """ + object_id = _extract_aad_group_object_id(principal) + if not object_id: + return [] + validate_auth_config(auth) + seen: set[str] = set() + return _expand_aad_group_by_id(object_id, auth, seen, depth=0) + + def _is_noise_principal(principal: str) -> bool: """ SharePoint surfaces several principal types at site-root level that are @@ -432,6 +513,28 @@ def _expand_aad_group_via_graph( if not group_id: return [] + return _expand_aad_group_by_id(group_id, auth, seen, depth) + + +def _expand_aad_group_by_id( + group_id: str, + auth: AuthConfig, + seen: set[str], + depth: int, +) -> list[str]: + if depth > 3: + return ["… (recursion limit)"] + key = f"id:{group_id.strip().lower()}" + if not group_id.strip() or key in seen: + return [] + seen.add(key) + + try: + token = _get_token_for_host("graph.microsoft.com", auth) + except Exception: # noqa: BLE001 + return [] + + headers = {"Accept": "application/json", "Authorization": f"Bearer {token}"} out: list[str] = [] out.extend(_graph_collect(f"/groups/{group_id}/members", headers, auth, seen, depth, owner=False)) out.extend(_graph_collect(f"/groups/{group_id}/owners", headers, auth, seen, depth, owner=True)) @@ -606,7 +709,10 @@ def _get_role_assignments(url: str, headers: dict[str, str]) -> list[PermissionE for item in _extract_values(data): member = item.get("Member") or {} - principal = str(member.get("LoginName") or member.get("Title") or "").strip() + login = str(member.get("LoginName") or "").strip() + title = str(member.get("Title") or "").strip() + principal_type = _to_int(member.get("PrincipalType")) + principal = _display_principal(login, title, principal_type) if not principal: continue diff --git a/containers/clearview/src/clearview_app/version.py b/containers/clearview/src/clearview_app/version.py new file mode 100644 index 0000000..b31ae7a --- /dev/null +++ b/containers/clearview/src/clearview_app/version.py @@ -0,0 +1,22 @@ +"""Clearview version metadata. + +The three-part VERSION is the release version. Dev/test builds append the +explicit BUILD segment. Build numbers are source state, not derived from git +history, so operators can see exactly which image build is running. +""" +from __future__ import annotations + +VERSION = "v0.1.0" +BUILD = 1 + + +def display_version() -> str: + """Return the user-visible Clearview version.""" + if BUILD > 0: + return f"{VERSION}.{BUILD}" + return VERSION + + +def cache_version() -> str: + """Return the static-asset cache-buster version.""" + return display_version().lstrip("v") diff --git a/docs/changelog-develop.md b/docs/changelog-develop.md index e404b10..1097da5 100644 --- a/docs/changelog-develop.md +++ b/docs/changelog-develop.md @@ -2,6 +2,22 @@ This file documents changes on the develop branch of this project. +## 2026-05-26 — Build/version number in the UI (Dropkeep-style) + +### Added +- **Version metadata module `clearview_app/version.py`** — single source of truth mirroring Dropkeep: `VERSION = "v0.1.0"` (release) + `BUILD = 0` (explicit dev/test build segment, source state, not git-derived). `display_version()` returns `vX.Y.Z.N` when `BUILD > 0`, else `vX.Y.Z`; `cache_version()` strips the leading `v`. +- **`GET /api/version` endpoint** — returns `{"version": display_version()}`. The FastAPI app `version=` is also sourced from `version.py` (was hardcoded `"0.1.0"`). +- **Version shown in the UI** — the sidebar footer version (previously a hardcoded `v0.1.0` in `index.html`) is now populated at load time from `/api/version` via a new `loadVersion()` in `app.js` (span `id="appVersion"`). Operators see exactly which image build is running, e.g. `v0.1.0.3`. +- **Build wrapper `build.sh` + `scripts/`** — `./build.sh t` runs `scripts/bump-dev-build.py` (increments `BUILD`) then `./build-and-push.sh t`; `./build.sh r` runs `scripts/check-release-version.py` (asserts `BUILD == 0` and that `version.py` matches the top `docs/changelog.md` release heading) then `./build-and-push.sh r`. `scripts/set-release-version.py vX.Y.Z` sets a new release version and resets `BUILD = 0`. Build numbers are committed in source so the image carries the exact build with no Docker build args. + +## 2026-05-26 — Root report: expand Entra/M365 groups & readable direct users + +### Added +- **Entra/AAD & M365 group expansion at site root** — the "Resolve groups" action now also expands Azure AD security groups and Microsoft 365 groups that are assigned **directly** at the site root, not just classic SharePoint site groups. Previously these claim-encoded principals (`c:0t.c|tenant|`, `c:0o.c|federateddirectoryclaimprovider|`) were skipped by `is_sharepoint_group_principal`, so the root report showed only the group name and never the people inside — making the inventory incomplete. New helpers in `scanners/sharepoint.py`: `_extract_aad_group_object_id` (parses the Entra object id out of the claim, incl. the `_o` owners suffix), `is_aad_group_principal`, `resolve_aad_group_members`, and `_expand_aad_group_by_id` (extracted from `_expand_aad_group_via_graph` so both mail-based and id-based lookups share the `/groups/{id}/members` + `/owners` Graph path, depth-limited to 3 with a per-resolve `seen` set). `POST /api/scan-jobs/{id}/resolve-groups` now routes AAD/M365 group principals to the Graph resolver and SharePoint groups to the existing `getbyname` resolver. Requires `GroupMember.Read.All` (or `Group.Read.All`) on Microsoft Graph; without it the group stays visible by name and counts as "skipped" — no crash. + +### Changed +- **Readable principals for directly-assigned users** — individual users granted rights directly on the site root now render as their UPN/email (e.g. `jan@contoso.com`) instead of the raw claim string `i:0#.f|membership|jan@contoso.com`. New helpers `_extract_user_upn` and `_display_principal` in `scanners/sharepoint.py`, applied in `_get_role_assignments` (so both the root scan and the deviation scan benefit, consistently on both sides of the root-vs-child set comparison). Only users with an `@`-shaped UPN are rewritten; groups, on-prem (`i:0#.w|domain\\user`) and built-in/system accounts keep their original LoginName so claim object ids stay resolvable and the site-root noise filter (`SHAREPOINT\\system`, `NT AUTHORITY\\*`, etc.) keeps matching. + ## [2026-04-28] ### Changed diff --git a/docs/code-review-todo.md b/docs/code-review-todo.md new file mode 100644 index 0000000..7a5f37b --- /dev/null +++ b/docs/code-review-todo.md @@ -0,0 +1,185 @@ +# Code Review TODO — Clearview + +**Aangemaakt:** 2026-05-19 +**Branch bij review:** `refactor/scanner-package-frontend` +**Scope:** Eerste volledige review (~7.100 regels code) +**Totaal:** 13 CRITICAL · 19 HIGH · 14 MEDIUM · 1 LOW + +Werkvolgorde: alle CRITICAL eerst (P0), daarna HIGH (P1), dan MEDIUM/LOW (P2). +Per item staan severity, bestand(en):regel(s), en de gewenste fix. + +--- + +## P0 — CRITICAL (eerst dichten) + +### Auth & secrets + +- [ ] **Geen authenticatie op enig API-endpoint** + - `containers/clearview/src/clearview_app/main.py` (alle `/api/` routes) + - Fix: API-key via `X-API-Key` header met FastAPI `Security()` dependency, of Bearer-token op alle `/api/` routes. + +- [ ] **Client secrets staan plaintext in DB** + - `containers/clearview/src/clearview_app/models.py:21` (`TenantProfile.client_secret`) + - `containers/clearview/src/clearview_app/models.py:45` (`ScanJob.auth_client_secret`) + - Fix: encrypt-at-rest met `cryptography.fernet`; key via env var. Decrypt enkel in geheugen bij gebruik. + +- [ ] **`.env` niet in `.gitignore`** + - `.gitignore`, `stack/.env` + - Fix: voeg `stack/.env` en `**/.env` toe aan `.gitignore`; lever `stack/.env.example` met placeholders; verifieer dat `.env` nog niet in git history zit (anders rotate credentials). + +- [ ] **Hardcoded DB-fallback `clearview:clearview`** + - `containers/clearview/src/clearview_app/config.py:17-19` + - Fix: verwijder default; `raise RuntimeError("DATABASE_URL required")` als env ontbreekt. + +- [ ] **Adminer publiek op `0.0.0.0:8081`** + - `stack/docker-compose.yml:44-46` + - Fix: bind aan `127.0.0.1:${ADMINER_PORT:-8081}:8080` of verwijder uit prod-compose. + +### Injectie & exfiltratie + +- [ ] **XSS via ongescapete velden in `innerHTML`** (3 vindplaatsen) + - `containers/clearview/site/app.js:658-676` (`job.id`, `job.source_type`, `job.items_scanned`) + - `containers/clearview/site/app.js:885-894` (`job.status`, `total/processed/successful/failed_targets`, `items_scanned`) + - `containers/clearview/site/app.js:175-184` (`statusBadge()` zonder escape op `status`) + - Fix: consequent `escHtml()` op alle API-velden, óók ID's en numerieke. Geen uitzonderingen. + +- [ ] **Open redirect via `payload.connect_url`** + - `containers/clearview/site/app.js:472` + - Fix: valideer `new URL(payload.connect_url).protocol === 'https:'` + host-allowlist (`login.microsoftonline.com`). + +- [ ] **SSRF / token-exfiltratie via `@odata.nextLink`** + - `containers/clearview/src/clearview_app/scanners/sharepoint.py:547-553` + - `containers/clearview/src/clearview_app/scanners/entra.py:227-272` + - Fix: vergelijk `urlparse(next_url).netloc == urlparse(original_url).netloc`; gooi anders een `RuntimeError`. + +- [ ] **Header injection in `Content-Disposition`** + - `containers/clearview/src/clearview_app/main.py:701-705` + - Fix: type de route-parameter als `uuid.UUID` zodat FastAPI `job_id` valideert; `urllib.parse.quote(filename, safe="")`. + +--- + +## P1 — HIGH + +### Correctheid + +- [ ] **Token cache zonder TTL — workers crashen na 1 uur** + - `containers/clearview/src/clearview_app/scanners/sharepoint.py:34, 512-543` + - Fix: bewaar `expires_at = time.time() + result["expires_in"] - 60`; invalideer in `_get_token_for_host`. + +- [ ] **MSAL `ConfidentialClientApplication` per aanroep** + - `containers/clearview/src/clearview_app/scanners/sharepoint.py:530` + - Fix: module-level dict `(tenant_id, client_id, auth_method) -> app`. Hergebruik object. + +- [ ] **`_TOKEN_CACHE` zonder lock (race in multi-thread)** + - `containers/clearview/src/clearview_app/scanners/sharepoint.py:34` + - Fix: `threading.Lock` rond check-then-write, of `functools.lru_cache` + TTL-wrapper. + +- [ ] **Race condition in worker: niet-atomaire job-claim** + - `containers/clearview/src/clearview_app/worker.py:48-68` + - Fix: één `UPDATE scan_jobs SET status='running' WHERE id=:id AND status='queued' RETURNING id` met `FOR UPDATE SKIP LOCKED`. + +- [ ] **Auto-refresh race in frontend** + - `containers/clearview/site/app.js:1009-1013` + alle `tick()` callsites + - Fix: `AbortController` per render; vorige request cancelen voordat nieuwe gestart wordt. + +- [ ] **Event-listener accumulatie / re-render-pattern** + - `containers/clearview/site/app.js:238-270, 678-702` + - Fix: event delegation op stabiele container (`els.jobsTableBody.addEventListener('click', ...)`). + +- [ ] **Sequentieel awaiten van onafhankelijke calls** + - `containers/clearview/site/app.js:554-555, 999, 1281, 1364` + - Fix: `await Promise.all([refreshJobs(), refreshSelectedJob()])`. + +- [ ] **Niet-afgehandelde floating promise** + - `containers/clearview/site/app.js:1143` + - Fix: `.catch(err => showFeedback(...))` op `testTargetConnection(...)`. + +- [ ] **OAuth state-store is in-memory dict (breekt bij `--workers >1`)** + - `containers/clearview/src/clearview_app/onboarding.py:31, 134-145` + - Fix: state opslaan in DB (tabel `oauth_states` met `created_at`, `consumed_at`) of Redis. + +- [ ] **`scan_type` ongevalideerd** + - `containers/clearview/src/clearview_app/schemas.py:36`, `main.py:178, 207` + - Fix: `Literal["sharepoint","sharepoint_root","mailbox","entra_groups"]` in Pydantic schema. + +- [ ] **`datetime.utcnow()` deprecated, timezone-naive** (overal) + - `main.py`, `worker.py`, `models.py:26-27`, `cert.py:33` + - Fix: `datetime.now(timezone.utc)`; `DateTime(timezone=True)` in SQLAlchemy-kolommen. + +- [ ] **`ThreadPoolExecutor(max_workers=1)` per target** + - `containers/clearview/src/clearview_app/worker.py:307` + - Fix: gedeelde executor; documenteer dat `future.cancel()` lopende scan niet onderbreekt. + +- [ ] **Geen throttling-respect bij 429 in item-loop** + - `containers/clearview/src/clearview_app/scanners/sharepoint.py:603-619` + - Fix: batching via `$expand=RoleAssignments` of exponential backoff op item-niveau. + +### Hardening + +- [ ] **Container draait als root** + - `containers/clearview/Dockerfile` + - Fix: `RUN adduser --system --ingroup clearview clearview` + `USER clearview`. + +- [ ] **`.deb` van packages.microsoft.com zonder checksum** + - `containers/clearview/Dockerfile:17-19` + - Fix: hardcoded SHA256 + `sha256sum --check`, of officiële GPG-key via `signed-by`. + +- [ ] **`Install-Module ExchangeOnlineManagement` zonder versie-pin** + - `containers/clearview/Dockerfile:24-26` + - Fix: `-RequiredVersion 3.7.0` (of huidige geteste versie). + +- [ ] **Graph-foutberichten gelekt richting frontend** + - `containers/clearview/src/clearview_app/onboarding.py:170, 188` + - Fix: volledig log naar server-side DEBUG; aan client alleen generieke code + HTTP-status. + +- [ ] **OData-filter injection via displayName/mail** + - `containers/clearview/src/clearview_app/scanners/entra.py:178-196` + - Fix: `urllib.parse.quote(cleaned.replace("'", "''"), safe="")`. + +- [ ] **PowerShell-args zonder UPN-validatie** + - `containers/clearview/src/clearview_app/scanners/mailbox.py:181-190` + - `containers/clearview/src/clearview_app/scanners/exo_scripts/get-permissions.ps1:107` + - Fix: `re.fullmatch(r"[^@\s]{1,64}@[^@\s]{1,255}", upn)` vóór `_run_pwsh`. + +### Architectuur + +- [ ] **`main.py` is 1139 regels — splitsen** + - Fix: `routers/{tenants,jobs,onboarding}.py`, `services/job_service.py`, `export.py`. + +- [ ] **`sharepoint.py` is 722 regels — splitsen** + - Fix: `_auth.py`, `_http.py`, `sharepoint.py` (publieke scanfuncties), `sharing_links.py`. + +- [ ] **`_ensure_schema_columns()` met 18 raw `ALTER TABLE`** + - `containers/clearview/src/clearview_app/main.py:1115-1139` + - Fix: vervang door Alembic; commit baseline-migratie + history. + +--- + +## P2 — MEDIUM / LOW + +- [ ] **`app.js` 1586 regels, geen build step** — splits in ES-modules + esbuild/rollup; verwijder IIFE-wrapper. +- [ ] **Focus management bij route-wissel ontbreekt** — `app.js:1529-1551` — focus naar `

` van nieuwe route na navigatie. +- [ ] **Focus-outline 38% opacity voldoet niet aan WCAG 3:1** — `styles.css:292-296` — `outline: 2px solid var(--cv-accent)`. +- [ ] **Geen debouncing/abort op `jobSiteFilter`** — `app.js:1158-1172` — `AbortController` per fetch. +- [ ] **`els.submitFeedback` gebruikt voor niet-SharePoint feedback** — `app.js:682, 711` — generiek feedback-element of context-specifiek. +- [ ] **Magic string `'__manual__'`** — `app.js:412, 424` — named constant. +- [ ] **Icon-knoppen missen `aria-label`** — `app.js:229-231` — `aria-label` toevoegen. +- [ ] **`scanner.py` is shim zonder waarschuwing** — `DeprecationWarning` of verwijderen. +- [ ] **`except Exception: pass` zonder logging** (meerdere) — minimaal `log.warning(..., exc_info=True)`. +- [ ] **`_resolve_credentials(db, ...)` mist type-hint** — `main.py:935` — `db: Session`. +- [ ] **`CreateScanAppResponse` lekt secret zonder waarschuwing** — `schemas.py:149-155` — `show_once: bool` veld + log-suppression voor dit endpoint. +- [ ] **Deviations hard-capped op 1000** — `main.py:728` — voeg `total_deviations_count` + `truncated: bool` toe. +- [ ] **Geen `logging` in scanners-package** — `import logging; logger = logging.getLogger(__name__)` overal. +- [ ] **`list-mailboxes.ps1` laadt alles in geheugen** — `-ResultSize Unlimited` → paginering of cap. +- [ ] **`cert.py` private key zonder encryptie** — documenteer aanname dat caller encryptie-at-rest doet. +- [ ] **`build-and-push.sh` doet `git add -A`** — `build-and-push.sh:294` — expliciete file-lijst of bevestigingsprompt. +- [ ] **README build-instructies kloppen niet (1/2/3 vs t/r)** — `README.md:80-84`. + +--- + +## Werkwijze + +1. Werk per checkbox; verwijder geen items maar zet `[x]` als done. +2. Bij oplevering van een batch: append entry in `docs/changelog-develop.md`. +3. CRITICAL items vereisen handmatige verificatie (auth-tests, secret-rotation, XSS-payload checks). +4. Na P0 + P1: hertest met deze TODO als checklist voordat een nieuwe review wordt aangevraagd. diff --git a/scripts/bump-dev-build.py b/scripts/bump-dev-build.py new file mode 100755 index 0000000..8067617 --- /dev/null +++ b/scripts/bump-dev-build.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +"""Increment Clearview's explicit dev/test build number.""" +from __future__ import annotations + +import re +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py" + +text = VERSION_FILE.read_text() +match = re.search(r"^BUILD = (\d+)\s*$", text, flags=re.MULTILINE) +if not match: + raise SystemExit(f"BUILD assignment not found in {VERSION_FILE}") + +next_build = int(match.group(1)) + 1 +text = text[: match.start(1)] + str(next_build) + text[match.end(1) :] +VERSION_FILE.write_text(text) + +version_match = re.search(r'^VERSION = ["\']([^"\']+)["\']\s*$', text, flags=re.MULTILINE) +version = version_match.group(1) if version_match else "v?.?.?" +print(f"[bump-dev-build] {version}.{next_build}") diff --git a/scripts/check-release-version.py b/scripts/check-release-version.py new file mode 100755 index 0000000..7c1534a --- /dev/null +++ b/scripts/check-release-version.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +"""Validate Clearview release version state before a release build.""" +from __future__ import annotations + +import re +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py" +CHANGELOG = ROOT / "docs" / "changelog.md" + +ns: dict[str, object] = {} +exec(VERSION_FILE.read_text(), ns) +version = str(ns.get("VERSION", "")) +build = int(ns.get("BUILD", -1)) + +if build != 0: + raise SystemExit(f"Release builds require BUILD = 0 in {VERSION_FILE}; found BUILD = {build}") + +match = re.search(r"^## (v\d+\.\d+\.\d+) — \d{4}-\d{2}-\d{2}\s*$", CHANGELOG.read_text(), flags=re.MULTILINE) +if not match: + raise SystemExit(f"No release heading found in {CHANGELOG}; expected '## vX.Y.Z — YYYY-MM-DD'") + +changelog_version = match.group(1) +if changelog_version != version: + raise SystemExit( + f"Version mismatch: {VERSION_FILE} has {version}, but top changelog release is {changelog_version}" + ) + +print(f"[check-release-version] {version}") diff --git a/scripts/set-release-version.py b/scripts/set-release-version.py new file mode 100755 index 0000000..4e331f1 --- /dev/null +++ b/scripts/set-release-version.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +"""Set Clearview's release version and reset the dev/test build segment.""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +if len(sys.argv) != 2: + raise SystemExit("usage: scripts/set-release-version.py vX.Y.Z") + +version = sys.argv[1] +if not re.fullmatch(r"v\d+\.\d+\.\d+", version): + raise SystemExit("version must match vX.Y.Z, for example v0.1.1") + +ROOT = Path(__file__).resolve().parents[1] +VERSION_FILE = ROOT / "containers" / "clearview" / "src" / "clearview_app" / "version.py" +text = VERSION_FILE.read_text() +text, n_version = re.subn(r'^VERSION = ["\'][^"\']+["\']\s*$', f'VERSION = "{version}"', text, count=1, flags=re.MULTILINE) +text, n_build = re.subn(r"^BUILD = \d+\s*$", "BUILD = 0", text, count=1, flags=re.MULTILINE) +if n_version != 1 or n_build != 1: + raise SystemExit(f"Could not update VERSION/BUILD in {VERSION_FILE}") +VERSION_FILE.write_text(text) +print(f"[set-release-version] {version}")