From f3f9d45d2b289c8cfa067ab7ba2162ba9426bbf6 Mon Sep 17 00:00:00 2001
From: Ivo Oskamp <ivooskamp@oskamp.info>
Date: Wed, 25 Mar 2026 08:53:50 +0100
Subject: [PATCH] Fix CBZ path, add download endpoint, rewrite TECHNICAL.md

- Fix CBZ extension: common.make_rel_path now accepts ext param;
  CBZ files are stored as comics/{author}/{title}.cbz instead of .cbr;
  library.py import passes actual file suffix
- Add GET /download/{filename} endpoint (was 404, referenced in book.html)
- TECHNICAL.md fully rewritten: File Storage Paths section with exact
  path patterns per format, complete endpoint lists for all routers
  including settings.py (previously undocumented), metadata save
  behaviour table, updated Known Bugs Fixed

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 containers/novela/routers/common.py  |   6 +-
 containers/novela/routers/library.py |  11 +-
 docs/TECHNICAL.md                    | 225 +++++++++++++++++----------
 docs/changelog-develop.md            |   7 +-
 4 files changed, 164 insertions(+), 85 deletions(-)

diff --git a/containers/novela/routers/common.py b/containers/novela/routers/common.py
index ec89759..c86ffea 100644
--- a/containers/novela/routers/common.py
+++ b/containers/novela/routers/common.py
@@ -59,7 +59,7 @@ def coerce_series_index(value: int | str | None) -> int:
         return 1
 
 
-def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, series: str, series_index: int | str | None) -> Path:
+def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, series: str, series_index: int | str | None, ext: str = "") -> Path:
     if media_type == "epub":
         pub = clean_segment(publisher, "Unknown Publisher", 80)
         auth = clean_segment(author, "Unknown Author", 80)
@@ -74,9 +74,11 @@ def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, s
         ttl = clean_segment(title, "Untitled", 140)
         return Path("pdf") / auth / f"{ttl}.pdf"
 
+    # CBR / CBZ — preserve the original extension; default to .cbr
+    comics_ext = ext if ext in {".cbr", ".cbz"} else ".cbr"
     auth = clean_segment(author, "Unknown", 80)
     ttl = clean_segment(title, "Untitled", 140)
-    return Path("comics") / auth / f"{ttl}.cbr"
+    return Path("comics") / auth / f"{ttl}{comics_ext}"
 
 
 def ensure_unique_rel_path(rel_path: Path) -> Path:
diff --git a/containers/novela/routers/library.py b/containers/novela/routers/library.py
index 72e2a6a..68291d2 100644
--- a/containers/novela/routers/library.py
+++ b/containers/novela/routers/library.py
@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 
 from fastapi import APIRouter, File, Request, UploadFile
-from fastapi.responses import HTMLResponse, Response
+from fastapi.responses import FileResponse, HTMLResponse, Response
 from fastapi.templating import Jinja2Templates
 from PIL import UnidentifiedImageError
 
@@ -128,6 +128,7 @@ async def library_import(files: list[UploadFile] = File(...)):
                             title=meta.get("title") or Path(name).stem,
                             series=meta.get("series", ""),
                             series_index=meta.get("series_index", 0),
+                            ext=suffix,
                         )
                     )
                     dest = LIBRARY_DIR / rel
@@ -148,6 +149,14 @@ async def library_import(files: list[UploadFile] = File(...)):
     return {"ok": True, "imported": imported, "skipped": skipped}
 
 
+@router.get("/download/{filename:path}")
+async def library_download(filename: str):
+    full = resolve_library_path(filename)
+    if full is None or not full.exists():
+        return Response(status_code=404)
+    return FileResponse(full, filename=full.name, media_type="application/octet-stream")
+
+
 @router.delete("/library/file/{filename:path}")
 async def library_delete(filename: str):
     full = resolve_library_path(filename)
diff --git a/docs/TECHNICAL.md b/docs/TECHNICAL.md
index 9d2e667..822bcc2 100644
--- a/docs/TECHNICAL.md
+++ b/docs/TECHNICAL.md
@@ -16,26 +16,64 @@ It is the primary technical reference for the current implementation.
   2. `close_pool()`
 - Source-of-truth rule: files on disk are authoritative, the database is an index/cache.
 
+## File Storage Paths
+
+All files are stored under `library/` (relative to the app working directory, mapped via Docker volume).
+`LIBRARY_DIR = Path("library")`, `LIBRARY_ROOT = LIBRARY_DIR.resolve()`.
+
+### Path structure per format
+
+| Format | Path pattern |
+|--------|-------------|
+| EPUB (no series) | `library/epub/{publisher}/{author}/Stories/{title}.epub` |
+| EPUB (series) | `library/epub/{publisher}/{author}/Series/{series}/{idx:03d} - {title}.epub` |
+| PDF | `library/pdf/{author}/{title}.pdf` |
+| CBR | `library/comics/{author}/{title}.cbr` |
+| CBZ | `library/comics/{author}/{title}.cbz` |
+
+- Segments are sanitised: special chars stripped, max lengths applied (publisher/author 80, title 140, series 80).
+- Series index is zero-padded to 3 digits (`001`, `002`, …), clamped to 1–999.
+- Duplicate filenames get a `(2)`, `(3)`, … suffix.
+- After any file move, empty parent directories are pruned up to `LIBRARY_ROOT`.
+
+### Path logic
+
+- `common.make_rel_path(media_type, publisher, author, title, series, series_index, ext)` — used by import and grabber.
+- `_make_rel_path(publisher, author, title, series, series_index, ext)` in `reader.py` — used by metadata PATCH; same logic, uses actual file extension.
+- Both functions produce identical paths for all formats.
+
+### Metadata save behaviour per format
+
+| Format | File written? | DB written? |
+|--------|--------------|-------------|
+| EPUB | Yes — OPF metadata updated in-place | Yes |
+| PDF | No | Yes |
+| CBR | No | Yes |
+| CBZ | No (tags/metadata); rating written to ComicInfo.xml | Yes |
+
+---
+
 ## Router Status
 
 ### `routers/library.py`
-- `GET /library`
-- `GET /api/library`
-- `POST /library/rescan`
-- `POST /library/import` (EPUB/PDF/CBR/CBZ)
-- `DELETE /library/file/{filename}`
-- `GET /library/cover/{filename}`
-- `GET /library/cover-cached/{filename}`
-- `POST /library/cover/{filename}` (EPUB)
-- `POST /library/want-to-read/{filename}`
-- `POST /library/archive/{filename}`
-- `POST /library/new/mark-reviewed` (bulk `needs_review=false`)
-- `POST /library/rating/{filename}` (set/clear star rating, body: `{"rating": 0-5}`)
-- `GET /home`
-- `GET /api/home`
-- `GET /stats`
-- `GET /api/stats`
-- `GET /library/list` (compat)
+- `GET /library` — library page
+- `GET /api/library` — book list JSON (fast-path by default)
+- `POST /library/rescan` — forced full disk rescan
+- `POST /library/import` — upload EPUB/PDF/CBR/CBZ
+- `DELETE /library/file/{filename}` — delete file + DB row + prune dirs
+- `GET /download/{filename}` — download file with `Content-Disposition: attachment`
+- `GET /library/cover/{filename}` — serve cover (EPUB from file; PDF/CBR from cache)
+- `GET /library/cover-cached/{filename}` — serve cover from DB cache only
+- `POST /library/cover/{filename}` — upload/replace cover (EPUB only)
+- `POST /library/want-to-read/{filename}` — toggle want-to-read flag
+- `POST /library/archive/{filename}` — toggle archived flag
+- `POST /library/new/mark-reviewed` — bulk set `needs_review=false`
+- `POST /library/rating/{filename}` — set/clear star rating `{"rating": 0-5}`
+- `GET /home` — home page
+- `GET /api/home` — home data JSON
+- `GET /stats` — statistics page
+- `GET /api/stats` — statistics data JSON
+- `GET /library/list` — compat alias
 
 `GET /api/library` runs in fast-path mode by default (DB-only, no full disk rescan).
 For a forced sync: `GET /api/library?rescan=true` or `POST /library/rescan`.
@@ -62,36 +100,60 @@ Home read sections are ordered oldest-first:
 - `novels_read`: `ORDER BY MAX(read_at) ASC`
 
 ### `routers/reader.py`
-- EPUB serving/chapters/images
-- Reader page + book detail
-- Metadata patch (`PATCH /library/book/{filename}`): updates DB for all formats; writes to file only for EPUB
-- Progress read/write/delete
-- Mark-as-read
-- Star rating (`POST /library/rating/{filename}`): validates 0–5, writes to file (EPUB OPF / CBZ ComicInfo.xml) and DB; DB-only for CBR/PDF
-- PDF render endpoint (`GET /library/pdf/{filename}?page=N&dpi=150`) — returns page as PNG
-- PDF info endpoint (`GET /api/pdf/info/{filename}`) — returns `{"page_count": N}`
-- CBR/CBZ page endpoint
-- Genres endpoint
+- `GET /library/epub/{filename}` — serve EPUB inline (no attachment header)
+- `GET /library/chapters/{filename}` — EPUB spine as JSON
+- `GET /library/chapter/{index}/{filename}` — single EPUB chapter as HTML fragment
+- `GET /library/chapter-img/{path}?filename=…` — image extracted from EPUB ZIP
+- `GET /library/pdf/{filename}?page=N&dpi=150` — render PDF page as PNG
+- `GET /api/pdf/info/{filename}` — `{"page_count": N}`
+- `GET /library/cbr/{filename}/{page}` — CBR/CBZ page as image
+- `GET /library/progress/{filename}` — read progress
+- `POST /library/progress/{filename}` — save progress `{"cfi": "…", "progress": N}`
+- `DELETE /library/progress/{filename}` — clear progress
+- `POST /library/mark-read/{filename}` — mark as read (with optional date)
+- `GET /library/book/{filename}` — book detail page
+- `GET /api/genres` — all tags from `book_tags` (optional `?type=genre|subgenre|tag`)
+- `PATCH /library/book/{filename}` — update metadata + tags; moves file if path fields change; DB-only for non-EPUB
+- `POST /library/rating/{filename}` — set/clear 1–5 star rating; writes to EPUB OPF / CBZ ComicInfo.xml; DB-only for CBR/PDF
+- `GET /library/read/{filename}` — reader page (EPUB or PDF)
 
 ### `routers/editor.py`
-- Editor page
-- Chapter get/save
-- Chapter add
-- Chapter delete
+- `GET /library/editor/{filename}` — EPUB chapter editor page
+- `GET /api/edit/chapter/{index}/{filename}` — get chapter HTML
+- `POST /api/edit/chapter/{index}/{filename}` — save chapter HTML
+- `POST /api/edit/chapter/add/{filename}` — add new chapter
+- `DELETE /api/edit/chapter/{index}/{filename}` — delete chapter
 
 ### `routers/grabber.py`
-- Grabber page + convert/debug flows
-- SSE events
-- Credential management for scraper sites
-- Credentials manager UI (`/credentials-manager`)
+- `GET /grabber` — grabber page
+- `GET /convert` — convert page
+- `GET /credentials-manager` — credentials manager UI
+- `GET /debug` — debug page
+- `POST /debug/run` — run debug scrape
+- `GET /credentials` — list stored credentials
+- `POST /credentials` — save credential
+- `DELETE /credentials/{site}` — delete credential
+- `POST /preload` — preload book info from URL
+- `POST /convert` — run scrape + convert to EPUB
+- `GET /events/{job_id}` — SSE stream for job progress
+
+### `routers/settings.py`
+- `GET /settings` — settings page
+- `GET /api/break-patterns` — list chapter-break patterns
+- `POST /api/break-patterns` — add break pattern (type: `regex` or `css_class`)
+- `PATCH /api/break-patterns/{id}` — update pattern (enable/disable or change value)
+- `DELETE /api/break-patterns/{id}` — delete pattern
+- `DELETE /api/reading-history` — wipe all reading sessions
 
 ### `routers/backup.py`
-- `GET /backup`
-- `GET/POST/DELETE /api/backup/credentials`
-- `GET /api/backup/health`
-- `GET /api/backup/status`
-- `GET /api/backup/history`
-- `POST /api/backup/run`
+- `GET /backup` — backup page
+- `GET/POST/DELETE /api/backup/credentials` — Dropbox settings
+- `GET /api/backup/health` — Dropbox connectivity check
+- `GET /api/backup/status` — current backup status
+- `GET /api/backup/history` — backup run history
+- `POST /api/backup/run` — trigger backup (background task)
+
+---
 
 ## Backup & Security
 - Dropbox token is stored encrypted-at-rest in `credentials` (`site='dropbox'`).
@@ -114,6 +176,8 @@ Implementation details:
 - Concurrency guard: only one backup can run at a time.
 - After container restart/crash, stale `running` logs are auto-marked as interrupted/error.
 
+---
+
 ## Environment
 `stack/novela.env` should include at least:
 - `POSTGRES_DB`
@@ -124,66 +188,53 @@ Implementation details:
 
 Dropbox settings are managed via the web UI on `/backup`.
 
+---
+
 ## UI Notes
 - Library import accepts EPUB/PDF/CBR/CBZ.
 - Home supports the same import formats.
 - Home includes search.
 - Home header/dropzone alignment matches Library (search top-right, dropzone below).
 - `New` view supports `Grid` and `List` mode.
-- Bulk selection + `Remove from New` works only in `List` mode.
-- `List` mode has a column visibility filter with columns:
-  - Publisher
-  - Author
-  - Series
-  - Volume
-  - Title
-  - Has cover
-  - Updated
-  - Genres
-  - Sub-genres
-  - Tags
-  - Status
-- `List` mode supports multi-select with `Shift+click` range selection on checkboxes.
-- `Grid` mode shows no selection checkboxes or bulk actions.
+  - Bulk selection + `Remove from New` works only in `List` mode.
+  - `List` mode has a column visibility filter: Publisher, Author, Series, Volume, Title, Has cover, Updated, Genres, Sub-genres, Tags, Status.
+  - `List` mode supports multi-select with `Shift+click` range selection on checkboxes.
+  - `Grid` mode shows no selection checkboxes or bulk actions.
 - `All books` view supports `Grid` and `List` mode (same columns as `New`, no selection/bulk actions).
   - View mode persisted in `localStorage` as `novela.all.viewMode`.
   - Column visibility persisted in `localStorage` as `novela.all.visibleColumns`.
-- Star ratings (1–5) are shown under the cover in all grid views (Library, Home):
-  - Display-only in grid cards (no click handler, prevents accidental taps).
+- Star ratings (1–5) are shown under the cover in all grid views:
+  - Display-only in grid cards (no click handler; prevents accidental taps while scrolling).
   - Interactive in Book Detail (1.1rem, clickable; clicking the active star clears the rating).
-  - Amber color: filled `#c8a03a`, unfilled `rgba(200, 160, 58, 0.25)`.
-- Reader has a text colour setting in the hamburger menu:
-  - 5 presets from `#e8e2d9` (bright) to `#938d86` (dim), persisted in `localStorage` as `reader-text-colour`.
-  - Hamburger and back-link are visually separated with `margin-left: 1rem` on `.header-back`.
-- Backup page supports:
-  - manual run and dry-run
-  - Dropbox root settings
-  - snapshot retention count
-  - scheduled backup (on/off + interval in hours)
-  - status + history overview
+  - Amber: filled `#c8a03a`, unfilled `rgba(200, 160, 58, 0.25)`.
+- Reader settings (hamburger menu):
+  - Content width slider (30–100 vw), persisted as `reader-content-width-pct`.
+  - Text colour: 5 warm-tone presets `#e8e2d9` → `#938d86`, persisted as `reader-text-colour`.
+  - Hamburger and back-link separated with `margin-left: 1rem` on `.header-back`.
 - Reader supports EPUB and PDF:
-  - EPUB: chapter-text rendering (existing flow)
-  - PDF: page-image rendering via `/library/pdf/{filename}?page=N`; page count fetched from `/api/pdf/info/{filename}`; progress tracked per page; keyboard/button navigation identical to EPUB
-  - `reader.html` branches on `FORMAT` variable injected by the server
+  - EPUB: chapter-text rendering; progress = `{chapterIndex}:{scrollFrac}`.
+  - PDF: page-image rendering via `/library/pdf/{filename}?page=N`; page count from `/api/pdf/info/{filename}`; progress = `{pageIndex}:0`; keyboard/button navigation identical to EPUB.
+  - `reader.html` branches on `FORMAT` variable injected by the server.
 - `Edit EPUB` button in Book Detail is only shown for `.epub` files.
+- Backup page supports: manual run, dry-run, Dropbox root, retention count, schedule (on/off + hours), status + history.
 
-## Known Bugs Fixed
-- `renderGenreView` and `renderSearchResults` in `library.js` referenced `b.genres` (non-existent field on the book object). All tag data lives in `b.tags` as `{tag, tag_type}` objects; the correct helpers are `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`.
-- `PillInput` in `book.js` did not handle comma as a delimiter and did not flush pending input on save. Fixed with comma keydown handler and `flush()` called in `saveEdit()`.
-- `PATCH /library/book/{filename}` failed for PDFs: `_sync_epub_metadata` tried to open the PDF as a ZIP, throwing an exception that aborted the entire save (including the DB update). Fixed by only calling `_sync_epub_metadata` when `ext == ".epub"`.
-- `_make_rel_path` in `reader.py` lacked the format prefix (`epub/`, `pdf/`, `comics/`) used by `common.make_rel_path`, causing files to be moved outside their format directory on metadata save. Fixed by aligning the path logic: EPUB → `epub/{publisher}/{author}/…`, PDF → `pdf/{author}/{title}.pdf`, CBR/CBZ → `comics/{author}/{title}{ext}`.
-- PDF reader showed infinite loading: `reader.html` always called `/library/chapters/{filename}` (EPUB-only) and tried to render chapter text. PDF reader now fetches page count and renders page images.
+---
 
 ## Known Conventions
-- Book deletion flow: delete file, prune empty directories, then `DELETE FROM library` (cascade removes child rows).
+- Book deletion flow: `unlink` file → `prune_empty_dirs(parent)` → `DELETE FROM library` (cascade removes child rows).
+- Empty dir pruning: `prune_empty_dirs(start)` walks up from `start` to `LIBRARY_ROOT`, removing each dir if empty; stops at first non-empty dir.
 - Cover strategy:
-  - EPUB: cover from file + cache
-  - PDF/CBR: thumbnail via cover cache
+  - EPUB: extracted from ZIP + cached in `library_cover_cache`
+  - PDF: first page rendered as thumbnail, cached
+  - CBR/CBZ: first page extracted, cached
 - Rating storage:
   - EPUB: `<meta name="novela:rating" content="N"/>` in OPF
   - CBZ: `<NovelaRating>N</NovelaRating>` in `ComicInfo.xml` inside the ZIP
   - CBR/PDF: DB only
-  - `upsert_book` uses `CASE WHEN EXCLUDED.rating > 0 THEN EXCLUDED.rating ELSE library.rating END` to restore rating from file without overwriting existing DB value
+  - `upsert_book` uses `CASE WHEN EXCLUDED.rating > 0 THEN EXCLUDED.rating ELSE library.rating END` to restore rating from file without overwriting existing DB value.
+- Tag types in `book_tags`: `genre`, `subgenre`, `tag`, `subject`. No direct `genres`/`subgenres` fields on book objects; always use helpers `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`.
+
+---
 
 ## Performance Notes
 - Library load is optimized for large datasets:
@@ -195,3 +246,15 @@ Dropbox settings are managed via the web UI on `/backup`.
   - `idx_library_archived`
   - `idx_reading_sessions_filename_readat`
   - `idx_book_tags_filename_tag`
+
+---
+
+## Known Bugs Fixed
+- `renderGenreView` and `renderSearchResults` in `library.js` referenced `b.genres` (non-existent). Fixed: use `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`.
+- `PillInput` in `book.js` did not handle comma as delimiter and did not flush on save. Fixed: comma keydown + `flush()` in `saveEdit()`.
+- `PATCH /library/book` failed for PDFs: `_sync_epub_metadata` tried to open PDF as ZIP. Fixed: only called for `.epub`.
+- `_make_rel_path` in `reader.py` lacked format prefix (`epub/`, `pdf/`, `comics/`). Fixed: aligned with `common.make_rel_path`.
+- `common.make_rel_path` always generated `.cbr` extension for CBZ files (both map to `media_type="cbr"`). Fixed: accepts optional `ext` parameter; `library.py` import now passes actual suffix.
+- `/download/{filename}` was referenced in `book.html` but no endpoint existed (404). Fixed: added `GET /download/{filename}` to `library.py`.
+- PDF reader showed infinite loading: `reader.html` called EPUB-only `/library/chapters/`. Fixed: PDF path uses `/api/pdf/info/` + page-image rendering.
+- Empty dir pruning only ran when file was moved. Fixed: `prune_empty_dirs(old_path.parent)` always runs after a successful metadata save.
diff --git a/docs/changelog-develop.md b/docs/changelog-develop.md
index f805c64..e4cc150 100644
--- a/docs/changelog-develop.md
+++ b/docs/changelog-develop.md
@@ -46,7 +46,12 @@ This file tracks changes on the `develop` line.
 - Updated Docker image with `postgresql-client` for `pg_dump`.
 - Multiple test builds pushed to `gitea.oskamp.info/ivooskamp/novela:dev`.
 
-## 2026-03-25
+## 2026-03-25 (3)
+- Fixed CBZ extension in import: `common.make_rel_path` always generated `.cbr` for CBZ files; now accepts `ext` parameter; `library.py` passes actual suffix so CBZ files land at `comics/{author}/{title}.cbz`
+- Added missing `GET /download/{filename}` endpoint (referenced in book.html but was 404)
+- TECHNICAL.md fully rewritten: added File Storage Paths section, complete endpoint lists for all routers including settings.py, corrected path documentation
+
+## 2026-03-25 (2)
 - Fixed PDF metadata editing (PATCH /library/book):
   - `_sync_epub_metadata` is now only called for `.epub` files; PDFs update DB only
   - `_make_rel_path` now includes the format prefix matching import: EPUB → `epub/{publisher}/{author}/…`, PDF → `pdf/{author}/{title}.pdf`, CBR/CBZ → `comics/{author}/{title}{ext}`; previously files were moved outside their format directory on metadata save