From f3f9d45d2b289c8cfa067ab7ba2162ba9426bbf6 Mon Sep 17 00:00:00 2001 From: Ivo Oskamp Date: Wed, 25 Mar 2026 08:53:50 +0100 Subject: [PATCH] Fix CBZ path, add download endpoint, rewrite TECHNICAL.md - Fix CBZ extension: common.make_rel_path now accepts ext param; CBZ files are stored as comics/{author}/{title}.cbz instead of .cbr; library.py import passes actual file suffix - Add GET /download/{filename} endpoint (was 404, referenced in book.html) - TECHNICAL.md fully rewritten: File Storage Paths section with exact path patterns per format, complete endpoint lists for all routers including settings.py (previously undocumented), metadata save behaviour table, updated Known Bugs Fixed Co-Authored-By: Claude Sonnet 4.6 --- containers/novela/routers/common.py | 6 +- containers/novela/routers/library.py | 11 +- docs/TECHNICAL.md | 225 +++++++++++++++++---------- docs/changelog-develop.md | 7 +- 4 files changed, 164 insertions(+), 85 deletions(-) diff --git a/containers/novela/routers/common.py b/containers/novela/routers/common.py index ec89759..c86ffea 100644 --- a/containers/novela/routers/common.py +++ b/containers/novela/routers/common.py @@ -59,7 +59,7 @@ def coerce_series_index(value: int | str | None) -> int: return 1 -def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, series: str, series_index: int | str | None) -> Path: +def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, series: str, series_index: int | str | None, ext: str = "") -> Path: if media_type == "epub": pub = clean_segment(publisher, "Unknown Publisher", 80) auth = clean_segment(author, "Unknown Author", 80) @@ -74,9 +74,11 @@ def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, s ttl = clean_segment(title, "Untitled", 140) return Path("pdf") / auth / f"{ttl}.pdf" + # CBR / CBZ — preserve the original extension; default to .cbr + comics_ext = ext if ext in {".cbr", ".cbz"} else ".cbr" auth = clean_segment(author, 
"Unknown", 80) ttl = clean_segment(title, "Untitled", 140) - return Path("comics") / auth / f"{ttl}.cbr" + return Path("comics") / auth / f"{ttl}{comics_ext}" def ensure_unique_rel_path(rel_path: Path) -> Path: diff --git a/containers/novela/routers/library.py b/containers/novela/routers/library.py index 72e2a6a..68291d2 100644 --- a/containers/novela/routers/library.py +++ b/containers/novela/routers/library.py @@ -4,7 +4,7 @@ from datetime import datetime, timezone from pathlib import Path from fastapi import APIRouter, File, Request, UploadFile -from fastapi.responses import HTMLResponse, Response +from fastapi.responses import FileResponse, HTMLResponse, Response from fastapi.templating import Jinja2Templates from PIL import UnidentifiedImageError @@ -128,6 +128,7 @@ async def library_import(files: list[UploadFile] = File(...)): title=meta.get("title") or Path(name).stem, series=meta.get("series", ""), series_index=meta.get("series_index", 0), + ext=suffix, ) ) dest = LIBRARY_DIR / rel @@ -148,6 +149,14 @@ async def library_import(files: list[UploadFile] = File(...)): return {"ok": True, "imported": imported, "skipped": skipped} +@router.get("/download/{filename:path}") +async def library_download(filename: str): + full = resolve_library_path(filename) + if full is None or not full.exists(): + return Response(status_code=404) + return FileResponse(full, filename=full.name, media_type="application/octet-stream") + + @router.delete("/library/file/{filename:path}") async def library_delete(filename: str): full = resolve_library_path(filename) diff --git a/docs/TECHNICAL.md b/docs/TECHNICAL.md index 9d2e667..822bcc2 100644 --- a/docs/TECHNICAL.md +++ b/docs/TECHNICAL.md @@ -16,26 +16,64 @@ It is the primary technical reference for the current implementation. 2. `close_pool()` - Source-of-truth rule: files on disk are authoritative, the database is an index/cache. 
+## File Storage Paths + +All files are stored under `library/` (relative to the app working directory, mapped via Docker volume). +`LIBRARY_DIR = Path("library")`, `LIBRARY_ROOT = LIBRARY_DIR.resolve()`. + +### Path structure per format + +| Format | Path pattern | +|--------|-------------| +| EPUB (no series) | `library/epub/{publisher}/{author}/Stories/{title}.epub` | +| EPUB (series) | `library/epub/{publisher}/{author}/Series/{series}/{idx:03d} - {title}.epub` | +| PDF | `library/pdf/{author}/{title}.pdf` | +| CBR | `library/comics/{author}/{title}.cbr` | +| CBZ | `library/comics/{author}/{title}.cbz` | + +- Segments are sanitised: special chars stripped, max lengths applied (publisher/author 80, title 140, series 80). +- Series index is zero-padded to 3 digits (`001`, `002`, …), clamped to 1–999. +- Duplicate filenames get a `(2)`, `(3)`, … suffix. +- After any file move, empty parent directories are pruned up to `LIBRARY_ROOT`. + +### Path logic + +- `common.make_rel_path(media_type, publisher, author, title, series, series_index, ext)` — used by import and grabber. +- `reader.py _make_rel_path(publisher, author, title, series, series_index, ext)` — used by metadata PATCH; same logic, uses actual file extension. +- Both functions produce identical paths for all formats. + +### Metadata save behaviour per format + +| Format | File written? | DB written? 
| +|--------|--------------|-------------| +| EPUB | Yes — OPF metadata updated in-place | Yes | +| PDF | No | Yes | +| CBR | No | Yes | +| CBZ | No (tags/metadata); rating written to ComicInfo.xml | Yes | + +--- + ## Router Status ### `routers/library.py` -- `GET /library` -- `GET /api/library` -- `POST /library/rescan` -- `POST /library/import` (EPUB/PDF/CBR/CBZ) -- `DELETE /library/file/{filename}` -- `GET /library/cover/{filename}` -- `GET /library/cover-cached/{filename}` -- `POST /library/cover/{filename}` (EPUB) -- `POST /library/want-to-read/{filename}` -- `POST /library/archive/{filename}` -- `POST /library/new/mark-reviewed` (bulk `needs_review=false`) -- `POST /library/rating/{filename}` (set/clear star rating, body: `{"rating": 0-5}`) -- `GET /home` -- `GET /api/home` -- `GET /stats` -- `GET /api/stats` -- `GET /library/list` (compat) +- `GET /library` — library page +- `GET /api/library` — book list JSON (fast-path by default) +- `POST /library/rescan` — forced full disk rescan +- `POST /library/import` — upload EPUB/PDF/CBR/CBZ +- `DELETE /library/file/{filename}` — delete file + DB row + prune dirs +- `GET /download/{filename}` — download file with `Content-Disposition: attachment` +- `GET /library/cover/{filename}` — serve cover (EPUB from file; PDF/CBR from cache) +- `GET /library/cover-cached/{filename}` — serve cover from DB cache only +- `POST /library/cover/{filename}` — upload/replace cover (EPUB only) +- `POST /library/want-to-read/{filename}` — toggle want-to-read flag +- `POST /library/archive/{filename}` — toggle archived flag +- `POST /library/new/mark-reviewed` — bulk set `needs_review=false` +- `POST /library/rating/{filename}` — set/clear star rating `{"rating": 0-5}` +- `GET /home` — home page +- `GET /api/home` — home data JSON +- `GET /stats` — statistics page +- `GET /api/stats` — statistics data JSON +- `GET /library/list` — compat alias `GET /api/library` runs in fast-path mode by default (DB-only, no full disk rescan). 
For a forced sync: `GET /api/library?rescan=true` or `POST /library/rescan`. @@ -62,36 +100,60 @@ Home read sections are ordered oldest-first: - `novels_read`: `ORDER BY MAX(read_at) ASC` ### `routers/reader.py` -- EPUB serving/chapters/images -- Reader page + book detail -- Metadata patch (`PATCH /library/book/{filename}`): updates DB for all formats; writes to file only for EPUB -- Progress read/write/delete -- Mark-as-read -- Star rating (`POST /library/rating/{filename}`): validates 0–5, writes to file (EPUB OPF / CBZ ComicInfo.xml) and DB; DB-only for CBR/PDF -- PDF render endpoint (`GET /library/pdf/{filename}?page=N&dpi=150`) — returns page as PNG -- PDF info endpoint (`GET /api/pdf/info/{filename}`) — returns `{"page_count": N}` -- CBR/CBZ page endpoint -- Genres endpoint +- `GET /library/epub/{filename}` — serve EPUB inline (no attachment header) +- `GET /library/chapters/{filename}` — EPUB spine as JSON +- `GET /library/chapter/{index}/{filename}` — single EPUB chapter as HTML fragment +- `GET /library/chapter-img/{path}?filename=…` — image extracted from EPUB ZIP +- `GET /library/pdf/{filename}?page=N&dpi=150` — render PDF page as PNG +- `GET /api/pdf/info/{filename}` — `{"page_count": N}` +- `GET /library/cbr/{filename}/{page}` — CBR/CBZ page as image +- `GET /library/progress/{filename}` — read progress +- `POST /library/progress/{filename}` — save progress `{"cfi": "…", "progress": N}` +- `DELETE /library/progress/{filename}` — clear progress +- `POST /library/mark-read/{filename}` — mark as read (with optional date) +- `GET /library/book/{filename}` — book detail page +- `GET /api/genres` — all tags from `book_tags` (optional `?type=genre|subgenre|tag`) +- `PATCH /library/book/{filename}` — update metadata + tags; moves file if path fields change; DB-only for non-EPUB +- `POST /library/rating/{filename}` — set/clear 1–5 star rating; writes to EPUB OPF / CBZ ComicInfo.xml; DB-only for CBR/PDF +- `GET /library/read/{filename}` — reader page (EPUB or 
PDF) ### `routers/editor.py` -- Editor page -- Chapter get/save -- Chapter add -- Chapter delete +- `GET /library/editor/{filename}` — EPUB chapter editor page +- `GET /api/edit/chapter/{index}/{filename}` — get chapter HTML +- `POST /api/edit/chapter/{index}/{filename}` — save chapter HTML +- `POST /api/edit/chapter/add/{filename}` — add new chapter +- `DELETE /api/edit/chapter/{index}/{filename}` — delete chapter ### `routers/grabber.py` -- Grabber page + convert/debug flows -- SSE events -- Credential management for scraper sites -- Credentials manager UI (`/credentials-manager`) +- `GET /grabber` — grabber page +- `GET /convert` — convert page +- `GET /credentials-manager` — credentials manager UI +- `GET /debug` — debug page +- `POST /debug/run` — run debug scrape +- `GET /credentials` — list stored credentials +- `POST /credentials` — save credential +- `DELETE /credentials/{site}` — delete credential +- `POST /preload` — preload book info from URL +- `POST /convert` — run scrape + convert to EPUB +- `GET /events/{job_id}` — SSE stream for job progress + +### `routers/settings.py` +- `GET /settings` — settings page +- `GET /api/break-patterns` — list chapter-break patterns +- `POST /api/break-patterns` — add break pattern (type: `regex` or `css_class`) +- `PATCH /api/break-patterns/{id}` — update pattern (enable/disable or change value) +- `DELETE /api/break-patterns/{id}` — delete pattern +- `DELETE /api/reading-history` — wipe all reading sessions ### `routers/backup.py` -- `GET /backup` -- `GET/POST/DELETE /api/backup/credentials` -- `GET /api/backup/health` -- `GET /api/backup/status` -- `GET /api/backup/history` -- `POST /api/backup/run` +- `GET /backup` — backup page +- `GET/POST/DELETE /api/backup/credentials` — Dropbox settings +- `GET /api/backup/health` — Dropbox connectivity check +- `GET /api/backup/status` — current backup status +- `GET /api/backup/history` — backup run history +- `POST /api/backup/run` — trigger backup (background task) + 
+--- ## Backup & Security - Dropbox token is stored encrypted-at-rest in `credentials` (`site='dropbox'`). @@ -114,6 +176,8 @@ Implementation details: - Concurrency guard: only one backup can run at a time. - After container restart/crash, stale `running` logs are auto-marked as interrupted/error. +--- + ## Environment `stack/novela.env` should include at least: - `POSTGRES_DB` @@ -124,66 +188,53 @@ Implementation details: Dropbox settings are managed via the web UI on `/backup`. +--- + ## UI Notes - Library import accepts EPUB/PDF/CBR/CBZ. - Home supports the same import formats. - Home includes search. - Home header/dropzone alignment matches Library (search top-right, dropzone below). - `New` view supports `Grid` and `List` mode. -- Bulk selection + `Remove from New` works only in `List` mode. -- `List` mode has a column visibility filter with columns: - - Publisher - - Author - - Series - - Volume - - Title - - Has cover - - Updated - - Genres - - Sub-genres - - Tags - - Status -- `List` mode supports multi-select with `Shift+click` range selection on checkboxes. -- `Grid` mode shows no selection checkboxes or bulk actions. + - Bulk selection + `Remove from New` works only in `List` mode. + - `List` mode has a column visibility filter: Publisher, Author, Series, Volume, Title, Has cover, Updated, Genres, Sub-genres, Tags, Status. + - `List` mode supports multi-select with `Shift+click` range selection on checkboxes. + - `Grid` mode shows no selection checkboxes or bulk actions. - `All books` view supports `Grid` and `List` mode (same columns as `New`, no selection/bulk actions). - View mode persisted in `localStorage` as `novela.all.viewMode`. - Column visibility persisted in `localStorage` as `novela.all.visibleColumns`. -- Star ratings (1–5) are shown under the cover in all grid views (Library, Home): - - Display-only in grid cards (no click handler, prevents accidental taps). 
+- Star ratings (1–5) shown under the cover in all grid views: + - Display-only in grid cards (no click, prevents accidental taps while scrolling). - Interactive in Book Detail (1.1rem, clickable; clicking the active star clears the rating). - - Amber color: filled `#c8a03a`, unfilled `rgba(200, 160, 58, 0.25)`. -- Reader has a text colour setting in the hamburger menu: - - 5 presets from `#e8e2d9` (bright) to `#938d86` (dim), persisted in `localStorage` as `reader-text-colour`. - - Hamburger and back-link are visually separated with `margin-left: 1rem` on `.header-back`. -- Backup page supports: - - manual run and dry-run - - Dropbox root settings - - snapshot retention count - - scheduled backup (on/off + interval in hours) - - status + history overview + - Amber: filled `#c8a03a`, unfilled `rgba(200, 160, 58, 0.25)`. +- Reader settings (hamburger menu): + - Content width slider (30–100 vw), persisted as `reader-content-width-pct`. + - Text colour: 5 warm-tone presets `#e8e2d9` → `#938d86`, persisted as `reader-text-colour`. + - Hamburger and back-link separated with `margin-left: 1rem` on `.header-back`. - Reader supports EPUB and PDF: - - EPUB: chapter-text rendering (existing flow) - - PDF: page-image rendering via `/library/pdf/{filename}?page=N`; page count fetched from `/api/pdf/info/{filename}`; progress tracked per page; keyboard/button navigation identical to EPUB - - `reader.html` branches on `FORMAT` variable injected by the server + - EPUB: chapter-text rendering; progress = `{chapterIndex}:{scrollFrac}`. + - PDF: page-image rendering via `/library/pdf/{filename}?page=N`; page count from `/api/pdf/info/{filename}`; progress = `{pageIndex}:0`; keyboard/button navigation identical. + - `reader.html` branches on `FORMAT` variable injected by the server. - `Edit EPUB` button in Book Detail is only shown for `.epub` files. +- Backup page supports: manual run, dry-run, Dropbox root, retention count, schedule (on/off + hours), status + history. 
-## Known Bugs Fixed -- `renderGenreView` and `renderSearchResults` in `library.js` referenced `b.genres` (non-existent field on the book object). All tag data lives in `b.tags` as `{tag, tag_type}` objects; the correct helpers are `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`. -- `PillInput` in `book.js` did not handle comma as a delimiter and did not flush pending input on save. Fixed with comma keydown handler and `flush()` called in `saveEdit()`. -- `PATCH /library/book/{filename}` failed for PDFs: `_sync_epub_metadata` tried to open the PDF as a ZIP, throwing an exception that aborted the entire save (including the DB update). Fixed by only calling `_sync_epub_metadata` when `ext == ".epub"`. -- `_make_rel_path` in `reader.py` lacked the format prefix (`epub/`, `pdf/`, `comics/`) used by `common.make_rel_path`, causing files to be moved outside their format directory on metadata save. Fixed by aligning the path logic: EPUB → `epub/{publisher}/{author}/…`, PDF → `pdf/{author}/{title}.pdf`, CBR/CBZ → `comics/{author}/{title}{ext}`. -- PDF reader showed infinite loading: `reader.html` always called `/library/chapters/{filename}` (EPUB-only) and tried to render chapter text. PDF reader now fetches page count and renders page images. +--- ## Known Conventions -- Book deletion flow: delete file, prune empty directories, then `DELETE FROM library` (cascade removes child rows). +- Book deletion flow: `unlink` file → `prune_empty_dirs(parent)` → `DELETE FROM library` (cascade removes child rows). +- Empty dir pruning: `prune_empty_dirs(start)` walks up from `start` to `LIBRARY_ROOT`, removing each dir if empty; stops at first non-empty dir. 
- Cover strategy: - - EPUB: cover from file + cache - - PDF/CBR: thumbnail via cover cache + - EPUB: extracted from ZIP + cached in `library_cover_cache` + - PDF: first page rendered as thumbnail, cached + - CBR/CBZ: first page extracted, cached - Rating storage: - EPUB: `` in OPF - CBZ: `N` in `ComicInfo.xml` inside the ZIP - CBR/PDF: DB only - - `upsert_book` uses `CASE WHEN EXCLUDED.rating > 0 THEN EXCLUDED.rating ELSE library.rating END` to restore rating from file without overwriting existing DB value + - `upsert_book` uses `CASE WHEN EXCLUDED.rating > 0 THEN EXCLUDED.rating ELSE library.rating END` to restore rating from file without overwriting existing DB value. +- Tag types in `book_tags`: `genre`, `subgenre`, `tag`, `subject`. No direct `genres`/`subgenres` fields on book objects; always use helpers `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`. + +--- ## Performance Notes - Library load is optimized for large datasets: @@ -195,3 +246,15 @@ Dropbox settings are managed via the web UI on `/backup`. - `idx_library_archived` - `idx_reading_sessions_filename_readat` - `idx_book_tags_filename_tag` + +--- + +## Known Bugs Fixed +- `renderGenreView` and `renderSearchResults` in `library.js` referenced `b.genres` (non-existent). Fixed: use `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`. +- `PillInput` in `book.js` did not handle comma as delimiter and did not flush on save. Fixed: comma keydown + `flush()` in `saveEdit()`. +- `PATCH /library/book` failed for PDFs: `_sync_epub_metadata` tried to open PDF as ZIP. Fixed: only called for `.epub`. +- `_make_rel_path` in `reader.py` lacked format prefix (`epub/`, `pdf/`, `comics/`). Fixed: aligned with `common.make_rel_path`. +- `common.make_rel_path` always generated `.cbr` extension for CBZ files (both map to `media_type="cbr"`). Fixed: accepts optional `ext` parameter; `library.py` import now passes actual suffix. +- `/download/{filename}` was referenced in `book.html` but no endpoint existed (404). 
Fixed: added `GET /download/{filename}` to `library.py`. +- PDF reader showed infinite loading: `reader.html` called EPUB-only `/library/chapters/`. Fixed: PDF path uses `/api/pdf/info/` + page-image rendering. +- Empty dir pruning only ran when file was moved. Fixed: `prune_empty_dirs(old_path.parent)` always runs after a successful metadata save. diff --git a/docs/changelog-develop.md b/docs/changelog-develop.md index f805c64..e4cc150 100644 --- a/docs/changelog-develop.md +++ b/docs/changelog-develop.md @@ -46,7 +46,12 @@ This file tracks changes on the `develop` line. - Updated Docker image with `postgresql-client` for `pg_dump`. - Multiple test builds pushed to `gitea.oskamp.info/ivooskamp/novela:dev`. -## 2026-03-25 +## 2026-03-25 (3) +- Fixed CBZ extension in import: `common.make_rel_path` always generated `.cbr` for CBZ files; now accepts `ext` parameter; `library.py` passes actual suffix so CBZ files land at `comics/{author}/{title}.cbz` +- Added missing `GET /download/{filename}` endpoint (referenced in book.html but was 404) +- TECHNICAL.md fully rewritten: added File Storage Paths section, complete endpoint lists for all routers including settings.py, corrected path documentation + +## 2026-03-25 (2) - Fixed PDF metadata editing (PATCH /library/book): - `_sync_epub_metadata` is now only called for `.epub` files; PDFs update DB only - `_make_rel_path` now includes the format prefix matching import: EPUB → `epub/{publisher}/{author}/…`, PDF → `pdf/{author}/{title}.pdf`, CBR/CBZ → `comics/{author}/{title}{ext}`; previously files were moved outside their format directory on metadata save