Apply review fixes for reader/editor, backup deps, and env docs

This commit is contained in:
Ivo Oskamp 2026-03-22 16:23:42 +01:00
parent e7759da4ae
commit ced5b25dbe
4 changed files with 103 additions and 262 deletions

View File

@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libmagic1 \
unrar-free \
postgresql-client \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt /app/requirements.txt

View File

@ -12,25 +12,11 @@ from fastapi.templating import Jinja2Templates
from db import get_db_conn
from epub import read_epub_file, write_epub_file
from routers.common import LIBRARY_DIR, resolve_library_path
router = APIRouter()
templates = Jinja2Templates(directory="templates")
OUTPUT_DIR = Path("library")
OUTPUT_ROOT = OUTPUT_DIR.resolve()
def _resolve_output_path(filename: str) -> Path | None:
    """Map a user-supplied relative filename to a path inside the library.

    Returns ``None`` for absolute paths, for names containing empty, ``.``
    or ``..`` segments, and for anything whose resolved location escapes
    OUTPUT_ROOT (path-traversal guard).
    """
    bad_segments = {"", ".", ".."}
    rel = Path(filename)
    if rel.is_absolute():
        return None
    if any(segment in bad_segments for segment in rel.parts):
        return None
    resolved = (OUTPUT_DIR / rel).resolve()
    try:
        resolved.relative_to(OUTPUT_ROOT)
    except ValueError:
        # Resolved outside the library root — reject.
        return None
    return resolved
def _norm(base_dir: str, rel: str) -> str:
rel = (rel or "").split("#", 1)[0].strip()
@ -172,7 +158,7 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat
@router.get("/library/editor/{filename:path}", response_class=HTMLResponse)
async def editor_page(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None or not path.exists():
return HTMLResponse("Not found", status_code=404)
@ -187,7 +173,7 @@ async def editor_page(filename: str, request: Request):
@router.get("/api/edit/chapter/{index:int}/{filename:path}")
async def get_edit_chapter(filename: str, index: int):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None or not path.exists():
return Response(status_code=404)
spine = _epub_spine(path)
@ -200,7 +186,7 @@ async def get_edit_chapter(filename: str, index: int):
@router.post("/api/edit/chapter/{index:int}/{filename:path}")
async def save_edit_chapter(filename: str, index: int, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():
@ -222,7 +208,7 @@ async def save_edit_chapter(filename: str, index: int, request: Request):
@router.post("/api/edit/chapter/add/{filename:path}")
async def add_edit_chapter(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():
@ -353,7 +339,7 @@ async def add_edit_chapter(filename: str, request: Request):
@router.delete("/api/edit/chapter/{index:int}/{filename:path}")
async def delete_edit_chapter(filename: str, index: int):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():

View File

@ -1,14 +1,7 @@
"""
reader.py In-browser EPUB reader routes.
Registered in main.py via app.include_router(reader.router).
Shared low-level helpers (_db_conn, _scan_epub) are defined locally to
avoid circular imports with main.py.
"""
"""reader.py — Reader and book detail routes for EPUB/PDF/CBR."""
import html as _html
import io
import os
import posixpath
import re
import uuid
@ -16,116 +9,20 @@ import zipfile as zf
from datetime import datetime
from pathlib import Path
import psycopg2
from bs4 import BeautifulSoup
from fastapi import APIRouter, Request
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response
from fastapi.templating import Jinja2Templates
from cbr import cbr_get_page
from db import get_db_conn
from epub import read_epub_file, write_epub_file
from pdf import pdf_render_page
from routers.common import LIBRARY_DIR, prune_empty_dirs, resolve_library_path, scan_epub
router = APIRouter()
templates = Jinja2Templates(directory="templates")
OUTPUT_DIR = Path("library")
OUTPUT_ROOT = OUTPUT_DIR.resolve()
# ---------------------------------------------------------------------------
# Shared helpers (local copies — avoids circular imports with main.py)
# ---------------------------------------------------------------------------
def _db_conn():
    """Open a new PostgreSQL connection configured from POSTGRES_* env vars.

    Falls back to the compose-network defaults (host ``postgres``, db/user
    ``novela``) when a variable is unset.
    """
    settings = {
        "host": os.environ.get("POSTGRES_HOST", "postgres"),
        "port": int(os.environ.get("POSTGRES_PORT", 5432)),
        "dbname": os.environ.get("POSTGRES_DB", "novela"),
        "user": os.environ.get("POSTGRES_USER", "novela"),
        "password": os.environ.get("POSTGRES_PASSWORD", ""),
    }
    return psycopg2.connect(**settings)
def _scan_epub(path: Path) -> dict:
    """Inspect an EPUB zip and return metadata dict.

    Opens the archive read-only, locates the OPF package document via
    META-INF/container.xml, and extracts Dublin Core and calibre metadata
    with regexes (no XML parser). All failures are swallowed and the
    defaults below are returned for any field that could not be read.
    """
    # Defaults returned when the archive is unreadable or a field is absent.
    has_cover = False
    series = ""
    series_index = 0
    title = ""
    publication_status = ""
    author = ""
    publisher = ""
    source_url = ""
    publish_date = ""
    subjects: list[str] = []
    description = ""
    try:
        with zf.ZipFile(path, "r") as z:
            names = set(z.namelist())
            # Heuristic cover detection: any image entry whose name contains "cover".
            has_cover = any(n.lower().endswith((".jpg", ".jpeg", ".png", ".webp", ".gif")) and "cover" in n.lower() for n in names)
            container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
            opf_path = _find_opf_path(names, container_xml)
            if opf_path and opf_path in names:
                opf = z.read(opf_path).decode("utf-8", errors="replace")
                # Dublin Core tags may appear with or without the "dc:" prefix,
                # hence the (?:dc:)? alternation in every pattern below.
                m = re.search(r'<(?:dc:)?title[^>]*>(.*?)</(?:dc:)?title>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    title = _html.unescape(m.group(1).strip())
                m = re.search(r'<(?:dc:)?creator[^>]*>(.*?)</(?:dc:)?creator>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    author = _html.unescape(m.group(1).strip())
                m = re.search(r'<(?:dc:)?publisher[^>]*>(.*?)</(?:dc:)?publisher>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    publisher = _html.unescape(m.group(1).strip())
                # calibre stores series info in <meta name="calibre:..."> elements.
                m = re.search(r'<meta[^>]*name="calibre:series"[^>]*content="([^"]+)"', opf, re.IGNORECASE)
                if m:
                    series = _html.unescape(m.group(1).strip())
                mi = re.search(r'<meta[^>]*name="calibre:series_index"[^>]*content="([^"]+)"', opf, re.IGNORECASE)
                if mi:
                    try:
                        # calibre writes fractional indices (e.g. "1.0"); truncate to int.
                        series_index = int(float(mi.group(1)))
                    except Exception:
                        series_index = 0
                ms = re.search(r'<meta[^>]*name="publication_status"[^>]*content="([^"]+)"', opf, re.IGNORECASE)
                if ms:
                    publication_status = _html.unescape(ms.group(1).strip())
                m = re.search(r'<(?:dc:)?source[^>]*>(.*?)</(?:dc:)?source>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    source_url = _html.unescape(m.group(1).strip())
                m = re.search(r'<(?:dc:)?date[^>]*>(.*?)</(?:dc:)?date>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    publish_date = _html.unescape(m.group(1).strip())
                    # Keep only the date part of an ISO datetime ("2020-01-02T...").
                    date_candidate = publish_date.split('T', 1)[0]
                    try:
                        parsed_date = datetime.fromisoformat(date_candidate).date()
                        # Years before 1900 are treated as placeholder/garbage dates.
                        publish_date = parsed_date.isoformat() if parsed_date.year >= 1900 else ''
                    except Exception:
                        publish_date = ''
                subjects = [
                    _html.unescape(s.strip())
                    for s in re.findall(r'<(?:dc:)?subject[^>]*>(.*?)</(?:dc:)?subject>', opf, re.DOTALL | re.IGNORECASE)
                    if s.strip()
                ]
                m = re.search(r'<(?:dc:)?description[^>]*>(.*?)</(?:dc:)?description>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    description = _html.unescape(m.group(1).strip())
    except Exception:
        # NOTE(review): deliberate best-effort — a corrupt/missing EPUB yields
        # the default (empty) metadata rather than an exception.
        pass
    return {
        "has_cover": has_cover,
        "series": series,
        "series_index": series_index,
        "title": title,
        "publication_status": publication_status,
        "author": author,
        "publisher": publisher,
        "source_url": source_url,
        "publish_date": publish_date,
        "subjects": subjects,
        "description": description,
    }
# ---------------------------------------------------------------------------
# EPUB helpers
# ---------------------------------------------------------------------------
@ -410,33 +307,6 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat
f.write(out.getvalue())
def _resolve_output_path(filename: str) -> Path | None:
    """Turn a library-relative filename into an absolute path under OUTPUT_ROOT.

    Returns ``None`` for absolute inputs, for empty/"."/".." path segments,
    and for any resolved path that falls outside the library root.
    """
    forbidden = {"", ".", ".."}
    rel = Path(filename)
    if rel.is_absolute():
        return None
    for part in rel.parts:
        if part in forbidden:
            return None
    candidate = (OUTPUT_DIR / rel).resolve()
    # is_relative_to() is the non-raising equivalent of relative_to().
    if not candidate.is_relative_to(OUTPUT_ROOT):
        return None
    return candidate
def _prune_empty_output_dirs(start_dir: Path) -> None:
    """Remove empty parent directories under OUTPUT_DIR, but never OUTPUT_DIR itself."""
    try:
        node = start_dir.resolve()
        # Bail out silently when start_dir lies outside the library root.
        node.relative_to(OUTPUT_ROOT)
    except Exception:
        return
    # Climb towards the root, deleting each directory while it is empty.
    # rmdir() on a non-empty directory raises OSError, which stops the climb.
    while node != OUTPUT_ROOT:
        try:
            node.rmdir()
        except OSError:
            break
        node = node.parent
def _clean_segment(value: str, fallback: str, max_len: int = 100) -> str:
txt = re.sub(r"\s+", " ", (value or "").strip())
@ -478,7 +348,7 @@ def _ensure_unique_rel_path(rel_path: Path, *, exclude: Path | None = None) -> P
candidate = base
counter = 2
while True:
full = (OUTPUT_DIR / candidate).resolve()
full = (LIBRARY_DIR / candidate).resolve()
if exclude is not None and full == exclude.resolve():
return candidate
if not full.exists():
@ -499,7 +369,7 @@ def _guard(filename: str) -> bool:
@router.get("/library/epub/{filename:path}")
async def library_epub(filename: str):
"""Serve EPUB inline (no Content-Disposition: attachment) for the reader."""
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
if not path.exists():
@ -509,7 +379,7 @@ async def library_epub(filename: str):
@router.get("/library/chapters/{filename:path}")
async def get_chapter_list(filename: str):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
if not path.exists():
@ -520,7 +390,7 @@ async def get_chapter_list(filename: str):
@router.get("/library/chapter/{index}/{filename:path}")
async def get_chapter_html(filename: str, index: int):
"""Extract a single chapter from the EPUB and return it as an HTML fragment."""
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
if not path.exists():
@ -555,7 +425,7 @@ async def get_chapter_html(filename: str, index: int):
@router.get("/library/chapter-img/{path:path}")
async def get_chapter_image(path: str, filename: str):
"""Serve an image extracted from the EPUB zip."""
epub_path = _resolve_output_path(filename)
epub_path = resolve_library_path(filename)
if epub_path is None:
return Response(status_code=404)
if not epub_path.exists():
@ -573,19 +443,16 @@ async def get_chapter_image(path: str, filename: str):
@router.get("/library/progress/{filename:path}")
async def get_progress(filename: str):
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"SELECT cfi, page, progress FROM reading_progress WHERE filename = %s",
(filename,),
)
row = cur.fetchone()
return {"cfi": row[0], "progress": row[1] or 0} if row else {"cfi": None, "progress": 0}
finally:
conn.close()
return {"cfi": row[0], "page": row[1], "progress": row[2] or 0} if row else {"cfi": None, "page": None, "progress": 0}
@router.delete("/library/progress/{filename:path}")
@ -594,21 +461,18 @@ async def clear_progress(filename: str):
Reading sessions (mark-as-read history) are intentionally left intact.
"""
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute("DELETE FROM reading_progress WHERE filename = %s", (filename,))
finally:
conn.close()
return {"ok": True}
@router.post("/library/progress/{filename:path}")
async def save_progress(filename: str, request: Request):
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
body = await request.json()
cfi = body.get("cfi", "")
@ -619,8 +483,7 @@ async def save_progress(filename: str, request: Request):
except Exception:
page = None
progress = max(0, min(100, int(body.get("progress", 0))))
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
@ -635,16 +498,14 @@ async def save_progress(filename: str, request: Request):
""",
(filename, cfi, page, progress),
)
finally:
conn.close()
return {"ok": True}
@router.post("/library/mark-read/{filename:path}")
async def library_mark_read(filename: str, request: Request):
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None or not path.exists():
return {"error": "File not found"}
body = {}
@ -653,8 +514,7 @@ async def library_mark_read(filename: str, request: Request):
except Exception:
pass
read_at = body.get("read_at") # ISO datetime string, or None for now
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
if read_at:
@ -668,20 +528,17 @@ async def library_mark_read(filename: str, request: Request):
(filename,),
)
cur.execute("DELETE FROM reading_progress WHERE filename = %s", (filename,))
finally:
conn.close()
return {"ok": True}
@router.get("/library/book/{filename:path}", response_class=HTMLResponse)
async def book_detail_page(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return HTMLResponse("Not found", status_code=404)
if not path.exists():
return HTMLResponse("Not found", status_code=404)
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"""
@ -709,7 +566,7 @@ async def book_detail_page(filename: str, request: Request):
}
# Supplement empty fields from EPUB metadata
if not entry["source_url"] or not entry["publish_date"] or not entry["description"]:
epub_meta = _scan_epub(path)
epub_meta = scan_epub(path)
if not entry["source_url"]:
entry["source_url"] = epub_meta.get("source_url", "")
if not entry["publish_date"]:
@ -717,7 +574,7 @@ async def book_detail_page(filename: str, request: Request):
if not entry["description"]:
entry["description"] = epub_meta.get("description", "")
else:
entry = _scan_epub(path)
entry = scan_epub(path)
entry.setdefault("want_to_read", False)
entry.setdefault("archived", False)
entry.setdefault("publish_date", "")
@ -741,7 +598,7 @@ async def book_detail_page(filename: str, request: Request):
if not rows:
# Fallback for books where tags only exist in OPF after DB loss/rebuild.
epub_meta = _scan_epub(path)
epub_meta = scan_epub(path)
for subject in epub_meta.get("subjects", []):
if subject not in tags_list:
tags_list.append(subject)
@ -761,8 +618,6 @@ async def book_detail_page(filename: str, request: Request):
row = cur.fetchone()
progress = row[1] or 0 if row else 0
cfi = row[0] if row else None
finally:
conn.close()
return templates.TemplateResponse(request, "book.html", {
"active": "book",
"filename": filename,
@ -794,8 +649,7 @@ async def api_genres(type: str | None = None):
Optional ``type`` query parameter filters by tag_type (genre, subgenre, tag).
"""
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
if type == "tag":
cur.execute(
@ -810,14 +664,12 @@ async def api_genres(type: str | None = None):
cur.execute("SELECT DISTINCT tag FROM book_tags ORDER BY tag")
result = [r[0] for r in cur.fetchall()]
return JSONResponse(result)
finally:
conn.close()
@router.patch("/library/book/{filename:path}")
async def book_update(filename: str, request: Request):
"""Update book metadata and tags, and rename/move the file when needed."""
old_path = _resolve_output_path(filename)
old_path = resolve_library_path(filename)
if old_path is None or not old_path.exists():
return JSONResponse({"error": "not found"}, status_code=404)
@ -837,7 +689,7 @@ async def book_update(filename: str, request: Request):
)
target_rel = _ensure_unique_rel_path(target_rel, exclude=old_path)
new_filename = target_rel.as_posix()
new_path = (OUTPUT_DIR / target_rel).resolve()
new_path = (LIBRARY_DIR / target_rel).resolve()
moved = False
old_parent_to_prune: Path | None = None
@ -847,7 +699,6 @@ async def book_update(filename: str, request: Request):
moved = True
old_parent_to_prune = old_path.parent
conn = _db_conn()
try:
_sync_epub_metadata(
new_path,
@ -863,11 +714,12 @@ async def book_update(filename: str, request: Request):
subjects=(body.get("genres", []) + body.get("subgenres", []) + body.get("tags", [])),
)
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute("SELECT has_cover FROM library WHERE filename = %s", (filename,))
row = cur.fetchone()
has_cover = bool(row[0]) if row and row[0] is not None else bool(_scan_epub(new_path if moved else old_path).get("has_cover", False))
has_cover = bool(row[0]) if row and row[0] is not None else bool(scan_epub(new_path if moved else old_path).get("has_cover", False))
cur.execute(
"""
@ -915,9 +767,9 @@ async def book_update(filename: str, request: Request):
cur.execute("DELETE FROM book_tags WHERE filename = %s", (new_filename,))
rows = (
[(new_filename, g, "genre") for g in body.get("genres", []) if g] +
[(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g] +
[(new_filename, g, "tag") for g in body.get("tags", []) if g]
[(new_filename, g, "genre") for g in body.get("genres", []) if g]
+ [(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g]
+ [(new_filename, g, "tag") for g in body.get("tags", []) if g]
)
if rows:
cur.executemany(
@ -925,32 +777,28 @@ async def book_update(filename: str, request: Request):
" ON CONFLICT (filename, tag, tag_type) DO NOTHING",
rows,
)
if old_parent_to_prune is not None:
_prune_empty_output_dirs(old_parent_to_prune)
prune_empty_dirs(old_parent_to_prune)
return JSONResponse({"ok": True, "filename": new_filename, "renamed": new_filename != filename})
except Exception as e:
if moved and new_path.exists() and not old_path.exists():
new_path.replace(old_path)
return JSONResponse({"error": str(e)}, status_code=500)
finally:
conn.close()
@router.get("/library/read/{filename:path}", response_class=HTMLResponse)
async def reader_page(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return HTMLResponse("Not found", status_code=404)
if not path.exists():
return HTMLResponse("Not found", status_code=404)
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute("SELECT title FROM library WHERE filename = %s", (filename,))
row = cur.fetchone()
title = row[0] if row and row[0] else filename
finally:
conn.close()
return templates.TemplateResponse(request, "reader.html", {
"filename": filename,
"title": title,
@ -959,7 +807,7 @@ async def reader_page(filename: str, request: Request):
@router.get("/library/pdf/{filename:path}")
async def library_pdf_page(filename: str, page: int = 0, dpi: int = 150):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "Invalid filename"}, status_code=400)
if not path.exists():
@ -977,7 +825,7 @@ async def library_pdf_page(filename: str, page: int = 0, dpi: int = 150):
@router.get("/library/cbr/{filename:path}/{page:int}")
async def library_cbr_page(filename: str, page: int):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "Invalid filename"}, status_code=400)
if not path.exists():

View File

@ -5,3 +5,9 @@ POSTGRES_PASSWORD=change-me
# Required for credential encryption/decryption (Fernet) in DB.
# Keep this stable after first use; changing it breaks decrypt of existing credentials.
NOVELA_MASTER_KEY=change-me-long-random-secret
# Dropbox root folder for backup uploads (default: /novela)
DROPBOX_BACKUP_ROOT=/novela
# Directory for the backup manifest/config inside the container (default: config)
CONFIG_DIR=config