Apply review fixes for reader/editor, backup deps, and env docs

This commit is contained in:
Ivo Oskamp 2026-03-22 16:23:42 +01:00
parent e7759da4ae
commit ced5b25dbe
4 changed files with 103 additions and 262 deletions

View File

@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libmagic1 \
unrar-free \
postgresql-client \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt /app/requirements.txt

View File

@ -12,25 +12,11 @@ from fastapi.templating import Jinja2Templates
from db import get_db_conn
from epub import read_epub_file, write_epub_file
from routers.common import LIBRARY_DIR, resolve_library_path
router = APIRouter()
templates = Jinja2Templates(directory="templates")
OUTPUT_DIR = Path("library")
OUTPUT_ROOT = OUTPUT_DIR.resolve()
def _resolve_output_path(filename: str) -> Path | None:
    """Map a user-supplied relative filename to a path inside the library.

    Returns ``None`` for absolute paths, for names containing empty, ``.``
    or ``..`` segments, and for anything whose resolved location escapes
    OUTPUT_ROOT (path-traversal guard).
    """
    bad_segments = {"", ".", ".."}
    rel = Path(filename)
    if rel.is_absolute():
        return None
    if any(segment in bad_segments for segment in rel.parts):
        return None
    resolved = (OUTPUT_DIR / rel).resolve()
    try:
        resolved.relative_to(OUTPUT_ROOT)
    except ValueError:
        # Resolved outside the library root — reject.
        return None
    return resolved
def _norm(base_dir: str, rel: str) -> str:
rel = (rel or "").split("#", 1)[0].strip()
@ -172,7 +158,7 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat
@router.get("/library/editor/{filename:path}", response_class=HTMLResponse)
async def editor_page(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None or not path.exists():
return HTMLResponse("Not found", status_code=404)
@ -187,7 +173,7 @@ async def editor_page(filename: str, request: Request):
@router.get("/api/edit/chapter/{index:int}/{filename:path}")
async def get_edit_chapter(filename: str, index: int):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None or not path.exists():
return Response(status_code=404)
spine = _epub_spine(path)
@ -200,7 +186,7 @@ async def get_edit_chapter(filename: str, index: int):
@router.post("/api/edit/chapter/{index:int}/{filename:path}")
async def save_edit_chapter(filename: str, index: int, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():
@ -222,7 +208,7 @@ async def save_edit_chapter(filename: str, index: int, request: Request):
@router.post("/api/edit/chapter/add/{filename:path}")
async def add_edit_chapter(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():
@ -353,7 +339,7 @@ async def add_edit_chapter(filename: str, request: Request):
@router.delete("/api/edit/chapter/{index:int}/{filename:path}")
async def delete_edit_chapter(filename: str, index: int):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():

View File

@ -1,14 +1,7 @@
"""
reader.py In-browser EPUB reader routes.
Registered in main.py via app.include_router(reader.router).
Shared low-level helpers (_db_conn, _scan_epub) are defined locally to
avoid circular imports with main.py.
"""
"""reader.py — Reader and book detail routes for EPUB/PDF/CBR."""
import html as _html
import io
import os
import posixpath
import re
import uuid
@ -16,116 +9,20 @@ import zipfile as zf
from datetime import datetime
from pathlib import Path
import psycopg2
from bs4 import BeautifulSoup
from fastapi import APIRouter, Request
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response
from fastapi.templating import Jinja2Templates
from cbr import cbr_get_page
from db import get_db_conn
from epub import read_epub_file, write_epub_file
from pdf import pdf_render_page
from routers.common import LIBRARY_DIR, prune_empty_dirs, resolve_library_path, scan_epub
router = APIRouter()
templates = Jinja2Templates(directory="templates")
OUTPUT_DIR = Path("library")
OUTPUT_ROOT = OUTPUT_DIR.resolve()
# ---------------------------------------------------------------------------
# Shared helpers (local copies — avoids circular imports with main.py)
# ---------------------------------------------------------------------------
def _db_conn():
    """Open a new PostgreSQL connection configured from POSTGRES_* env vars.

    Falls back to the compose-network defaults (host ``postgres``, db/user
    ``novela``) when a variable is unset.
    """
    settings = {
        "host": os.environ.get("POSTGRES_HOST", "postgres"),
        "port": int(os.environ.get("POSTGRES_PORT", 5432)),
        "dbname": os.environ.get("POSTGRES_DB", "novela"),
        "user": os.environ.get("POSTGRES_USER", "novela"),
        "password": os.environ.get("POSTGRES_PASSWORD", ""),
    }
    return psycopg2.connect(**settings)
def _scan_epub(path: Path) -> dict:
    """Inspect an EPUB zip and return metadata dict.

    Opens the archive read-only, locates the OPF package document via
    META-INF/container.xml, and extracts Dublin Core and calibre metadata
    with regexes (no XML parser). All failures are swallowed and the
    defaults below are returned for any field that could not be read.
    """
    # Defaults returned when the archive is unreadable or a field is absent.
    has_cover = False
    series = ""
    series_index = 0
    title = ""
    publication_status = ""
    author = ""
    publisher = ""
    source_url = ""
    publish_date = ""
    subjects: list[str] = []
    description = ""
    try:
        with zf.ZipFile(path, "r") as z:
            names = set(z.namelist())
            # Heuristic cover detection: any image entry whose name contains "cover".
            has_cover = any(n.lower().endswith((".jpg", ".jpeg", ".png", ".webp", ".gif")) and "cover" in n.lower() for n in names)
            container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
            opf_path = _find_opf_path(names, container_xml)
            if opf_path and opf_path in names:
                opf = z.read(opf_path).decode("utf-8", errors="replace")
                # Dublin Core tags may appear with or without the "dc:" prefix,
                # hence the (?:dc:)? alternation in every pattern below.
                m = re.search(r'<(?:dc:)?title[^>]*>(.*?)</(?:dc:)?title>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    title = _html.unescape(m.group(1).strip())
                m = re.search(r'<(?:dc:)?creator[^>]*>(.*?)</(?:dc:)?creator>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    author = _html.unescape(m.group(1).strip())
                m = re.search(r'<(?:dc:)?publisher[^>]*>(.*?)</(?:dc:)?publisher>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    publisher = _html.unescape(m.group(1).strip())
                # calibre stores series info in <meta name="calibre:..."> elements.
                m = re.search(r'<meta[^>]*name="calibre:series"[^>]*content="([^"]+)"', opf, re.IGNORECASE)
                if m:
                    series = _html.unescape(m.group(1).strip())
                mi = re.search(r'<meta[^>]*name="calibre:series_index"[^>]*content="([^"]+)"', opf, re.IGNORECASE)
                if mi:
                    try:
                        # calibre writes fractional indices (e.g. "1.0"); truncate to int.
                        series_index = int(float(mi.group(1)))
                    except Exception:
                        series_index = 0
                ms = re.search(r'<meta[^>]*name="publication_status"[^>]*content="([^"]+)"', opf, re.IGNORECASE)
                if ms:
                    publication_status = _html.unescape(ms.group(1).strip())
                m = re.search(r'<(?:dc:)?source[^>]*>(.*?)</(?:dc:)?source>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    source_url = _html.unescape(m.group(1).strip())
                m = re.search(r'<(?:dc:)?date[^>]*>(.*?)</(?:dc:)?date>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    publish_date = _html.unescape(m.group(1).strip())
                    # Keep only the date part of an ISO datetime ("2020-01-02T...").
                    date_candidate = publish_date.split('T', 1)[0]
                    try:
                        parsed_date = datetime.fromisoformat(date_candidate).date()
                        # Years before 1900 are treated as placeholder/garbage dates.
                        publish_date = parsed_date.isoformat() if parsed_date.year >= 1900 else ''
                    except Exception:
                        publish_date = ''
                subjects = [
                    _html.unescape(s.strip())
                    for s in re.findall(r'<(?:dc:)?subject[^>]*>(.*?)</(?:dc:)?subject>', opf, re.DOTALL | re.IGNORECASE)
                    if s.strip()
                ]
                m = re.search(r'<(?:dc:)?description[^>]*>(.*?)</(?:dc:)?description>', opf, re.DOTALL | re.IGNORECASE)
                if m:
                    description = _html.unescape(m.group(1).strip())
    except Exception:
        # NOTE(review): deliberate best-effort — a corrupt/missing EPUB yields
        # the default (empty) metadata rather than an exception.
        pass
    return {
        "has_cover": has_cover,
        "series": series,
        "series_index": series_index,
        "title": title,
        "publication_status": publication_status,
        "author": author,
        "publisher": publisher,
        "source_url": source_url,
        "publish_date": publish_date,
        "subjects": subjects,
        "description": description,
    }
# ---------------------------------------------------------------------------
# EPUB helpers
# ---------------------------------------------------------------------------
@ -410,33 +307,6 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat
f.write(out.getvalue())
def _resolve_output_path(filename: str) -> Path | None:
    """Turn a library-relative filename into an absolute path under OUTPUT_ROOT.

    Returns ``None`` for absolute inputs, for empty/"."/".." path segments,
    and for any resolved path that falls outside the library root.
    """
    forbidden = {"", ".", ".."}
    rel = Path(filename)
    if rel.is_absolute():
        return None
    for part in rel.parts:
        if part in forbidden:
            return None
    candidate = (OUTPUT_DIR / rel).resolve()
    # is_relative_to() is the non-raising equivalent of relative_to().
    if not candidate.is_relative_to(OUTPUT_ROOT):
        return None
    return candidate
def _prune_empty_output_dirs(start_dir: Path) -> None:
    """Remove empty parent directories under OUTPUT_DIR, but never OUTPUT_DIR itself."""
    try:
        node = start_dir.resolve()
        # Bail out silently when start_dir lies outside the library root.
        node.relative_to(OUTPUT_ROOT)
    except Exception:
        return
    # Climb towards the root, deleting each directory while it is empty.
    # rmdir() on a non-empty directory raises OSError, which stops the climb.
    while node != OUTPUT_ROOT:
        try:
            node.rmdir()
        except OSError:
            break
        node = node.parent
def _clean_segment(value: str, fallback: str, max_len: int = 100) -> str:
txt = re.sub(r"\s+", " ", (value or "").strip())
@ -478,7 +348,7 @@ def _ensure_unique_rel_path(rel_path: Path, *, exclude: Path | None = None) -> P
candidate = base
counter = 2
while True:
full = (OUTPUT_DIR / candidate).resolve()
full = (LIBRARY_DIR / candidate).resolve()
if exclude is not None and full == exclude.resolve():
return candidate
if not full.exists():
@ -499,7 +369,7 @@ def _guard(filename: str) -> bool:
@router.get("/library/epub/{filename:path}")
async def library_epub(filename: str):
"""Serve EPUB inline (no Content-Disposition: attachment) for the reader."""
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
if not path.exists():
@ -509,7 +379,7 @@ async def library_epub(filename: str):
@router.get("/library/chapters/{filename:path}")
async def get_chapter_list(filename: str):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
if not path.exists():
@ -520,7 +390,7 @@ async def get_chapter_list(filename: str):
@router.get("/library/chapter/{index}/{filename:path}")
async def get_chapter_html(filename: str, index: int):
"""Extract a single chapter from the EPUB and return it as an HTML fragment."""
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
if not path.exists():
@ -555,7 +425,7 @@ async def get_chapter_html(filename: str, index: int):
@router.get("/library/chapter-img/{path:path}")
async def get_chapter_image(path: str, filename: str):
"""Serve an image extracted from the EPUB zip."""
epub_path = _resolve_output_path(filename)
epub_path = resolve_library_path(filename)
if epub_path is None:
return Response(status_code=404)
if not epub_path.exists():
@ -573,19 +443,16 @@ async def get_chapter_image(path: str, filename: str):
@router.get("/library/progress/{filename:path}")
async def get_progress(filename: str):
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"SELECT cfi, page, progress FROM reading_progress WHERE filename = %s",
(filename,),
)
row = cur.fetchone()
return {"cfi": row[0], "progress": row[1] or 0} if row else {"cfi": None, "progress": 0}
finally:
conn.close()
return {"cfi": row[0], "page": row[1], "progress": row[2] or 0} if row else {"cfi": None, "page": None, "progress": 0}
@router.delete("/library/progress/{filename:path}")
@ -594,21 +461,18 @@ async def clear_progress(filename: str):
Reading sessions (mark-as-read history) are intentionally left intact.
"""
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute("DELETE FROM reading_progress WHERE filename = %s", (filename,))
finally:
conn.close()
return {"ok": True}
@router.post("/library/progress/{filename:path}")
async def save_progress(filename: str, request: Request):
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
body = await request.json()
cfi = body.get("cfi", "")
@ -619,8 +483,7 @@ async def save_progress(filename: str, request: Request):
except Exception:
page = None
progress = max(0, min(100, int(body.get("progress", 0))))
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
@ -635,16 +498,14 @@ async def save_progress(filename: str, request: Request):
""",
(filename, cfi, page, progress),
)
finally:
conn.close()
return {"ok": True}
@router.post("/library/mark-read/{filename:path}")
async def library_mark_read(filename: str, request: Request):
if _resolve_output_path(filename) is None:
if resolve_library_path(filename) is None:
return {"error": "Invalid filename"}
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None or not path.exists():
return {"error": "File not found"}
body = {}
@ -653,8 +514,7 @@ async def library_mark_read(filename: str, request: Request):
except Exception:
pass
read_at = body.get("read_at") # ISO datetime string, or None for now
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
if read_at:
@ -668,20 +528,17 @@ async def library_mark_read(filename: str, request: Request):
(filename,),
)
cur.execute("DELETE FROM reading_progress WHERE filename = %s", (filename,))
finally:
conn.close()
return {"ok": True}
@router.get("/library/book/{filename:path}", response_class=HTMLResponse)
async def book_detail_page(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return HTMLResponse("Not found", status_code=404)
if not path.exists():
return HTMLResponse("Not found", status_code=404)
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"""
@ -709,7 +566,7 @@ async def book_detail_page(filename: str, request: Request):
}
# Supplement empty fields from EPUB metadata
if not entry["source_url"] or not entry["publish_date"] or not entry["description"]:
epub_meta = _scan_epub(path)
epub_meta = scan_epub(path)
if not entry["source_url"]:
entry["source_url"] = epub_meta.get("source_url", "")
if not entry["publish_date"]:
@ -717,7 +574,7 @@ async def book_detail_page(filename: str, request: Request):
if not entry["description"]:
entry["description"] = epub_meta.get("description", "")
else:
entry = _scan_epub(path)
entry = scan_epub(path)
entry.setdefault("want_to_read", False)
entry.setdefault("archived", False)
entry.setdefault("publish_date", "")
@ -741,7 +598,7 @@ async def book_detail_page(filename: str, request: Request):
if not rows:
# Fallback for books where tags only exist in OPF after DB loss/rebuild.
epub_meta = _scan_epub(path)
epub_meta = scan_epub(path)
for subject in epub_meta.get("subjects", []):
if subject not in tags_list:
tags_list.append(subject)
@ -761,8 +618,6 @@ async def book_detail_page(filename: str, request: Request):
row = cur.fetchone()
progress = row[1] or 0 if row else 0
cfi = row[0] if row else None
finally:
conn.close()
return templates.TemplateResponse(request, "book.html", {
"active": "book",
"filename": filename,
@ -794,8 +649,7 @@ async def api_genres(type: str | None = None):
Optional ``type`` query parameter filters by tag_type (genre, subgenre, tag).
"""
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
if type == "tag":
cur.execute(
@ -810,14 +664,12 @@ async def api_genres(type: str | None = None):
cur.execute("SELECT DISTINCT tag FROM book_tags ORDER BY tag")
result = [r[0] for r in cur.fetchall()]
return JSONResponse(result)
finally:
conn.close()
@router.patch("/library/book/{filename:path}")
async def book_update(filename: str, request: Request):
"""Update book metadata and tags, and rename/move the file when needed."""
old_path = _resolve_output_path(filename)
old_path = resolve_library_path(filename)
if old_path is None or not old_path.exists():
return JSONResponse({"error": "not found"}, status_code=404)
@ -837,7 +689,7 @@ async def book_update(filename: str, request: Request):
)
target_rel = _ensure_unique_rel_path(target_rel, exclude=old_path)
new_filename = target_rel.as_posix()
new_path = (OUTPUT_DIR / target_rel).resolve()
new_path = (LIBRARY_DIR / target_rel).resolve()
moved = False
old_parent_to_prune: Path | None = None
@ -847,7 +699,6 @@ async def book_update(filename: str, request: Request):
moved = True
old_parent_to_prune = old_path.parent
conn = _db_conn()
try:
_sync_epub_metadata(
new_path,
@ -863,11 +714,12 @@ async def book_update(filename: str, request: Request):
subjects=(body.get("genres", []) + body.get("subgenres", []) + body.get("tags", [])),
)
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute("SELECT has_cover FROM library WHERE filename = %s", (filename,))
row = cur.fetchone()
has_cover = bool(row[0]) if row and row[0] is not None else bool(_scan_epub(new_path if moved else old_path).get("has_cover", False))
has_cover = bool(row[0]) if row and row[0] is not None else bool(scan_epub(new_path if moved else old_path).get("has_cover", False))
cur.execute(
"""
@ -915,9 +767,9 @@ async def book_update(filename: str, request: Request):
cur.execute("DELETE FROM book_tags WHERE filename = %s", (new_filename,))
rows = (
[(new_filename, g, "genre") for g in body.get("genres", []) if g] +
[(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g] +
[(new_filename, g, "tag") for g in body.get("tags", []) if g]
[(new_filename, g, "genre") for g in body.get("genres", []) if g]
+ [(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g]
+ [(new_filename, g, "tag") for g in body.get("tags", []) if g]
)
if rows:
cur.executemany(
@ -925,32 +777,28 @@ async def book_update(filename: str, request: Request):
" ON CONFLICT (filename, tag, tag_type) DO NOTHING",
rows,
)
if old_parent_to_prune is not None:
_prune_empty_output_dirs(old_parent_to_prune)
prune_empty_dirs(old_parent_to_prune)
return JSONResponse({"ok": True, "filename": new_filename, "renamed": new_filename != filename})
except Exception as e:
if moved and new_path.exists() and not old_path.exists():
new_path.replace(old_path)
return JSONResponse({"error": str(e)}, status_code=500)
finally:
conn.close()
@router.get("/library/read/{filename:path}", response_class=HTMLResponse)
async def reader_page(filename: str, request: Request):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return HTMLResponse("Not found", status_code=404)
if not path.exists():
return HTMLResponse("Not found", status_code=404)
conn = _db_conn()
try:
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute("SELECT title FROM library WHERE filename = %s", (filename,))
row = cur.fetchone()
title = row[0] if row and row[0] else filename
finally:
conn.close()
return templates.TemplateResponse(request, "reader.html", {
"filename": filename,
"title": title,
@ -959,7 +807,7 @@ async def reader_page(filename: str, request: Request):
@router.get("/library/pdf/{filename:path}")
async def library_pdf_page(filename: str, page: int = 0, dpi: int = 150):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "Invalid filename"}, status_code=400)
if not path.exists():
@ -977,7 +825,7 @@ async def library_pdf_page(filename: str, page: int = 0, dpi: int = 150):
@router.get("/library/cbr/{filename:path}/{page:int}")
async def library_cbr_page(filename: str, page: int):
path = _resolve_output_path(filename)
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "Invalid filename"}, status_code=400)
if not path.exists():

View File

@ -5,3 +5,9 @@ POSTGRES_PASSWORD=change-me
# Required for credential encryption/decryption (Fernet) in DB.
# Keep this stable after first use; changing it breaks decrypt of existing credentials.
NOVELA_MASTER_KEY=change-me-long-random-secret
# Dropbox root folder for backup uploads (default: /novela)
DROPBOX_BACKUP_ROOT=/novela
# Directory for the backup manifest/config inside the container (default: config)
CONFIG_DIR=config