diff --git a/containers/novela/Dockerfile b/containers/novela/Dockerfile index 7477bfb..61835d2 100644 --- a/containers/novela/Dockerfile +++ b/containers/novela/Dockerfile @@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ libmagic1 \ unrar-free \ + postgresql-client \ && rm -rf /var/lib/apt/lists/* COPY requirements.txt /app/requirements.txt diff --git a/containers/novela/routers/editor.py b/containers/novela/routers/editor.py index 0d704e6..b3bc9f7 100644 --- a/containers/novela/routers/editor.py +++ b/containers/novela/routers/editor.py @@ -12,25 +12,11 @@ from fastapi.templating import Jinja2Templates from db import get_db_conn from epub import read_epub_file, write_epub_file +from routers.common import LIBRARY_DIR, resolve_library_path router = APIRouter() templates = Jinja2Templates(directory="templates") -OUTPUT_DIR = Path("library") -OUTPUT_ROOT = OUTPUT_DIR.resolve() - - -def _resolve_output_path(filename: str) -> Path | None: - rel = Path(filename) - if rel.is_absolute() or any(part in {"", ".", ".."} for part in rel.parts): - return None - candidate = (OUTPUT_DIR / rel).resolve() - try: - candidate.relative_to(OUTPUT_ROOT) - except ValueError: - return None - return candidate - def _norm(base_dir: str, rel: str) -> str: rel = (rel or "").split("#", 1)[0].strip() @@ -172,7 +158,7 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat @router.get("/library/editor/{filename:path}", response_class=HTMLResponse) async def editor_page(filename: str, request: Request): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None or not path.exists(): return HTMLResponse("Not found", status_code=404) @@ -187,7 +173,7 @@ async def editor_page(filename: str, request: Request): @router.get("/api/edit/chapter/{index:int}/{filename:path}") async def get_edit_chapter(filename: str, index: int): - path = _resolve_output_path(filename) + path = 
resolve_library_path(filename) if path is None or not path.exists(): return Response(status_code=404) spine = _epub_spine(path) @@ -200,7 +186,7 @@ async def get_edit_chapter(filename: str, index: int): @router.post("/api/edit/chapter/{index:int}/{filename:path}") async def save_edit_chapter(filename: str, index: int, request: Request): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "not found"}, status_code=404) if not path.exists(): @@ -222,7 +208,7 @@ async def save_edit_chapter(filename: str, index: int, request: Request): @router.post("/api/edit/chapter/add/{filename:path}") async def add_edit_chapter(filename: str, request: Request): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "not found"}, status_code=404) if not path.exists(): @@ -353,7 +339,7 @@ async def add_edit_chapter(filename: str, request: Request): @router.delete("/api/edit/chapter/{index:int}/{filename:path}") async def delete_edit_chapter(filename: str, index: int): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "not found"}, status_code=404) if not path.exists(): diff --git a/containers/novela/routers/reader.py b/containers/novela/routers/reader.py index 57af742..4adff5a 100644 --- a/containers/novela/routers/reader.py +++ b/containers/novela/routers/reader.py @@ -1,14 +1,7 @@ -""" -reader.py — In-browser EPUB reader routes. - -Registered in main.py via app.include_router(reader.router). -Shared low-level helpers (_db_conn, _scan_epub) are defined locally to -avoid circular imports with main.py. 
-""" +"""reader.py — Reader and book detail routes for EPUB/PDF/CBR.""" import html as _html import io -import os import posixpath import re import uuid @@ -16,116 +9,20 @@ import zipfile as zf from datetime import datetime from pathlib import Path -import psycopg2 from bs4 import BeautifulSoup from fastapi import APIRouter, Request from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response from fastapi.templating import Jinja2Templates from cbr import cbr_get_page +from db import get_db_conn from epub import read_epub_file, write_epub_file from pdf import pdf_render_page +from routers.common import LIBRARY_DIR, prune_empty_dirs, resolve_library_path, scan_epub router = APIRouter() templates = Jinja2Templates(directory="templates") -OUTPUT_DIR = Path("library") -OUTPUT_ROOT = OUTPUT_DIR.resolve() - - -# --------------------------------------------------------------------------- -# Shared helpers (local copies — avoids circular imports with main.py) -# --------------------------------------------------------------------------- - -def _db_conn(): - return psycopg2.connect( - host=os.environ.get("POSTGRES_HOST", "postgres"), - port=int(os.environ.get("POSTGRES_PORT", 5432)), - dbname=os.environ.get("POSTGRES_DB", "novela"), - user=os.environ.get("POSTGRES_USER", "novela"), - password=os.environ.get("POSTGRES_PASSWORD", ""), - ) - - -def _scan_epub(path: Path) -> dict: - """Inspect an EPUB zip and return metadata dict.""" - has_cover = False - series = "" - series_index = 0 - title = "" - publication_status = "" - author = "" - publisher = "" - source_url = "" - publish_date = "" - subjects: list[str] = [] - description = "" - try: - with zf.ZipFile(path, "r") as z: - names = set(z.namelist()) - has_cover = any(n.lower().endswith((".jpg", ".jpeg", ".png", ".webp", ".gif")) and "cover" in n.lower() for n in names) - container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None - 
opf_path = _find_opf_path(names, container_xml) - if opf_path and opf_path in names: - opf = z.read(opf_path).decode("utf-8", errors="replace") - m = re.search(r'<(?:dc:)?title[^>]*>(.*?)', opf, re.DOTALL | re.IGNORECASE) - if m: - title = _html.unescape(m.group(1).strip()) - m = re.search(r'<(?:dc:)?creator[^>]*>(.*?)', opf, re.DOTALL | re.IGNORECASE) - if m: - author = _html.unescape(m.group(1).strip()) - m = re.search(r'<(?:dc:)?publisher[^>]*>(.*?)', opf, re.DOTALL | re.IGNORECASE) - if m: - publisher = _html.unescape(m.group(1).strip()) - m = re.search(r']*name="calibre:series"[^>]*content="([^"]+)"', opf, re.IGNORECASE) - if m: - series = _html.unescape(m.group(1).strip()) - mi = re.search(r']*name="calibre:series_index"[^>]*content="([^"]+)"', opf, re.IGNORECASE) - if mi: - try: - series_index = int(float(mi.group(1))) - except Exception: - series_index = 0 - ms = re.search(r']*name="publication_status"[^>]*content="([^"]+)"', opf, re.IGNORECASE) - if ms: - publication_status = _html.unescape(ms.group(1).strip()) - m = re.search(r'<(?:dc:)?source[^>]*>(.*?)', opf, re.DOTALL | re.IGNORECASE) - if m: - source_url = _html.unescape(m.group(1).strip()) - m = re.search(r'<(?:dc:)?date[^>]*>(.*?)', opf, re.DOTALL | re.IGNORECASE) - if m: - publish_date = _html.unescape(m.group(1).strip()) - date_candidate = publish_date.split('T', 1)[0] - try: - parsed_date = datetime.fromisoformat(date_candidate).date() - publish_date = parsed_date.isoformat() if parsed_date.year >= 1900 else '' - except Exception: - publish_date = '' - subjects = [ - _html.unescape(s.strip()) - for s in re.findall(r'<(?:dc:)?subject[^>]*>(.*?)', opf, re.DOTALL | re.IGNORECASE) - if s.strip() - ] - m = re.search(r'<(?:dc:)?description[^>]*>(.*?)', opf, re.DOTALL | re.IGNORECASE) - if m: - description = _html.unescape(m.group(1).strip()) - except Exception: - pass - return { - "has_cover": has_cover, - "series": series, - "series_index": series_index, - "title": title, - "publication_status": 
publication_status, - "author": author, - "publisher": publisher, - "source_url": source_url, - "publish_date": publish_date, - "subjects": subjects, - "description": description, - } - - # --------------------------------------------------------------------------- # EPUB helpers # --------------------------------------------------------------------------- @@ -410,33 +307,6 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat f.write(out.getvalue()) -def _resolve_output_path(filename: str) -> Path | None: - rel = Path(filename) - if rel.is_absolute() or any(part in {"", ".", ".."} for part in rel.parts): - return None - candidate = (OUTPUT_DIR / rel).resolve() - try: - candidate.relative_to(OUTPUT_ROOT) - except ValueError: - return None - return candidate - - -def _prune_empty_output_dirs(start_dir: Path) -> None: - """Remove empty parent directories under OUTPUT_DIR, but never OUTPUT_DIR itself.""" - try: - cur = start_dir.resolve() - cur.relative_to(OUTPUT_ROOT) - except Exception: - return - - while cur != OUTPUT_ROOT: - try: - cur.rmdir() - except OSError: - break - cur = cur.parent - def _clean_segment(value: str, fallback: str, max_len: int = 100) -> str: txt = re.sub(r"\s+", " ", (value or "").strip()) @@ -478,7 +348,7 @@ def _ensure_unique_rel_path(rel_path: Path, *, exclude: Path | None = None) -> P candidate = base counter = 2 while True: - full = (OUTPUT_DIR / candidate).resolve() + full = (LIBRARY_DIR / candidate).resolve() if exclude is not None and full == exclude.resolve(): return candidate if not full.exists(): @@ -499,7 +369,7 @@ def _guard(filename: str) -> bool: @router.get("/library/epub/{filename:path}") async def library_epub(filename: str): """Serve EPUB inline (no Content-Disposition: attachment) for the reader.""" - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return Response(status_code=404) if not path.exists(): @@ -509,7 +379,7 @@ async def 
library_epub(filename: str): @router.get("/library/chapters/{filename:path}") async def get_chapter_list(filename: str): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return Response(status_code=404) if not path.exists(): @@ -520,7 +390,7 @@ async def get_chapter_list(filename: str): @router.get("/library/chapter/{index}/{filename:path}") async def get_chapter_html(filename: str, index: int): """Extract a single chapter from the EPUB and return it as an HTML fragment.""" - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return Response(status_code=404) if not path.exists(): @@ -555,7 +425,7 @@ async def get_chapter_html(filename: str, index: int): @router.get("/library/chapter-img/{path:path}") async def get_chapter_image(path: str, filename: str): """Serve an image extracted from the EPUB zip.""" - epub_path = _resolve_output_path(filename) + epub_path = resolve_library_path(filename) if epub_path is None: return Response(status_code=404) if not epub_path.exists(): @@ -573,19 +443,16 @@ async def get_chapter_image(path: str, filename: str): @router.get("/library/progress/{filename:path}") async def get_progress(filename: str): - if _resolve_output_path(filename) is None: + if resolve_library_path(filename) is None: return {"error": "Invalid filename"} - conn = _db_conn() - try: + with get_db_conn() as conn: with conn.cursor() as cur: cur.execute( "SELECT cfi, page, progress FROM reading_progress WHERE filename = %s", (filename,), ) row = cur.fetchone() - return {"cfi": row[0], "progress": row[1] or 0} if row else {"cfi": None, "progress": 0} - finally: - conn.close() + return {"cfi": row[0], "page": row[1], "progress": row[2] or 0} if row else {"cfi": None, "page": None, "progress": 0} @router.delete("/library/progress/{filename:path}") @@ -594,21 +461,18 @@ async def clear_progress(filename: str): Reading sessions (mark-as-read history) are intentionally left intact. 
""" - if _resolve_output_path(filename) is None: + if resolve_library_path(filename) is None: return {"error": "Invalid filename"} - conn = _db_conn() - try: + with get_db_conn() as conn: with conn: with conn.cursor() as cur: cur.execute("DELETE FROM reading_progress WHERE filename = %s", (filename,)) - finally: - conn.close() return {"ok": True} @router.post("/library/progress/{filename:path}") async def save_progress(filename: str, request: Request): - if _resolve_output_path(filename) is None: + if resolve_library_path(filename) is None: return {"error": "Invalid filename"} body = await request.json() cfi = body.get("cfi", "") @@ -619,8 +483,7 @@ async def save_progress(filename: str, request: Request): except Exception: page = None progress = max(0, min(100, int(body.get("progress", 0)))) - conn = _db_conn() - try: + with get_db_conn() as conn: with conn: with conn.cursor() as cur: cur.execute( @@ -635,16 +498,14 @@ async def save_progress(filename: str, request: Request): """, (filename, cfi, page, progress), ) - finally: - conn.close() return {"ok": True} @router.post("/library/mark-read/{filename:path}") async def library_mark_read(filename: str, request: Request): - if _resolve_output_path(filename) is None: + if resolve_library_path(filename) is None: return {"error": "Invalid filename"} - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None or not path.exists(): return {"error": "File not found"} body = {} @@ -653,8 +514,7 @@ async def library_mark_read(filename: str, request: Request): except Exception: pass read_at = body.get("read_at") # ISO datetime string, or None for now - conn = _db_conn() - try: + with get_db_conn() as conn: with conn: with conn.cursor() as cur: if read_at: @@ -668,20 +528,17 @@ async def library_mark_read(filename: str, request: Request): (filename,), ) cur.execute("DELETE FROM reading_progress WHERE filename = %s", (filename,)) - finally: - conn.close() return {"ok": True} 
@router.get("/library/book/{filename:path}", response_class=HTMLResponse) async def book_detail_page(filename: str, request: Request): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return HTMLResponse("Not found", status_code=404) if not path.exists(): return HTMLResponse("Not found", status_code=404) - conn = _db_conn() - try: + with get_db_conn() as conn: with conn.cursor() as cur: cur.execute( """ @@ -709,7 +566,7 @@ async def book_detail_page(filename: str, request: Request): } # Supplement empty fields from EPUB metadata if not entry["source_url"] or not entry["publish_date"] or not entry["description"]: - epub_meta = _scan_epub(path) + epub_meta = scan_epub(path) if not entry["source_url"]: entry["source_url"] = epub_meta.get("source_url", "") if not entry["publish_date"]: @@ -717,7 +574,7 @@ async def book_detail_page(filename: str, request: Request): if not entry["description"]: entry["description"] = epub_meta.get("description", "") else: - entry = _scan_epub(path) + entry = scan_epub(path) entry.setdefault("want_to_read", False) entry.setdefault("archived", False) entry.setdefault("publish_date", "") @@ -741,7 +598,7 @@ async def book_detail_page(filename: str, request: Request): if not rows: # Fallback for books where tags only exist in OPF after DB loss/rebuild. - epub_meta = _scan_epub(path) + epub_meta = scan_epub(path) for subject in epub_meta.get("subjects", []): if subject not in tags_list: tags_list.append(subject) @@ -761,8 +618,6 @@ async def book_detail_page(filename: str, request: Request): row = cur.fetchone() progress = row[1] or 0 if row else 0 cfi = row[0] if row else None - finally: - conn.close() return templates.TemplateResponse(request, "book.html", { "active": "book", "filename": filename, @@ -794,8 +649,7 @@ async def api_genres(type: str | None = None): Optional ``type`` query parameter filters by tag_type (genre, subgenre, tag). 
""" - conn = _db_conn() - try: + with get_db_conn() as conn: with conn.cursor() as cur: if type == "tag": cur.execute( @@ -810,14 +664,12 @@ async def api_genres(type: str | None = None): cur.execute("SELECT DISTINCT tag FROM book_tags ORDER BY tag") result = [r[0] for r in cur.fetchall()] return JSONResponse(result) - finally: - conn.close() @router.patch("/library/book/{filename:path}") async def book_update(filename: str, request: Request): """Update book metadata and tags, and rename/move the file when needed.""" - old_path = _resolve_output_path(filename) + old_path = resolve_library_path(filename) if old_path is None or not old_path.exists(): return JSONResponse({"error": "not found"}, status_code=404) @@ -837,7 +689,7 @@ async def book_update(filename: str, request: Request): ) target_rel = _ensure_unique_rel_path(target_rel, exclude=old_path) new_filename = target_rel.as_posix() - new_path = (OUTPUT_DIR / target_rel).resolve() + new_path = (LIBRARY_DIR / target_rel).resolve() moved = False old_parent_to_prune: Path | None = None @@ -847,7 +699,6 @@ async def book_update(filename: str, request: Request): moved = True old_parent_to_prune = old_path.parent - conn = _db_conn() try: _sync_epub_metadata( new_path, @@ -863,94 +714,91 @@ async def book_update(filename: str, request: Request): subjects=(body.get("genres", []) + body.get("subgenres", []) + body.get("tags", [])), ) - with conn: - with conn.cursor() as cur: - cur.execute("SELECT has_cover FROM library WHERE filename = %s", (filename,)) - row = cur.fetchone() - has_cover = bool(row[0]) if row and row[0] is not None else bool(_scan_epub(new_path if moved else old_path).get("has_cover", False)) + with get_db_conn() as conn: + with conn: + with conn.cursor() as cur: + cur.execute("SELECT has_cover FROM library WHERE filename = %s", (filename,)) + row = cur.fetchone() + has_cover = bool(row[0]) if row and row[0] is not None else bool(scan_epub(new_path if moved else old_path).get("has_cover", False)) - 
cur.execute( - """ - INSERT INTO library ( - filename, title, author, publisher, has_cover, - series, series_index, publication_status, - source_url, publish_date, description, - archived, needs_review, updated_at + cur.execute( + """ + INSERT INTO library ( + filename, title, author, publisher, has_cover, + series, series_index, publication_status, + source_url, publish_date, description, + archived, needs_review, updated_at + ) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, NOW()) + ON CONFLICT (filename) DO UPDATE SET + title = EXCLUDED.title, + author = EXCLUDED.author, + publisher = EXCLUDED.publisher, + series = EXCLUDED.series, + series_index = EXCLUDED.series_index, + publication_status = EXCLUDED.publication_status, + source_url = EXCLUDED.source_url, + publish_date = EXCLUDED.publish_date, + description = EXCLUDED.description, + needs_review = FALSE, + updated_at = NOW() + """, + ( + new_filename, + title, + author, + publisher, + has_cover, + series, + series_index if series else 0, + body.get("publication_status", ""), + body.get("source_url", ""), + body.get("publish_date") or None, + body.get("description", ""), + ), ) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, NOW()) - ON CONFLICT (filename) DO UPDATE SET - title = EXCLUDED.title, - author = EXCLUDED.author, - publisher = EXCLUDED.publisher, - series = EXCLUDED.series, - series_index = EXCLUDED.series_index, - publication_status = EXCLUDED.publication_status, - source_url = EXCLUDED.source_url, - publish_date = EXCLUDED.publish_date, - description = EXCLUDED.description, - needs_review = FALSE, - updated_at = NOW() - """, - ( - new_filename, - title, - author, - publisher, - has_cover, - series, - series_index if series else 0, - body.get("publication_status", ""), - body.get("source_url", ""), - body.get("publish_date") or None, - body.get("description", ""), - ), - ) - if new_filename != filename: - cur.execute("UPDATE book_tags SET filename = %s WHERE 
filename = %s", (new_filename, filename)) - cur.execute("UPDATE reading_progress SET filename = %s WHERE filename = %s", (new_filename, filename)) - cur.execute("UPDATE reading_sessions SET filename = %s WHERE filename = %s", (new_filename, filename)) - cur.execute("UPDATE library_cover_cache SET filename = %s WHERE filename = %s", (new_filename, filename)) - cur.execute("DELETE FROM library WHERE filename = %s", (filename,)) + if new_filename != filename: + cur.execute("UPDATE book_tags SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE reading_progress SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE reading_sessions SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE library_cover_cache SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("DELETE FROM library WHERE filename = %s", (filename,)) - cur.execute("DELETE FROM book_tags WHERE filename = %s", (new_filename,)) - rows = ( - [(new_filename, g, "genre") for g in body.get("genres", []) if g] + - [(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g] + - [(new_filename, g, "tag") for g in body.get("tags", []) if g] - ) - if rows: - cur.executemany( - "INSERT INTO book_tags (filename, tag, tag_type) VALUES (%s, %s, %s)" - " ON CONFLICT (filename, tag, tag_type) DO NOTHING", - rows, + cur.execute("DELETE FROM book_tags WHERE filename = %s", (new_filename,)) + rows = ( + [(new_filename, g, "genre") for g in body.get("genres", []) if g] + + [(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g] + + [(new_filename, g, "tag") for g in body.get("tags", []) if g] ) + if rows: + cur.executemany( + "INSERT INTO book_tags (filename, tag, tag_type) VALUES (%s, %s, %s)" + " ON CONFLICT (filename, tag, tag_type) DO NOTHING", + rows, + ) + if old_parent_to_prune is not None: - _prune_empty_output_dirs(old_parent_to_prune) + 
prune_empty_dirs(old_parent_to_prune) return JSONResponse({"ok": True, "filename": new_filename, "renamed": new_filename != filename}) except Exception as e: if moved and new_path.exists() and not old_path.exists(): new_path.replace(old_path) return JSONResponse({"error": str(e)}, status_code=500) - finally: - conn.close() @router.get("/library/read/{filename:path}", response_class=HTMLResponse) async def reader_page(filename: str, request: Request): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return HTMLResponse("Not found", status_code=404) if not path.exists(): return HTMLResponse("Not found", status_code=404) - conn = _db_conn() - try: + with get_db_conn() as conn: with conn.cursor() as cur: cur.execute("SELECT title FROM library WHERE filename = %s", (filename,)) row = cur.fetchone() title = row[0] if row and row[0] else filename - finally: - conn.close() return templates.TemplateResponse(request, "reader.html", { "filename": filename, "title": title, @@ -959,7 +807,7 @@ async def reader_page(filename: str, request: Request): @router.get("/library/pdf/{filename:path}") async def library_pdf_page(filename: str, page: int = 0, dpi: int = 150): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "Invalid filename"}, status_code=400) if not path.exists(): @@ -977,7 +825,7 @@ async def library_pdf_page(filename: str, page: int = 0, dpi: int = 150): @router.get("/library/cbr/{filename:path}/{page:int}") async def library_cbr_page(filename: str, page: int): - path = _resolve_output_path(filename) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "Invalid filename"}, status_code=400) if not path.exists(): diff --git a/stack/novela.env b/stack/novela.env index e7b6ffb..e35d906 100644 --- a/stack/novela.env +++ b/stack/novela.env @@ -5,3 +5,9 @@ POSTGRES_PASSWORD=change-me # Required for credential 
encryption/decryption (Fernet) in DB. # Keep this stable after first use; changing it breaks decrypt of existing credentials. NOVELA_MASTER_KEY=change-me + +# Dropbox root folder for backup uploads (default: /novela) +DROPBOX_BACKUP_ROOT=/novela + +# Directory for the backup manifest/config inside the container (default: config) +CONFIG_DIR=config