diff --git a/containers/novela/routers/backup.py b/containers/novela/routers/backup.py index 1a1cb2f..d039889 100644 --- a/containers/novela/routers/backup.py +++ b/containers/novela/routers/backup.py @@ -1,10 +1,11 @@ import asyncio +import base64 import hashlib import json import os import shutil import subprocess -from datetime import datetime, timezone +from datetime import date, datetime, timezone from pathlib import Path from tempfile import NamedTemporaryFile from urllib.parse import urlencode @@ -17,7 +18,12 @@ from fastapi.responses import HTMLResponse from shared_templates import templates from db import get_db_conn -from routers.common import scan_media, upsert_book +from routers.common import ( + scan_media, + upsert_book, + upsert_chapter, + upsert_cover_cache, +) from security import decrypt_value, encrypt_value, is_encrypted_value router = APIRouter() @@ -533,6 +539,169 @@ def _snapshot_name() -> str: return f"snapshot-{stamp}.json" +# ── Database-stored books (storage_type='db') ──────────────────────────────── +# These books have no file on disk: their content lives in PostgreSQL +# (library row + book_chapters + book_tags + library_cover_cache). Inline images +# referenced from chapters live on disk under library/images/ and are backed up +# as ordinary files. To make db-books restorable per-book (just like file books), +# each one is serialized to JSON and stored in the same content-addressed object +# store, then referenced from the snapshot with a "storage": "db" marker. + +_DB_LIBRARY_COLS = [ + "filename", + "media_type", + "storage_type", + "title", + "author", + "publisher", + "series", + "series_index", + "series_suffix", + "series_volume", + "publication_status", + "has_cover", + "description", + "source_url", + "publish_date", + "archived", + "want_to_read", + "needs_review", + "rating", + "created_at", +] + + +def _db_book_filenames(cur) -> list[str]: + cur.execute( + "SELECT filename FROM library WHERE storage_type = 'db' OR filename LIKE 'db/%' ORDER BY filename" + ) + return [r[0] for r in cur.fetchall()] + + +def _serialize_db_book(cur, filename: str) -> dict | None: + cols = _DB_LIBRARY_COLS + cur.execute( + f"SELECT {', '.join(cols)} FROM library WHERE filename = %s LIMIT 1", + (filename,), + ) + row = cur.fetchone() + if not row: + return None + lib: dict = {} + for col, val in zip(cols, row): + if isinstance(val, (datetime, date)): + val = val.isoformat() + lib[col] = val + + cur.execute( + "SELECT chapter_index, title, content FROM book_chapters WHERE filename = %s ORDER BY chapter_index", + (filename,), + ) + chapters = [ + {"chapter_index": r[0], "title": r[1] or "", "content": r[2] or ""} + for r in cur.fetchall() + ] + + cur.execute( + "SELECT tag, tag_type FROM book_tags WHERE filename = %s ORDER BY tag, tag_type", + (filename,), + ) + tags = [{"tag": r[0], "tag_type": r[1]} for r in cur.fetchall()] + + cur.execute( + "SELECT mime_type, thumb_webp FROM library_cover_cache WHERE filename = %s LIMIT 1", + (filename,), + ) + cover_row = cur.fetchone() + cover = None + if cover_row and cover_row[1] is not None: + cover = { + "mime_type": cover_row[0] or "image/webp", + "thumb_webp_b64": base64.b64encode(bytes(cover_row[1])).decode("ascii"), + } + + return { + "novela_db_book": 1, + "filename": filename, + "library": lib, + "chapters": chapters, + "tags": tags, + "cover": cover, + } + + +def _restore_db_book(filename: str, payload: dict) -> None: + """Re-create a db-stored book from a serialized snapshot object.""" + lib = dict(payload.get("library") or {}) + lib["filename"] = filename + lib.setdefault("storage_type", "db") + lib.setdefault("media_type", "epub") + + cols = [c for c in _DB_LIBRARY_COLS if c in lib] + if "filename" not in cols: + cols.insert(0, "filename") + + chapters = payload.get("chapters") or [] + tags = payload.get("tags") or [] + cover = payload.get("cover") + + col_list = ", ".join(cols) + placeholders = ", ".join(["%s"] * len(cols)) + updates = ", ".join(f"{c} = EXCLUDED.{c}" for c in cols if c != "filename") + values = [lib.get(c) for c in cols] + + with get_db_conn() as conn: + with conn: + with conn.cursor() as cur: + cur.execute( + f""" + INSERT INTO library ({col_list}) + VALUES ({placeholders}) + ON CONFLICT (filename) DO UPDATE SET + {updates}, + updated_at = NOW() + """, + values, + ) + + cur.execute("DELETE FROM book_chapters WHERE filename = %s", (filename,)) + + for ch in chapters: + try: + idx = int(ch.get("chapter_index")) + except (TypeError, ValueError): + continue + upsert_chapter(conn, filename, idx, ch.get("title", ""), ch.get("content", "")) + + with conn.cursor() as cur: + cur.execute("DELETE FROM book_tags WHERE filename = %s", (filename,)) + rows = [] + seen: set[tuple[str, str]] = set() + for t in tags: + tag = (t.get("tag") or "").strip() + ttype = (t.get("tag_type") or "").strip() + if not tag or not ttype: + continue + key = (tag.casefold(), ttype) + if key in seen: + continue + seen.add(key) + rows.append((filename, tag, ttype)) + if rows: + cur.executemany( + "INSERT INTO book_tags (filename, tag, tag_type) VALUES (%s, %s, %s) " + "ON CONFLICT (filename, tag, tag_type) DO NOTHING", + rows, + ) + + if cover and cover.get("thumb_webp_b64"): + try: + thumb = base64.b64decode(cover["thumb_webp_b64"]) + upsert_cover_cache(conn, filename, cover.get("mime_type", "image/webp"), thumb) + except Exception: + pass + + def _object_path(objects_root: str, sha256: str) -> str: return _dropbox_join(objects_root, sha256[:2], sha256) @@ -574,6 +743,61 @@ def _run_pg_dump() -> tuple[bytes, str]: tmp_path.unlink(missing_ok=True) +def _psql_base_args() -> list[str]: + return [ + "-h", + os.environ.get("POSTGRES_HOST", "postgres"), + "-p", + str(os.environ.get("POSTGRES_PORT", "5432")), + "-U", + os.environ.get("POSTGRES_USER", "novela"), + "-d", + os.environ.get("POSTGRES_DB", "novela"), + ] + + +def _run_pg_restore(dump_bytes: bytes) -> None: + """Restore a full PostgreSQL dump. + + Resets the public schema first so any plain pg_dump (with or without + --clean) restores cleanly into an empty schema. This is destructive: it + drops and recreates the entire public schema before applying the dump. + """ + env = os.environ.copy() + env["PGPASSWORD"] = os.environ.get("POSTGRES_PASSWORD", "") + base = _psql_base_args() + + reset = subprocess.run( + ["psql", *base, "-v", "ON_ERROR_STOP=1", "-c", "DROP SCHEMA public CASCADE; CREATE SCHEMA public;"], + env=env, + capture_output=True, + text=True, + ) + if reset.returncode != 0: + raise RuntimeError(f"schema reset failed: {(reset.stderr or '').strip()[:500] or 'unknown error'}") + + with NamedTemporaryFile(suffix=".sql", delete=False) as tmp: + tmp_path = Path(tmp.name) + tmp_path.write_bytes(dump_bytes) + + try: + proc = subprocess.run( + ["psql", *base, "-v", "ON_ERROR_STOP=1", "-f", str(tmp_path)], + env=env, + capture_output=True, + text=True, + ) + if proc.returncode != 0: + raise RuntimeError(f"psql restore failed: {(proc.stderr or '').strip()[:500] or 'unknown error'}") + finally: + tmp_path.unlink(missing_ok=True) + + +def _list_pg_dump_paths(client: dropbox.Dropbox, postgres_root: str) -> list[str]: + files = _dropbox_list_files_recursive(client, postgres_root) + return sorted([p for p in files if p.endswith(".sql")], reverse=True) + + def _has_running_backup() -> bool: with get_db_conn() as conn: with conn: @@ -765,6 +989,27 @@ def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> t uploaded_size += int(state["size"]) uploaded_count += 1 + # Database-stored books: serialize each into the content-addressed object + # store and reference it from the snapshot so it can be restored per-book. + with get_db_conn() as conn: + with conn.cursor() as cur: + db_filenames = _db_book_filenames(cur) + for fn in db_filenames: + payload = _serialize_db_book(cur, fn) + if payload is None: + continue + data = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") + sha256 = hashlib.sha256(data).hexdigest() + snapshot_files[fn] = {"size": len(data), "sha256": sha256, "storage": "db"} + object_target = _object_path(objects_root, sha256) + if client is not None: + if not _dropbox_exists(client, object_target): + uploaded_size += _dropbox_upload_bytes(client, object_target, data) + uploaded_count += 1 + else: + uploaded_size += len(data) + uploaded_count += 1 + _prog(total_files, total_files, "snapshot") snapshot = { @@ -927,6 +1172,7 @@ async def backup_dropbox_credentials_delete(): async def backup_health(): token_present = bool(_load_dropbox_token()) pg_dump_path = shutil.which("pg_dump") + psql_path = shutil.which("psql") dropbox_ok = False dropbox_error = None @@ -951,6 +1197,8 @@ async def backup_health(): "schedule_interval_hours": schedule_interval_hours, "pg_dump_available": bool(pg_dump_path), "pg_dump_path": pg_dump_path, + "psql_available": bool(psql_path), + "psql_path": psql_path, "library_exists": LIBRARY_DIR.exists(), "library_path": str(LIBRARY_DIR.resolve()), } @@ -1250,6 +1498,13 @@ def _parse_snapshot_date(name: str) -> str: return "" +def _entry_storage(rel: str, info: dict) -> str: + storage = str(info.get("storage") or "").strip().lower() + if storage: + return storage + return "db" if rel.startswith("db/") else "file" + + def _download_and_restore(client: dropbox.Dropbox, objects_root: str, rel: str, info: dict) -> None: sha256 = str(info.get("sha256") or "") if not sha256: @@ -1257,6 +1512,17 @@ def _download_and_restore(client: dropbox.Dropbox, objects_root: str, rel: str, obj_path = _object_path(objects_root, sha256) _meta, res = client.files_download(obj_path) data = res.content + + if _entry_storage(rel, info) == "db": + try: + payload = json.loads(data.decode("utf-8", errors="replace")) + except Exception as e: + raise ValueError(f"Invalid db-book snapshot object: {e}") + if not isinstance(payload, dict): + raise ValueError("db-book snapshot object is not an object") + _restore_db_book(rel, payload) + return + dest = LIBRARY_DIR / rel dest.parent.mkdir(parents=True, exist_ok=True) dest.write_bytes(data) @@ -1306,16 +1572,37 @@ async def snapshot_files(snapshot_name: str): return {"ok": False, "error": str(e), "files": []} files_data = snap.get("files", {}) - result = [ - { - "path": rel, - "size": info.get("size", 0), - "sha256": info.get("sha256", ""), - "exists_locally": (LIBRARY_DIR / rel).exists(), - } - for rel, info in sorted(files_data.items()) - if isinstance(info, dict) + + # db-books "exist" when their row is present in the library table, not on disk. + db_rels = [ + rel + for rel, info in files_data.items() + if isinstance(info, dict) and _entry_storage(rel, info) == "db" ] + existing_db: set[str] = set() + if db_rels: + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT filename FROM library WHERE filename = ANY(%s)", (db_rels,) + ) + existing_db = {r[0] for r in cur.fetchall()} + + result = [] + for rel, info in sorted(files_data.items()): + if not isinstance(info, dict): + continue + storage = _entry_storage(rel, info) + exists = rel in existing_db if storage == "db" else (LIBRARY_DIR / rel).exists() + result.append( + { + "path": rel, + "size": info.get("size", 0), + "sha256": info.get("sha256", ""), + "storage": storage, + "exists_locally": exists, + } + ) return {"ok": True, "snapshot": snapshot_name, "files": result} @@ -1365,3 +1652,69 @@ async def restore_files(request: Request): ok_count = sum(1 for r in results if r["ok"]) return {"ok": True, "restored": ok_count, "total": len(results), "results": results} + + +@router.get("/api/backup/postgres/dumps") +async def list_pg_dumps(): + try: + client = await asyncio.to_thread(_dbx) + except Exception as e: + return {"ok": False, "error": str(e), "dumps": []} + + dropbox_root = _load_dropbox_root() + postgres_root = _dropbox_join(dropbox_root, "postgres") + + try: + paths = await asyncio.to_thread(_list_pg_dump_paths, client, postgres_root) + except Exception as e: + return {"ok": False, "error": str(e), "dumps": []} + + dumps = [{"name": Path(p).name} for p in paths] + return {"ok": True, "dumps": dumps} + + +@router.post("/api/backup/postgres/restore") +async def restore_pg_dump(request: Request): + """Restore the entire PostgreSQL database from a Dropbox pg_dump. + + DESTRUCTIVE: drops and recreates the public schema before applying the + dump. This recovers everything, including database-stored books, but + overwrites the current database. + """ + body = {} + try: + body = await request.json() + except Exception: + pass + + name = Path((body.get("name") or "").strip()).name + if not name or not name.endswith(".sql"): + return {"ok": False, "error": "A valid .sql dump name is required"} + + if not shutil.which("psql"): + return {"ok": False, "error": "psql is not available in this container"} + + try: + client = await asyncio.to_thread(_dbx) + except Exception as e: + return {"ok": False, "error": str(e)} + + dropbox_root = _load_dropbox_root() + postgres_root = _dropbox_join(dropbox_root, "postgres") + dump_path = _dropbox_join(postgres_root, name) + + try: + def _download() -> bytes: + _meta, res = client.files_download(dump_path) + return res.content + + data = await asyncio.to_thread(_download) + except Exception as e: + return {"ok": False, "error": f"Failed to download dump: {e}"} + + try: + await asyncio.to_thread(_run_pg_restore, data) + except Exception as e: + return {"ok": False, "error": str(e)} + + return {"ok": True, "restored": name, "size_bytes": len(data)} diff --git a/containers/novela/templates/backup.html b/containers/novela/templates/backup.html index f18eea1..0dd08bd 100644 --- a/containers/novela/templates/backup.html +++ b/containers/novela/templates/backup.html @@ -242,7 +242,10 @@
Restore

- Browse a snapshot and restore individual books from Dropbox back to disk. + Browse a snapshot and restore individual books from Dropbox. File books are + written back to disk; database books (format DB) are re-inserted + into the library. Database books only appear in snapshots created after this + feature was added — to recover older database books, use Full Database Restore below.

+ + + + +
+
+
@@ -302,6 +325,7 @@ rowHtml('Snapshots keep', d.retention_count ?? 14), rowHtml('Schedule', d.schedule_enabled ? `enabled (${d.schedule_interval_hours || 24}h)` : 'disabled'), rowHtml('pg_dump', d.pg_dump_available ? (d.pg_dump_path || 'available') : 'missing'), + rowHtml('psql', d.psql_available ? (d.psql_path || 'available') : 'missing'), rowHtml('Library exists', fmtStatus(d.library_exists)), rowHtml('Library path', d.library_path || '-'), ].join(''); @@ -545,7 +569,7 @@ } async function refreshAll() { - await Promise.all([loadDropboxSettings(), loadHealth(), loadStatus(), loadHistory(), loadSnapshots()]); + await Promise.all([loadDropboxSettings(), loadHealth(), loadStatus(), loadHistory(), loadSnapshots(), loadPgDumps()]); pollRunProgress(); } @@ -648,13 +672,18 @@ return; } body.innerHTML = filtered.map(f => { - const ext = f.path.split('.').pop().toUpperCase(); + const isDb = f.storage === 'db'; + const ext = isDb ? 'DB' : f.path.split('.').pop().toUpperCase(); const parts = f.path.split('/'); const name = parts[parts.length - 1]; const dir = parts.slice(0, -1).join('/'); - const onDisk = f.exists_locally - ? '✓ exists' - : 'missing'; + const onDisk = isDb + ? (f.exists_locally + ? '✓ in library' + : 'not in library') + : (f.exists_locally + ? '✓ exists' + : 'missing'); return ` ${esc(ext)} @@ -722,6 +751,61 @@ } } + // ── Full database restore ──────────────────────────────────────────────── + + async function loadPgDumps() { + const sel = document.getElementById('pgdump-select'); + const btn = document.getElementById('btn-pg-restore'); + try { + const r = await fetch('/api/backup/postgres/dumps'); + const d = await r.json(); + if (!d.ok || !d.dumps.length) { + sel.innerHTML = ''; + btn.disabled = true; + return; + } + const current = sel.value; + sel.innerHTML = '' + + d.dumps.map(x => ``).join(''); + btn.disabled = !sel.value; + } catch (_) { + sel.innerHTML = ''; + btn.disabled = true; + } + } + + document.getElementById('pgdump-select').addEventListener('change', (e) => { + document.getElementById('btn-pg-restore').disabled = !e.target.value; + }); + + async function restorePgDump() { + const name = document.getElementById('pgdump-select').value; + const out = document.getElementById('pgdump-status'); + if (!name) return; + if (!confirm(`Restore the ENTIRE database from "${name}"?\n\nThis drops and replaces the current database. All current data will be lost. This cannot be undone.`)) return; + if (!confirm('Are you absolutely sure? This is your last chance to cancel.')) return; + const btn = document.getElementById('btn-pg-restore'); + btn.disabled = true; + out.className = 'status-line warn'; + out.textContent = `Restoring database from ${name}… (do not navigate away)`; + try { + const r = await fetch('/api/backup/postgres/restore', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify({name}), + }); + const d = await r.json(); + if (!d.ok) throw new Error(d.error || 'failed'); + out.className = 'status-line ok'; + out.textContent = `Database restored from ${d.restored} (${fmtBytes(d.size_bytes)}). Reload the app to see the restored library.`; + } catch (e) { + out.className = 'status-line err'; + out.textContent = `Database restore failed: ${e}`; + } finally { + btn.disabled = false; + } + } + refreshAll(); diff --git a/containers/novela/version.py b/containers/novela/version.py index ab44aca..4253908 100644 --- a/containers/novela/version.py +++ b/containers/novela/version.py @@ -10,7 +10,7 @@ from __future__ import annotations from changelog import CHANGELOG -BUILD = 0 +BUILD = 1 def _release_version() -> str: diff --git a/docs/changelog-develop.md b/docs/changelog-develop.md index dfb4784..dbaf738 100644 --- a/docs/changelog-develop.md +++ b/docs/changelog-develop.md @@ -1,5 +1,18 @@ # Develop Changelog +## 2026-06-01 — Backup/Restore: database-stored books are now restorable + +### Fixed +- Database-stored books (`storage_type='db'`, synthetic `db/...` filenames) could not be restored through the Backup → Restore option. The restore UI only listed and restored files from the on-disk library object store, but db-books have no file on disk — their content lives entirely in PostgreSQL (`book_chapters` + `library` row + `book_tags` + `library_cover_cache`). They were captured only in the full `pg_dump`, which the UI offered no way to restore. As a result db-books never appeared in the restore list and were effectively unrecoverable per-book. + +### Added +- **Per-book db restore.** The backup writer now serializes each database book to JSON (library row, all chapters, tags, and the cached cover) and stores it in the same content-addressed Dropbox object store used for file books, referenced from the snapshot with a `"storage": "db"` marker. Such books now appear in the Restore table (format **DB**) and can be restored individually; restore re-inserts the library row, chapters, tags and cover into PostgreSQL. Inline chapter images are unaffected — they already live on disk under `library/images/` and are backed up as ordinary files. + - `routers/backup.py`: new `_db_book_filenames`, `_serialize_db_book`, `_restore_db_book`, `_entry_storage` helpers; db-books serialized during `_run_backup_internal`; `_download_and_restore` branches on storage type; `/api/backup/snapshots/{name}/files` now reports `storage` and computes `exists_locally` for db-books from the `library` table. + - Note: db-books only appear in snapshots created *after* this change. Older backups still contain them only in the `pg_dump` — recover those via Full Database Restore below. +- **Full Database Restore.** New Backup-page card and endpoints to restore the entire PostgreSQL database from any Dropbox `pg_dump`. This recovers everything (all db-books, reading progress, tags, settings) from existing backups too. It is destructive: the public schema is dropped and recreated before the dump is applied, so any plain dump (with or without `--clean`) restores cleanly. Guarded behind a double confirmation in the UI. + - `routers/backup.py`: `_psql_base_args`, `_run_pg_restore`, `_list_pg_dump_paths`; endpoints `GET /api/backup/postgres/dumps` and `POST /api/backup/postgres/restore`; health endpoint now also reports `psql_available`/`psql_path`. + - `templates/backup.html`: db-books shown with **DB** format and "in library / not in library" status in the restore table; new "Full Database Restore" card with dump selector, double-confirm, and `psql` health row. + ## 2026-06-01 — Reader: Sepia reading theme ### Added