diff --git a/build-and-push.sh b/build-and-push.sh index 2665bdd..a2e7419 100755 --- a/build-and-push.sh +++ b/build-and-push.sh @@ -235,10 +235,11 @@ for svc_path in "${services[@]}"; do echo "============================================================" echo "[INFO] Building ${svc} -> tags: ${NEW_VERSION}, latest" echo "============================================================" - docker build -t "${IMAGE_BASE}:${NEW_VERSION}" -t "${IMAGE_BASE}:dev" "$svc_path" + docker build -t "${IMAGE_BASE}:${NEW_VERSION}" -t "${IMAGE_BASE}:latest" -t "${IMAGE_BASE}:dev" "$svc_path" docker push "${IMAGE_BASE}:${NEW_VERSION}" + docker push "${IMAGE_BASE}:latest" docker push "${IMAGE_BASE}:dev" - BUILT_IMAGES+=("${IMAGE_BASE}:${NEW_VERSION}" "${IMAGE_BASE}:dev") + BUILT_IMAGES+=("${IMAGE_BASE}:${NEW_VERSION}" "${IMAGE_BASE}:latest" "${IMAGE_BASE}:dev") else echo "============================================================" echo "[INFO] Test build ${svc} -> tag: latest" diff --git a/containers/novela/changelog.py b/containers/novela/changelog.py index 479a6ba..b57d900 100644 --- a/containers/novela/changelog.py +++ b/containers/novela/changelog.py @@ -3,6 +3,40 @@ Changelog data for Novela """ CHANGELOG = [ + { + "version": "v0.1.3", + "date": "2026-04-03", + "summary": "DB-stored books: chapters stored in PostgreSQL with full-text search, EPUB conversion, export, and a storage toggle in the grabber.", + "sections": [ + { + "title": "New feature", + "type": "feature", + "changes": [ + "DB-stored books: scraped books are now stored as chapters in PostgreSQL instead of EPUB files on disk — full-text search, content deduplication, and backup coverage are all handled automatically", + "Grabber stores chapters in book_chapters and images in a content-addressed imagestore (sha256-based, automatic deduplication across all books)", + "EPUB-to-DB conversion: Convert to DB button on any EPUB book detail page — extracts chapters, migrates all metadata and child rows (tags, progress, bookmarks, cover), removes the EPUB file", + "DB-to-EPUB export: Export EPUB button on DB-stored books — builds and streams a standards-compliant EPUB without writing a file to disk", + "Full-text search (/search): searches across all DB-stored chapter content via PostgreSQL FTS (tsvector / plainto_tsquery), returns highlighted snippets with direct links to the chapter position in the reader", + "Chapter editor supports DB-stored books: Monaco-based editor reads and writes book_chapters directly; chapter titles editable inline; title-only changes correctly included in Save All", + "Grabber: storage toggle on the Convert page — choose between DB storage and EPUB file before converting", + ], + }, + ], + }, + { + "version": "v0.1.2", + "date": "2026-04-02", + "summary": "Restore functionality on the Backup page.", + "sections": [ + { + "title": "New feature", + "type": "feature", + "changes": [ + "Restore functionality on the Backup page: browse any available Dropbox snapshot, see which files are currently missing from disk, and restore individual books or a selection back to the library — file is written to disk and immediately re-indexed", + ], + }, + ], + }, { "version": "v0.1.1", "date": "2026-03-31", diff --git a/containers/novela/main.py b/containers/novela/main.py index b36d4bb..d53f8aa 100644 --- a/containers/novela/main.py +++ b/containers/novela/main.py @@ -17,6 +17,7 @@ from routers import ( grabber_router, library_router, reader_router, + search_router, settings_router, ) @@ -46,6 +47,7 @@ app.include_router(builder_router) 
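# NOTE (illustrative aside, not part of the patch): the "PostgreSQL FTS
# (tsvector / plainto_tsquery)" mentioned in the changelog above maps onto the
# content_tsv column and GIN index created in migrations.py further down. A
# minimal, hedged sketch of the query shape; table and column names come from
# this patch, the connection string is a placeholder:
import psycopg2

conn = psycopg2.connect("dbname=novela")  # assumed DSN
with conn.cursor() as cur:
    cur.execute(
        "SELECT filename, chapter_index, title FROM book_chapters "
        "WHERE content_tsv @@ plainto_tsquery('simple', %s) "
        "ORDER BY ts_rank(content_tsv, plainto_tsquery('simple', %s)) DESC",
        ("search terms", "search terms"),
    )
    print(cur.fetchall())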
app.include_router(bulk_import_router) app.include_router(following_router) app.include_router(changelog_router) +app.include_router(search_router) @app.get("/") diff --git a/containers/novela/migrations.py b/containers/novela/migrations.py index 5fd144f..df01357 100644 --- a/containers/novela/migrations.py +++ b/containers/novela/migrations.py @@ -296,6 +296,59 @@ def migrate_rename_hiatus() -> None: _exec("UPDATE library SET publication_status = 'Long-Term Hold' WHERE publication_status = 'Hiatus'") +def migrate_add_storage_type() -> None: + _exec( + "ALTER TABLE library ADD COLUMN IF NOT EXISTS storage_type VARCHAR(10) NOT NULL DEFAULT 'file'" + ) + + +def migrate_create_book_images() -> None: + _exec( + """ + CREATE TABLE IF NOT EXISTS book_images ( + sha256 CHAR(64) PRIMARY KEY, + ext VARCHAR(10) NOT NULL, + media_type VARCHAR(100) NOT NULL, + size_bytes INTEGER NOT NULL DEFAULT 0 + ) + """ + ) + + +def migrate_create_book_chapters() -> None: + _exec( + """ + CREATE TABLE IF NOT EXISTS book_chapters ( + id SERIAL PRIMARY KEY, + filename VARCHAR(600) NOT NULL REFERENCES library(filename) ON DELETE CASCADE, + chapter_index INTEGER NOT NULL, + title VARCHAR(500) NOT NULL DEFAULT '', + content TEXT NOT NULL DEFAULT '', + content_tsv TSVECTOR, + UNIQUE (filename, chapter_index) + ) + """ + ) + _exec( + "CREATE INDEX IF NOT EXISTS idx_book_chapters_filename ON book_chapters (filename, chapter_index)" + ) + _exec( + "CREATE INDEX IF NOT EXISTS idx_book_chapters_tsv ON book_chapters USING GIN (content_tsv)" + ) + + +def migrate_rebuild_chapter_tsv_with_title() -> None: + """Rebuild content_tsv to include chapter title (safe to run repeatedly).""" + _exec( + """ + UPDATE book_chapters + SET content_tsv = to_tsvector('simple', + COALESCE(title, '') || ' ' || + regexp_replace(COALESCE(content, ''), '<[^>]*>', ' ', 'g')) + """ + ) + + def run_migrations() -> None: migrate_create_library() migrate_create_book_tags() @@ -314,3 +367,7 @@ def run_migrations() -> None: migrate_create_builder_drafts() migrate_create_authors() migrate_rename_hiatus() + migrate_add_storage_type() + migrate_create_book_images() + migrate_create_book_chapters() + migrate_rebuild_chapter_tsv_with_title() diff --git a/containers/novela/routers/__init__.py b/containers/novela/routers/__init__.py index beaae54..0fa2489 100644 --- a/containers/novela/routers/__init__.py +++ b/containers/novela/routers/__init__.py @@ -7,6 +7,7 @@ from routers.following import router as following_router from routers.grabber import router as grabber_router from routers.library import router as library_router from routers.reader import router as reader_router +from routers.search import router as search_router from routers.settings import router as settings_router __all__ = [ @@ -20,4 +21,5 @@ __all__ = [ "bulk_import_router", "following_router", "changelog_router", + "search_router", ] diff --git a/containers/novela/routers/backup.py b/containers/novela/routers/backup.py index 9b218a2..430dd09 100644 --- a/containers/novela/routers/backup.py +++ b/containers/novela/routers/backup.py @@ -17,6 +17,7 @@ from fastapi.responses import HTMLResponse from fastapi.templating import Jinja2Templates from db import get_db_conn +from routers.common import scan_media, upsert_book from security import decrypt_value, encrypt_value, is_encrypted_value templates = Jinja2Templates(directory="templates") @@ -1196,3 +1197,131 @@ async def run_backup(request: Request): "message": "Backup started in background.", "started_at": _now_iso(), } + + +def 
_parse_snapshot_date(name: str) -> str: + """Parse 'snapshot-20260329-123456.json' → '2026-03-29T12:34:56Z'.""" + stem = Path(name).stem # snapshot-20260329-123456 + parts = stem.split("-") + if len(parts) >= 3: + d, t = parts[1], parts[2] + if len(d) == 8 and len(t) == 6: + return f"{d[:4]}-{d[4:6]}-{d[6:]}T{t[:2]}:{t[2:4]}:{t[4:]}Z" + return "" + + +def _download_and_restore(client: dropbox.Dropbox, objects_root: str, rel: str, info: dict) -> None: + sha256 = str(info.get("sha256") or "") + if not sha256: + raise ValueError("No sha256 in snapshot entry") + obj_path = _object_path(objects_root, sha256) + _meta, res = client.files_download(obj_path) + data = res.content + dest = LIBRARY_DIR / rel + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_bytes(data) + meta = scan_media(dest) + tags = [(s, "subject") for s in meta.get("subjects", [])] + with get_db_conn() as conn: + with conn: + upsert_book(conn, rel, meta, tags) + + +@router.get("/api/backup/snapshots") +async def list_snapshots(): + try: + client = await asyncio.to_thread(_dbx) + except Exception as e: + return {"ok": False, "error": str(e), "snapshots": []} + + dropbox_root = _load_dropbox_root() + snapshots_root = _dropbox_join(dropbox_root, "library_snapshots") + + try: + paths = await asyncio.to_thread(_list_snapshot_paths, client, snapshots_root) + except Exception as e: + return {"ok": False, "error": str(e), "snapshots": []} + + snapshots = [ + {"name": Path(p).name, "created_at": _parse_snapshot_date(Path(p).name)} + for p in paths + ] + return {"ok": True, "snapshots": snapshots} + + +@router.get("/api/backup/snapshots/{snapshot_name}/files") +async def snapshot_files(snapshot_name: str): + try: + client = await asyncio.to_thread(_dbx) + except Exception as e: + return {"ok": False, "error": str(e), "files": []} + + dropbox_root = _load_dropbox_root() + snapshots_root = _dropbox_join(dropbox_root, "library_snapshots") + snapshot_path = _dropbox_join(snapshots_root, snapshot_name) + + try: + snap = await asyncio.to_thread(_load_snapshot_data, client, snapshot_path) + except Exception as e: + return {"ok": False, "error": str(e), "files": []} + + files_data = snap.get("files", {}) + result = [ + { + "path": rel, + "size": info.get("size", 0), + "sha256": info.get("sha256", ""), + "exists_locally": (LIBRARY_DIR / rel).exists(), + } + for rel, info in sorted(files_data.items()) + if isinstance(info, dict) + ] + return {"ok": True, "snapshot": snapshot_name, "files": result} + + +@router.post("/api/backup/restore") +async def restore_files(request: Request): + body = {} + try: + body = await request.json() + except Exception: + pass + + snapshot_name = (body.get("snapshot_name") or "").strip() + files_to_restore: list[str] = body.get("files", []) + + if not snapshot_name: + return {"ok": False, "error": "snapshot_name is required"} + if not files_to_restore: + return {"ok": False, "error": "No files specified"} + + try: + client = await asyncio.to_thread(_dbx) + except Exception as e: + return {"ok": False, "error": str(e)} + + dropbox_root = _load_dropbox_root() + snapshots_root = _dropbox_join(dropbox_root, "library_snapshots") + objects_root = _dropbox_join(dropbox_root, "library_objects") + snapshot_path = _dropbox_join(snapshots_root, snapshot_name) + + try: + snap = await asyncio.to_thread(_load_snapshot_data, client, snapshot_path) + except Exception as e: + return {"ok": False, "error": f"Failed to load snapshot: {e}"} + + files_data = snap.get("files", {}) + + results = [] + for rel in files_to_restore: + 
if rel not in files_data: + results.append({"path": rel, "ok": False, "error": "Not found in snapshot"}) + continue + try: + await asyncio.to_thread(_download_and_restore, client, objects_root, rel, files_data[rel]) + results.append({"path": rel, "ok": True}) + except Exception as e: + results.append({"path": rel, "ok": False, "error": str(e)}) + + ok_count = sum(1 for r in results if r["ok"]) + return {"ok": True, "restored": ok_count, "total": len(results), "results": results} diff --git a/containers/novela/routers/common.py b/containers/novela/routers/common.py index b782195..faaec72 100644 --- a/containers/novela/routers/common.py +++ b/containers/novela/routers/common.py @@ -1,4 +1,5 @@ import base64 +import hashlib import html as _html import io import posixpath @@ -18,10 +19,16 @@ from pdf import pdf_cover_thumb, pdf_page_count, pdf_scan_metadata LIBRARY_DIR = Path("library") LIBRARY_DIR.mkdir(exist_ok=True) LIBRARY_ROOT = LIBRARY_DIR.resolve() +IMAGES_DIR = LIBRARY_DIR / "images" COVER_W = 300 COVER_H = 450 +def is_db_filename(filename: str) -> bool: + """True if the filename is a synthetic DB-stored book path (no file on disk).""" + return (filename or "").startswith("db/") + + def clean_segment(value: str, fallback: str, max_len: int) -> str: txt = re.sub(r"\s+", " ", (value or "").strip()) txt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", txt) @@ -78,6 +85,17 @@ def coerce_series_index(value: int | str | None) -> int: def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, series: str, series_index: int | str | None, series_suffix: str = "", ext: str = "") -> Path: + if media_type == "db": + pub = clean_segment(publisher, "Unknown Publisher", 80) + auth = clean_segment(author, "Unknown Author", 80) + ttl = clean_segment(title, "Untitled", 140) + series_name = clean_segment(series, "", 80) + if series_name: + idx = coerce_series_index(series_index) + sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] + return Path("db") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx} - {ttl}" + return Path("db") / pub / auth / ttl + if media_type == "epub": pub = clean_segment(publisher, "Unknown Publisher", 80) auth = clean_segment(author, "Unknown Author", 80) @@ -339,12 +357,13 @@ def upsert_book(conn, filename: str, meta: dict, tags: list[tuple[str, str]] | N with conn.cursor() as cur: cur.execute( """ - INSERT INTO library (filename, media_type, title, author, publisher, has_cover, + INSERT INTO library (filename, media_type, storage_type, title, author, publisher, has_cover, series, series_index, series_suffix, publication_status, source_url, publish_date, description, needs_review, want_to_read, rating, updated_at) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, %s, NOW()) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, %s, NOW()) ON CONFLICT (filename) DO UPDATE SET media_type = EXCLUDED.media_type, + storage_type = EXCLUDED.storage_type, title = COALESCE(NULLIF(EXCLUDED.title, ''), library.title), author = COALESCE(NULLIF(EXCLUDED.author, ''), library.author), publisher = COALESCE(NULLIF(EXCLUDED.publisher, ''), library.publisher), @@ -362,6 +381,7 @@ def upsert_book(conn, filename: str, meta: dict, tags: list[tuple[str, str]] | N ( filename, meta.get("media_type", "epub"), + meta.get("storage_type", "file"), meta.get("title", ""), meta.get("author", ""), meta.get("publisher", ""), @@ -413,6 +433,7 @@ def list_library_json() -> list[dict]: (cc.filename IS NOT NULL) AS has_cached_cover, l.rating, 
COALESCE(l.series_suffix, '') AS series_suffix, + COALESCE(l.storage_type, 'file') AS storage_type, json_agg( json_build_object('tag', bt.tag, 'tag_type', bt.tag_type) ) FILTER (WHERE bt.tag IS NOT NULL) AS tags @@ -430,7 +451,7 @@ def list_library_json() -> list[dict]: l.archived, l.needs_review, l.updated_at, rp.progress, rp.cfi, rp.page, rs.read_count, rs.last_read, - cc.filename, l.rating, l.series_suffix + cc.filename, l.rating, l.series_suffix, l.storage_type ORDER BY COALESCE(l.publisher, ''), COALESCE(l.author, ''), COALESCE(l.series, ''), l.series_index, COALESCE(l.title, '') """ ) @@ -460,13 +481,92 @@ def list_library_json() -> list[dict]: "page": r[15], "read_count": r[16] or 0, "last_read": r[17].isoformat() if r[17] else None, - "tags": r[21] or [], + "storage_type": r[21] or "file", + "tags": r[22] or [], "rating": r[19] or 0, } ) return out +_IMAGE_EXT_MAP = { + "image/jpeg": ".jpg", + "image/png": ".png", + "image/webp": ".webp", + "image/gif": ".gif", +} + + +def write_image_file(data: bytes, media_type: str) -> tuple[str, str, str]: + """Write image bytes to the content-addressed imagestore (no DB). + + Returns (sha256, ext, url). + """ + sha256 = hashlib.sha256(data).hexdigest() + ext = _IMAGE_EXT_MAP.get(media_type, ".jpg") + img_path = IMAGES_DIR / sha256[:2] / f"{sha256}{ext}" + if not img_path.exists(): + img_path.parent.mkdir(parents=True, exist_ok=True) + img_path.write_bytes(data) + url = f"/library/db-images/{sha256[:2]}/{sha256}{ext}" + return sha256, ext, url + + +def store_db_image(conn, data: bytes, media_type: str) -> tuple[str, str, str]: + """Write image to imagestore and register in book_images table. + + Returns (sha256, ext, url). + """ + sha256, ext, url = write_image_file(data, media_type) + with conn.cursor() as cur: + cur.execute( + """ + INSERT INTO book_images (sha256, ext, media_type, size_bytes) + VALUES (%s, %s, %s, %s) + ON CONFLICT (sha256) DO NOTHING + """, + (sha256, ext, media_type, len(data)), + ) + return sha256, ext, url + + +def html_to_plain(html: str) -> str: + """Strip HTML tags for tsvector input.""" + from bs4 import BeautifulSoup + return BeautifulSoup(html, "html.parser").get_text(" ", strip=True) + + +def upsert_chapter(conn, filename: str, chapter_index: int, title: str, content_html: str) -> None: + """Insert or replace a chapter in book_chapters and update its tsvector.""" + plain = html_to_plain(content_html) + tsv_input = (title or "") + " " + plain + with conn.cursor() as cur: + cur.execute( + """ + INSERT INTO book_chapters (filename, chapter_index, title, content, content_tsv) + VALUES (%s, %s, %s, %s, to_tsvector('simple', %s)) + ON CONFLICT (filename, chapter_index) DO UPDATE SET + title = EXCLUDED.title, + content = EXCLUDED.content, + content_tsv = EXCLUDED.content_tsv + """, + (filename, chapter_index, title, content_html, tsv_input), + ) + + +def ensure_unique_db_filename(conn, base_filename: str) -> str: + """Return a filename that doesn't yet exist in the library table.""" + candidate = base_filename + counter = 2 + while True: + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM library WHERE filename = %s", (candidate,)) + if not cur.fetchone(): + return candidate + candidate = f"{base_filename} ({counter})" + counter += 1 + + def normalize_site(raw: str) -> str: raw = (raw or "").strip() if "://" in raw: diff --git a/containers/novela/routers/editor.py b/containers/novela/routers/editor.py index b3bc9f7..0705629 100644 --- a/containers/novela/routers/editor.py +++ b/containers/novela/routers/editor.py 
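# NOTE (illustrative aside, not part of the patch): write_image_file() above
# shards the content-addressed imagestore by the first two hex characters of
# the SHA-256, so identical image bytes are stored exactly once no matter how
# many books reference them. A self-contained sketch of the path scheme
# (hypothetical bytes, layout as defined in common.py):
import hashlib

data = b"example image bytes"  # hypothetical payload
sha256 = hashlib.sha256(data).hexdigest()
print(f"library/images/{sha256[:2]}/{sha256}.jpg")      # on-disk shard path
print(f"/library/db-images/{sha256[:2]}/{sha256}.jpg")  # URL served to the reader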
@@ -12,7 +12,7 @@ from fastapi.templating import Jinja2Templates from db import get_db_conn from epub import read_epub_file, write_epub_file -from routers.common import LIBRARY_DIR, resolve_library_path +from routers.common import LIBRARY_DIR, is_db_filename, resolve_library_path, upsert_chapter router = APIRouter() templates = Jinja2Templates(directory="templates") @@ -158,21 +158,40 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat @router.get("/library/editor/{filename:path}", response_class=HTMLResponse) async def editor_page(filename: str, request: Request): - path = resolve_library_path(filename) - if path is None or not path.exists(): - return HTMLResponse("Not found", status_code=404) + if not is_db_filename(filename): + path = resolve_library_path(filename) + if path is None or not path.exists(): + return HTMLResponse("Not found", status_code=404) with get_db_conn() as conn: with conn.cursor() as cur: cur.execute("SELECT title FROM library WHERE filename = %s", (filename,)) row = cur.fetchone() - title = row[0] if row and row[0] else filename + if not row: + return HTMLResponse("Not found", status_code=404) + title = row[0] if row[0] else filename - return templates.TemplateResponse(request, "editor.html", {"filename": filename, "title": title}) + return templates.TemplateResponse(request, "editor.html", { + "filename": filename, + "title": title, + "is_db": is_db_filename(filename), + }) @router.get("/api/edit/chapter/{index:int}/{filename:path}") async def get_edit_chapter(filename: str, index: int): + if is_db_filename(filename): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT title, content FROM book_chapters WHERE filename = %s AND chapter_index = %s", + (filename, index), + ) + row = cur.fetchone() + if not row: + return Response(status_code=404) + return JSONResponse({"index": index, "href": f"db:{index}", "title": row[0], "content": row[1]}) + path = resolve_library_path(filename) if path is None or not path.exists(): return Response(status_code=404) @@ -186,13 +205,29 @@ async def get_edit_chapter(filename: str, index: int): @router.post("/api/edit/chapter/{index:int}/{filename:path}") async def save_edit_chapter(filename: str, index: int, request: Request): + body = await request.json() + content = body.get("content", "") + + if is_db_filename(filename): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT title FROM book_chapters WHERE filename = %s AND chapter_index = %s", + (filename, index), + ) + row = cur.fetchone() + if not row: + return JSONResponse({"error": "Chapter not found"}, status_code=404) + new_title = (body.get("title") or "").strip() or row[0] + with conn: + upsert_chapter(conn, filename, index, new_title, content) + return JSONResponse({"ok": True}) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "not found"}, status_code=404) if not path.exists(): return JSONResponse({"error": "File not found"}, status_code=404) - body = await request.json() - content = body.get("content", "") if not content: return JSONResponse({"error": "No content"}, status_code=400) spine = _epub_spine(path) @@ -208,15 +243,42 @@ async def save_edit_chapter(filename: str, index: int, request: Request): @router.post("/api/edit/chapter/add/{filename:path}") async def add_edit_chapter(filename: str, request: Request): + body = await request.json() + title = (body.get("title") or "New chapter").strip() or "New chapter" + after_index = 
body.get("after_index", -1) + + if is_db_filename(filename): + try: + after_index = int(after_index) + except Exception: + after_index = -1 + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT COUNT(*) FROM book_chapters WHERE filename = %s", (filename,)) + total = cur.fetchone()[0] + cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,)) + if not cur.fetchone(): + return JSONResponse({"error": "not found"}, status_code=404) + insert_idx = total if after_index < 0 or after_index >= total else after_index + 1 + with conn: + with conn.cursor() as cur: + cur.execute( + "UPDATE book_chapters SET chapter_index = chapter_index + 1 WHERE filename = %s AND chapter_index >= %s", + (filename, insert_idx), + ) + upsert_chapter(conn, filename, insert_idx, title, "") + return JSONResponse({"ok": True, "index": insert_idx, "count": total + 1}) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "not found"}, status_code=404) if not path.exists(): return JSONResponse({"error": "File not found"}, status_code=404) - body = await request.json() - title = (body.get("title") or "New chapter").strip() or "New chapter" - after_index = body.get("after_index", -1) + try: + after_index = int(after_index) + except Exception: + after_index = -1 try: after_index = int(after_index) except Exception: @@ -339,6 +401,26 @@ async def add_edit_chapter(filename: str, request: Request): @router.delete("/api/edit/chapter/{index:int}/{filename:path}") async def delete_edit_chapter(filename: str, index: int): + if is_db_filename(filename): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT COUNT(*) FROM book_chapters WHERE filename = %s", (filename,)) + total = cur.fetchone()[0] + if total <= 1: + return JSONResponse({"error": "Cannot delete the last chapter"}, status_code=400) + with conn: + with conn.cursor() as cur: + cur.execute( + "DELETE FROM book_chapters WHERE filename = %s AND chapter_index = %s", + (filename, index), + ) + cur.execute( + "UPDATE book_chapters SET chapter_index = chapter_index - 1 WHERE filename = %s AND chapter_index > %s", + (filename, index), + ) + new_total = total - 1 + return JSONResponse({"ok": True, "index": min(index, new_total - 1), "count": new_total}) + path = resolve_library_path(filename) if path is None: return JSONResponse({"error": "not found"}, status_code=404) diff --git a/containers/novela/routers/grabber.py b/containers/novela/routers/grabber.py index 102fd12..34ad719 100644 --- a/containers/novela/routers/grabber.py +++ b/containers/novela/routers/grabber.py @@ -17,11 +17,16 @@ from db import get_db_conn from epub import detect_image_format, make_chapter_xhtml, make_epub from routers.common import ( LIBRARY_DIR, - ensure_cover_cache_for_book, + ensure_unique_db_filename, ensure_unique_rel_path, + make_cover_thumb_webp, make_rel_path, normalize_site, + store_db_image, upsert_book, + upsert_chapter, + upsert_cover_cache, + write_image_file, ) from scrapers import get_scraper from scrapers.base import HEADERS @@ -135,22 +140,87 @@ async def debug_run(request: Request): result: dict = {} try: async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client: + # Login + login_success = False if username: - await scraper.login(client, username, password) + login_success = await scraper.login(client, username, password) + result["login"] = { + "attempted": bool(username), + "success": login_success, + "username": username, + } + book = await 
scraper.fetch_book_info(client, url) - result = { + chapters = book.get("chapters", []) + + # Compute output filename + series = book.get("series", "") + series_index = int(book.get("series_index_hint", 1) or 1) + filename = make_rel_path( + media_type="epub", + publisher=book.get("publisher", ""), + author=book.get("author", ""), + title=book.get("title", ""), + series=series, + series_index=series_index, + ).as_posix() + + result["meta"] = { "title": book.get("title", ""), "author": book.get("author", ""), "publisher": book.get("publisher", ""), "series": book.get("series", ""), - "chapter_count": len(book.get("chapters", [])), - "chapter_method": book.get("chapter_method", ""), "genres": book.get("genres", []), "subgenres": book.get("subgenres", []), "tags": book.get("tags", []), "description": book.get("description", ""), + "updated_date": book.get("updated_date", ""), "publication_status": book.get("publication_status", ""), + "filename": filename, } + + result["chapters"] = { + "count": len(chapters), + "method": book.get("chapter_method", ""), + "list": chapters, + } + + # Fetch first chapter + if chapters: + ch = chapters[0] + try: + _load_break_patterns() + ch_data = await scraper.fetch_chapter(client, ch) + content_el = ch_data.get("content_el") + raw_html = content_el.decode_contents() if content_el else "" + + xhtml_parts = [] + if content_el: + from bs4 import Tag + all_p = content_el.find_all("p") + empty_p = sum( + 1 for p in all_p + if not [c for c in p.children if isinstance(c, Tag)] + and not p.get_text().replace("\xa0", "").strip() + ) + filled_p = len(all_p) - empty_p + empty_p_is_spacer = filled_p > 0 and empty_p >= filled_p * 0.5 + for child in content_el.children: + part = element_to_xhtml(child, empty_p_is_spacer=empty_p_is_spacer) + if part.strip(): + xhtml_parts.append(part) + + result["first_chapter"] = { + "title": ch_data.get("title", ch["title"]), + "url": ch["url"], + "selector_id": ch_data.get("selector_id"), + "selector_class": ch_data.get("selector_class"), + "raw_html": raw_html[:8000], + "converted_xhtml": "\n".join(xhtml_parts)[:8000], + } + except Exception as e: + result["first_chapter"] = {"title": ch["title"], "url": ch["url"], "error": str(e)} + except Exception: result["error"] = traceback.format_exc() return result @@ -330,8 +400,8 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) } _load_break_patterns() - break_img_data = open("static/break.png", "rb").read() + # Collect chapters as {title, content_html, images: [(sha256, ext, media_type, size, data)]} chapters = [] for i, ch in enumerate(book["chapters"], 1): send("progress", {"current": i, "total": len(book["chapters"]), "title": ch["title"]}) @@ -339,11 +409,11 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) ch_data = await scraper.fetch_chapter(client, ch) content_el = ch_data["content_el"] - chapter_images = [] + # Download images and store to disk (no DB yet); rewrite src to absolute URL if content_el: - img_counter = 1 for img_tag in content_el.find_all("img"): if is_break_element(img_tag): + img_tag.decompose() continue src = img_tag.get("src", "") if not src or src.startswith("data:"): @@ -352,19 +422,16 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) try: img_resp = await client.get(urljoin(ch["url"], src)) if img_resp.status_code == 200: - img_name, img_mime = detect_image_format( - img_resp.content, f"ch{i:03d}_img{img_counter:03d}" + _, img_mime = detect_image_format( + 
img_resp.content, f"ch{i:03d}_img" ) - img_tag["src"] = f"../Images/{img_name}" + sha, ext_i, url = write_image_file(img_resp.content, img_mime) + img_tag["src"] = url img_tag["alt"] = img_tag.get("alt", "") - chapter_images.append( - { - "epub_path": f"OEBPS/Images/{img_name}", - "data": img_resp.content, - "media_type": img_mime, - } - ) - img_counter += 1 + img_tag.attrs = { + k: v for k, v in img_tag.attrs.items() + if k in ("src", "alt", "width", "height") + } else: img_tag.decompose() except Exception: @@ -386,9 +453,8 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) if part.strip(): xhtml_parts.append(part) - content_xhtml = "\n".join(xhtml_parts) - chapter_xhtml = make_chapter_xhtml(ch_data["title"], content_xhtml, i) - chapters.append({"title": ch_data["title"], "xhtml": chapter_xhtml, "images": chapter_images}) + content_html = "\n".join(xhtml_parts) + chapters.append({"title": ch_data["title"], "content_html": content_html}) await asyncio.sleep(0.2) except Exception as e: send("warning", {"message": f"Chapter {i} skipped: {e}"}) @@ -398,12 +464,30 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) job["done"] = True return - send("status", {"message": "Building EPUB..."}) - book_id = str(uuid.uuid4()) - epub_bytes = make_epub(book_title, author, chapters, cover_data, break_img_data, book_id, book_info) + storage_mode = job.get("storage_mode", "db") + send("status", {"message": "Saving to library..."}) - rel = ensure_unique_rel_path( - make_rel_path( + book_tags = ( + [(g, "genre") for g in book_info.get("genres", [])] + + [(g, "subgenre") for g in book_info.get("subgenres", [])] + + [(g, "tag") for g in book_info.get("tags", [])] + ) + + if storage_mode == "epub": + # ── EPUB file on disk ────────────────────────────────────────── + epub_chapters = [ + {"title": ch["title"], "xhtml": make_chapter_xhtml(ch["title"], ch["content_html"], i + 1), "images": []} + for i, ch in enumerate(chapters) + ] + try: + break_img_data = open("static/break.png", "rb").read() + except Exception: + break_img_data = b"" + epub_bytes = make_epub( + book_title, author, epub_chapters, cover_data, break_img_data, + str(uuid.uuid4()), book_info, + ) + rel_path = make_rel_path( media_type="epub", publisher=book_info.get("publisher", ""), author=author, @@ -411,40 +495,78 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) series=series, series_index=series_index, ) - ) - out_path = LIBRARY_DIR / rel - out_path.parent.mkdir(parents=True, exist_ok=True) - out_path.write_bytes(epub_bytes) + rel_path = ensure_unique_rel_path(rel_path) + out_path = LIBRARY_DIR / rel_path + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_bytes(epub_bytes) + rel_filename = rel_path.as_posix() + + book_meta = { + "media_type": "epub", + "storage_type": "file", + "has_cover": cover_data is not None, + "series": series, + "series_index": series_index if series else 0, + "title": book_title, + "publication_status": book_info.get("publication_status", ""), + "author": author, + "publisher": book_info.get("publisher", ""), + "source_url": book_info.get("source_url", ""), + "description": book_info.get("description", ""), + "publish_date": final_updated_date, + "needs_review": False, + } + with get_db_conn() as conn: + with conn: + upsert_book(conn, rel_filename, book_meta, book_tags) + if cover_data: + try: + thumb = make_cover_thumb_webp(cover_data) + upsert_cover_cache(conn, rel_filename, "image/webp", thumb) + 
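# NOTE (illustrative aside, not part of the patch): the EPUB-vs-DB branching
# above is selected per job from the convert request body; the convert()
# handler further down defaults storage_mode to "db". A hedged sketch of the
# client call, kept in comments because this spot sits mid-function; the host
# and route path are assumptions, the storage_mode field is from the patch:
#
#   import httpx
#   httpx.post(
#       "http://localhost:8000/grabber/convert",  # route path assumed
#       json={
#           "url": "https://example.com/some-novel",  # hypothetical source URL
#           "storage_mode": "db",  # or "epub" to keep writing a file to disk
#       },
#   )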
except Exception: + pass + + else: + # ── DB storage (default) ─────────────────────────────────────── + base_filename = make_rel_path( + media_type="db", + publisher=book_info.get("publisher", ""), + author=author, + title=book_title, + series=series, + series_index=series_index, + ).as_posix() + + book_meta = { + "media_type": "epub", + "storage_type": "db", + "has_cover": cover_data is not None, + "series": book_info.get("series", ""), + "series_index": series_index if book_info.get("series") else 0, + "title": book_title, + "publication_status": book_info.get("publication_status", ""), + "author": author, + "publisher": book_info.get("publisher", ""), + "source_url": book_info.get("source_url", ""), + "description": book_info.get("description", ""), + "publish_date": final_updated_date, + "needs_review": False, + } + with get_db_conn() as conn: + with conn: + rel_filename = ensure_unique_db_filename(conn, base_filename) + upsert_book(conn, rel_filename, book_meta, book_tags) + for idx, ch in enumerate(chapters): + upsert_chapter(conn, rel_filename, idx, ch["title"], ch["content_html"]) + if cover_data: + try: + thumb = make_cover_thumb_webp(cover_data) + upsert_cover_cache(conn, rel_filename, "image/webp", thumb) + except Exception: + pass - rel_filename = rel.as_posix() job["filename"] = rel_filename - - book_meta = { - "media_type": "epub", - "has_cover": cover_data is not None, - "series": book_info.get("series", ""), - "series_index": series_index if book_info.get("series") else 0, - "title": book_title, - "publication_status": book_info.get("publication_status", ""), - "author": author, - "publisher": book_info.get("publisher", ""), - "source_url": book_info.get("source_url", ""), - "description": book_info.get("description", ""), - "publish_date": final_updated_date, - "needs_review": False, - } - book_tags = ( - [(g, "genre") for g in book_info.get("genres", [])] - + [(g, "subgenre") for g in book_info.get("subgenres", [])] - + [(g, "tag") for g in book_info.get("tags", [])] - ) - - with get_db_conn() as conn: - with conn: - upsert_book(conn, rel_filename, book_meta, book_tags) - ensure_cover_cache_for_book(conn, rel_filename, out_path, "epub") - - send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters)}) + send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters), "storage_type": storage_mode}) job["done"] = True @@ -471,6 +593,7 @@ async def convert(request: Request): job["series_index"] = int(body.get("series_index", 1) or 1) job["updated_date_override"] = (body.get("updated_date") or "").strip() + job["storage_mode"] = "epub" if body.get("storage_mode") == "epub" else "db" JOBS[job_id] = job asyncio.create_task(scrape_book(job_id, url, username, password)) diff --git a/containers/novela/routers/library.py b/containers/novela/routers/library.py index e0ca3ee..e040dc4 100644 --- a/containers/novela/routers/library.py +++ b/containers/novela/routers/library.py @@ -15,6 +15,7 @@ from routers.common import ( LIBRARY_DIR, ensure_cover_cache_for_book, ensure_unique_rel_path, + is_db_filename, list_library_json, make_cover_thumb_webp, make_rel_path, @@ -175,6 +176,17 @@ async def library_download(filename: str): @router.delete("/library/file/{filename:path}") async def library_delete(filename: str): + if is_db_filename(filename): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,)) + if not cur.fetchone(): + return {"error": "Not found"} + with 
conn: + with conn.cursor() as cur: + cur.execute("DELETE FROM library WHERE filename = %s", (filename,)) + return {"ok": True} + full = resolve_library_path(filename) if full is None: return {"error": "Invalid filename"} @@ -233,9 +245,10 @@ async def library_bulk_delete(request: Request): @router.get("/library/cover-cached/{filename:path}") async def library_cover_cached(filename: str): - full = resolve_library_path(filename) - if full is None or not full.exists(): - return Response(status_code=404) + if not is_db_filename(filename): + full = resolve_library_path(filename) + if full is None or not full.exists(): + return Response(status_code=404) with get_db_conn() as conn: with conn: @@ -266,6 +279,19 @@ async def library_cover_cached(filename: str): @router.get("/library/cover/{filename:path}") async def library_cover(filename: str): + if is_db_filename(filename): + # DB books: cover is always served from the cache + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s", + (filename,), + ) + row = cur.fetchone() + if row and row[0]: + return Response(content=bytes(row[0]), media_type="image/webp") + return Response(status_code=404) + full = resolve_library_path(filename) if full is None or not full.exists(): return Response(status_code=404) diff --git a/containers/novela/routers/reader.py b/containers/novela/routers/reader.py index e0bb980..8f5b6fc 100644 --- a/containers/novela/routers/reader.py +++ b/containers/novela/routers/reader.py @@ -16,9 +16,22 @@ from fastapi.templating import Jinja2Templates from cbr import cbr_get_page, cbr_page_count from db import get_db_conn -from epub import read_epub_file, write_epub_file +from epub import make_chapter_xhtml, make_epub, read_epub_file, write_epub_file from pdf import pdf_page_count, pdf_render_page -from routers.common import LIBRARY_DIR, prune_empty_dirs, resolve_library_path, scan_epub +from routers.common import ( + IMAGES_DIR, + LIBRARY_DIR, + ensure_unique_db_filename, + is_db_filename, + make_cover_thumb_webp, + make_rel_path, + prune_empty_dirs, + resolve_library_path, + scan_epub, + upsert_chapter, + upsert_cover_cache, + write_image_file, +) router = APIRouter() templates = Jinja2Templates(directory="templates") @@ -459,6 +472,21 @@ def _guard(filename: str) -> bool: # Routes # --------------------------------------------------------------------------- +@router.get("/library/db-images/{path:path}") +async def serve_db_image(path: str): + """Serve an image from the content-addressed imagestore.""" + img_path = (IMAGES_DIR / path).resolve() + try: + img_path.relative_to(IMAGES_DIR.resolve()) + except ValueError: + return Response(status_code=404) + if not img_path.exists(): + return Response(status_code=404) + ext = img_path.suffix.lower() + mt = {".jpg": "image/jpeg", ".png": "image/png", ".webp": "image/webp", ".gif": "image/gif"}.get(ext, "application/octet-stream") + return FileResponse(img_path, media_type=mt) + + @router.get("/library/epub/{filename:path}") async def library_epub(filename: str): """Serve EPUB inline (no Content-Disposition: attachment) for the reader.""" @@ -472,6 +500,18 @@ async def library_epub(filename: str): @router.get("/library/chapters/{filename:path}") async def get_chapter_list(filename: str): + if is_db_filename(filename): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT chapter_index, title FROM book_chapters WHERE filename = %s ORDER BY chapter_index", + (filename,), + ) + 
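# NOTE (illustrative aside, not part of the patch): for DB-stored books the
# handler below returns synthetic "db:<index>" hrefs in place of EPUB spine
# hrefs, so the reader can address chapters without any file on disk. Sketch
# of the resulting shape for a hypothetical fetchall() result:
#
#   sample_rows = [(0, "Prologue"), (1, "Chapter 1")]
#   print([{"index": i, "title": t, "href": f"db:{i}"} for i, t in sample_rows])
#   # -> [{'index': 0, 'title': 'Prologue', 'href': 'db:0'}, ...]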
rows = cur.fetchall() + if not rows: + return Response(status_code=404) + return [{"index": r[0], "title": r[1], "href": f"db:{r[0]}"} for r in rows] + path = resolve_library_path(filename) if path is None: return Response(status_code=404) @@ -482,7 +522,24 @@ async def get_chapter_list(filename: str): @router.get("/library/chapter/{index}/{filename:path}") async def get_chapter_html(filename: str, index: int): - """Extract a single chapter from the EPUB and return it as an HTML fragment.""" + """Extract a single chapter from the EPUB (or DB) and return it as an HTML fragment.""" + if is_db_filename(filename): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT title, content FROM book_chapters WHERE filename = %s AND chapter_index = %s", + (filename, index), + ) + row = cur.fetchone() + if not row: + return Response(status_code=404) + title, content = row + safe_title = _html.escape(title or "") + return Response( + f'
<h2>{safe_title}</h2>
\n{content}\n', + media_type="text/html", + ) + path = resolve_library_path(filename) if path is None: return Response(status_code=404) @@ -605,11 +662,16 @@ async def save_progress(filename: str, request: Request): @router.post("/library/mark-read/{filename:path}") async def library_mark_read(filename: str, request: Request): - if resolve_library_path(filename) is None: - return {"error": "Invalid filename"} - path = resolve_library_path(filename) - if path is None or not path.exists(): - return {"error": "File not found"} + if not is_db_filename(filename): + path = resolve_library_path(filename) + if path is None or not path.exists(): + return {"error": "File not found"} + else: + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,)) + if not cur.fetchone(): + return {"error": "Not found"} body = {} try: body = await request.json() @@ -635,18 +697,23 @@ async def library_mark_read(filename: str, request: Request): @router.get("/library/book/{filename:path}", response_class=HTMLResponse) async def book_detail_page(filename: str, request: Request): - path = resolve_library_path(filename) - if path is None: - return HTMLResponse("Not found", status_code=404) - if not path.exists(): - return HTMLResponse("Not found", status_code=404) + db_book = is_db_filename(filename) + if not db_book: + path = resolve_library_path(filename) + if path is None: + return HTMLResponse("Not found", status_code=404) + if not path.exists(): + return HTMLResponse("Not found", status_code=404) + else: + path = None with get_db_conn() as conn: with conn.cursor() as cur: cur.execute( """ SELECT title, author, publisher, has_cover, series, series_index, publication_status, want_to_read, source_url, archived, publish_date, description, - rating, COALESCE(series_suffix, '') AS series_suffix + rating, COALESCE(series_suffix, '') AS series_suffix, + COALESCE(storage_type, 'file') AS storage_type FROM library WHERE filename = %s """, (filename,), @@ -668,9 +735,12 @@ async def book_detail_page(filename: str, request: Request): "publish_date": lib_row[10].isoformat() if lib_row[10] else "", "description": lib_row[11] or "", "rating": lib_row[12] or 0, + "storage_type": lib_row[14] or "file", } - # Supplement empty fields from EPUB metadata - if not entry["source_url"] or not entry["publish_date"] or not entry["description"]: + # Supplement empty fields from EPUB metadata (file-based books only) + if not db_book and path and ( + not entry["source_url"] or not entry["publish_date"] or not entry["description"] + ): epub_meta = scan_epub(path) if not entry["source_url"]: entry["source_url"] = epub_meta.get("source_url", "") @@ -679,12 +749,15 @@ async def book_detail_page(filename: str, request: Request): if not entry["description"]: entry["description"] = epub_meta.get("description", "") else: + if db_book: + return HTMLResponse("Not found", status_code=404) entry = scan_epub(path) entry.setdefault("want_to_read", False) entry.setdefault("archived", False) entry.setdefault("publish_date", "") entry.setdefault("description", "") entry.setdefault("rating", 0) + entry.setdefault("storage_type", "file") cur.execute( "SELECT tag, tag_type FROM book_tags WHERE filename = %s ORDER BY tag_type, tag", @@ -702,7 +775,7 @@ async def book_detail_page(filename: str, request: Request): else: tags_list.append(tag) - if not rows: + if not rows and not db_book and path: # Fallback for books where tags only exist in OPF after DB loss/rebuild. 
epub_meta = scan_epub(path) for subject in epub_meta.get("subjects", []): @@ -758,6 +831,7 @@ async def book_detail_page(filename: str, request: Request): "cfi": cfi, "rating": entry.get("rating", 0), "series_is_indexed": series_is_indexed, + "storage_type": entry.get("storage_type", "file"), }) @@ -802,10 +876,6 @@ async def api_suggestions(type: str | None = None): @router.patch("/library/book/{filename:path}") async def book_update(filename: str, request: Request): """Update book metadata and tags, and rename/move the file when needed.""" - old_path = resolve_library_path(filename) - if old_path is None or not old_path.exists(): - return JSONResponse({"error": "not found"}, status_code=404) - body = await request.json() title = body.get("title", "") author = body.get("author", "") @@ -813,6 +883,89 @@ async def book_update(filename: str, request: Request): series = body.get("series", "") from routers.common import parse_volume_str series_index, series_suffix = parse_volume_str(body.get("series_index", "")) + + # --- DB-stored book branch (no file on disk) --- + if is_db_filename(filename): + base_new = make_rel_path( + media_type="db", + publisher=publisher, + author=author, + title=title, + series=series, + series_index=series_index, + series_suffix=series_suffix, + ).as_posix() + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,)) + if not cur.fetchone(): + return JSONResponse({"error": "not found"}, status_code=404) + new_filename = ensure_unique_db_filename(conn, base_new) if base_new != filename else filename + with conn: + with conn.cursor() as cur: + cur.execute("SELECT has_cover FROM library WHERE filename = %s", (filename,)) + row = cur.fetchone() + has_cover = bool(row[0]) if row else False + cur.execute( + """ + INSERT INTO library ( + filename, title, author, publisher, has_cover, + series, series_index, series_suffix, publication_status, + source_url, publish_date, description, + archived, needs_review, storage_type, updated_at + ) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, 'db', NOW()) + ON CONFLICT (filename) DO UPDATE SET + title = EXCLUDED.title, + author = EXCLUDED.author, + publisher = EXCLUDED.publisher, + series = EXCLUDED.series, + series_index = EXCLUDED.series_index, + series_suffix = EXCLUDED.series_suffix, + publication_status = EXCLUDED.publication_status, + source_url = EXCLUDED.source_url, + publish_date = EXCLUDED.publish_date, + description = EXCLUDED.description, + needs_review = FALSE, + updated_at = NOW() + """, + ( + new_filename, title, author, publisher, has_cover, + series, series_index if series else 0, + series_suffix if series else "", + body.get("publication_status", ""), + body.get("source_url", ""), + body.get("publish_date") or None, + body.get("description", ""), + ), + ) + if new_filename != filename: + cur.execute("UPDATE book_tags SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE reading_progress SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE reading_sessions SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE library_cover_cache SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE book_chapters SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("UPDATE bookmarks SET filename = %s WHERE filename = %s", (new_filename, filename)) + cur.execute("DELETE FROM 
library WHERE filename = %s", (filename,)) + cur.execute("DELETE FROM book_tags WHERE filename = %s", (new_filename,)) + rows = ( + [(new_filename, g, "genre") for g in body.get("genres", []) if g] + + [(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g] + + [(new_filename, g, "tag") for g in body.get("tags", []) if g] + ) + if rows: + cur.executemany( + "INSERT INTO book_tags (filename, tag, tag_type) VALUES (%s, %s, %s)" + " ON CONFLICT (filename, tag, tag_type) DO NOTHING", + rows, + ) + return JSONResponse({"ok": True, "filename": new_filename, "renamed": new_filename != filename}) + + # --- File-based book branch --- + old_path = resolve_library_path(filename) + if old_path is None or not old_path.exists(): + return JSONResponse({"error": "not found"}, status_code=404) + ext = old_path.suffix.lower() target_rel = _make_rel_path( @@ -928,9 +1081,12 @@ async def book_update(filename: str, request: Request): @router.post("/library/rating/{filename:path}") async def set_rating(filename: str, request: Request): """Set (or clear) a 1-5 star rating for a book. rating=0 removes it.""" - path = resolve_library_path(filename) - if path is None or not path.exists(): - return JSONResponse({"error": "not found"}, status_code=404) + if not is_db_filename(filename): + path = resolve_library_path(filename) + if path is None or not path.exists(): + return JSONResponse({"error": "not found"}, status_code=404) + else: + path = None body = await request.json() try: @@ -938,17 +1094,18 @@ async def set_rating(filename: str, request: Request): except (TypeError, ValueError): return JSONResponse({"error": "invalid rating"}, status_code=400) - ext = path.suffix.lower() - if ext == ".epub": - try: - _write_epub_rating(path, rating) - except Exception as e: - return JSONResponse({"error": f"epub write failed: {e}"}, status_code=500) - elif ext == ".cbz": - try: - _write_cbz_rating(path, rating) - except Exception as e: - return JSONResponse({"error": f"cbz write failed: {e}"}, status_code=500) + if path is not None: + ext = path.suffix.lower() + if ext == ".epub": + try: + _write_epub_rating(path, rating) + except Exception as e: + return JSONResponse({"error": f"epub write failed: {e}"}, status_code=500) + elif ext == ".cbz": + try: + _write_cbz_rating(path, rating) + except Exception as e: + return JSONResponse({"error": f"cbz write failed: {e}"}, status_code=500) with get_db_conn() as conn: with conn: @@ -961,18 +1118,313 @@ async def set_rating(filename: str, request: Request): return JSONResponse({"ok": True, "rating": rating}) +# --------------------------------------------------------------------------- +# Phase 4 — EPUB → DB conversion +# --------------------------------------------------------------------------- + +def _epub_body_inner(xhtml: str, z: zf.ZipFile, href: str) -> tuple[str, list[dict]]: + """Parse an EPUB chapter XHTML, rewrite inline images to imagestore URLs. + + Returns (inner_html_without_body_tags, []). Images are written to disk but + not registered in book_images here (that happens in the final DB transaction).
+ """ + soup = BeautifulSoup(xhtml, "lxml") + body = soup.find("body") + if not body: + return "", [] + + href_dir = href.rsplit("/", 1)[0] if "/" in href else "" + names = z.namelist() + + for img in body.find_all("img"): + src = img.get("src", "") + if not src or src.startswith("http") or src.startswith("data:"): + continue + # Resolve relative path inside ZIP + parts = (href_dir.split("/") if href_dir else []) + src.split("/") + resolved: list[str] = [] + for p in parts: + if p == "..": + if resolved: + resolved.pop() + elif p: + resolved.append(p) + zip_path = "/".join(resolved) + img_data: bytes | None = None + if zip_path in names: + img_data = z.read(zip_path) + else: + lo = zip_path.lower() + match = next((n for n in names if n.lower() == lo), None) + if match: + img_data = z.read(match) + if img_data: + ext_s = zip_path.rsplit(".", 1)[-1].lower() if "." in zip_path else "jpg" + mime = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png", + "webp": "image/webp", "gif": "image/gif"}.get(ext_s, "image/jpeg") + _, _, url = write_image_file(img_data, mime) + img["src"] = url + else: + img.decompose() + + # Strip leading heading — EPUB chapters often open with the chapter title as + # an
<h1>/<h2>/<h3>. The chapter endpoint always prepends its own + # <h2>
, so keep the stored content heading-free. + for child in list(body.children): + if getattr(child, "name", None) is None: + continue # NavigableString / text node — skip + if not child.get_text(strip=True): + child.decompose() + continue + if child.name in ("h1", "h2", "h3"): + child.decompose() + break + + return body.decode_contents(), [] + + +@router.post("/api/library/convert-to-db/{filename:path}") +async def convert_to_db(filename: str): + """Convert a file-based EPUB to DB storage.""" + if is_db_filename(filename): + return JSONResponse({"error": "Already a DB book"}, status_code=400) + + old_path = resolve_library_path(filename) + if old_path is None or not old_path.exists(): + return JSONResponse({"error": "File not found"}, status_code=404) + if old_path.suffix.lower() != ".epub": + return JSONResponse({"error": "Only EPUB files can be converted"}, status_code=400) + + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT title, author, publisher, series, series_index, series_suffix " + "FROM library WHERE filename = %s", + (filename,), + ) + row = cur.fetchone() + if not row: + return JSONResponse({"error": "Book not in library"}, status_code=404) + title, author, publisher, series, series_index, series_suffix = row + + # Extract chapters from EPUB + try: + spine = _epub_spine(old_path) + chapters = [] + with zf.ZipFile(old_path, "r") as z: + for entry in spine: + try: + xhtml = z.read(entry["href"]).decode("utf-8", errors="replace") + except KeyError: + continue + inner, _ = _epub_body_inner(xhtml, z, entry["href"]) + if inner.strip(): + chapters.append({"title": entry["title"], "content_html": inner}) + except Exception as e: + return JSONResponse({"error": f"Failed to extract EPUB: {e}"}, status_code=500) + + if not chapters: + return JSONResponse({"error": "No chapters found"}, status_code=400) + + base_fn = make_rel_path( + media_type="db", + publisher=publisher or "", + author=author or "", + title=title or "", + series=series or "", + series_index=series_index or 0, + series_suffix=series_suffix or "", + ).as_posix() + + with get_db_conn() as conn: + with conn: + new_fn = ensure_unique_db_filename(conn, base_fn) + with conn.cursor() as cur: + # Insert new library row + cur.execute( + """ + INSERT INTO library (filename, media_type, storage_type, title, author, publisher, + has_cover, series, series_index, series_suffix, publication_status, + source_url, publish_date, description, archived, want_to_read, + needs_review, rating, created_at, updated_at) + SELECT %s, media_type, 'db', title, author, publisher, + has_cover, series, series_index, series_suffix, publication_status, + source_url, publish_date, description, archived, want_to_read, + needs_review, rating, created_at, NOW() + FROM library WHERE filename = %s + """, + (new_fn, filename), + ) + # Migrate child tables + cur.execute("UPDATE book_tags SET filename = %s WHERE filename = %s", (new_fn, filename)) + cur.execute("UPDATE reading_progress SET filename = %s WHERE filename = %s", (new_fn, filename)) + cur.execute( + "INSERT INTO reading_sessions (filename, read_at) SELECT %s, read_at FROM reading_sessions WHERE filename = %s", + (new_fn, filename), + ) + cur.execute("DELETE FROM reading_sessions WHERE filename = %s", (filename,)) + cur.execute("UPDATE bookmarks SET filename = %s WHERE filename = %s", (new_fn, filename)) + cur.execute( + "INSERT INTO library_cover_cache (filename, mime_type, thumb_webp, updated_at) " + "SELECT %s, mime_type, thumb_webp, updated_at FROM 
library_cover_cache WHERE filename = %s", + (new_fn, filename), + ) + cur.execute("DELETE FROM library_cover_cache WHERE filename = %s", (filename,)) + + # Insert chapters + for idx, ch in enumerate(chapters): + upsert_chapter(conn, new_fn, idx, ch["title"], ch["content_html"]) + + with conn.cursor() as cur: + cur.execute("DELETE FROM library WHERE filename = %s", (filename,)) + + try: + old_path.unlink() + prune_empty_dirs(old_path.parent) + except Exception: + pass + + return JSONResponse({"ok": True, "new_filename": new_fn}) + + +# --------------------------------------------------------------------------- +# Phase 5 — DB → EPUB export +# --------------------------------------------------------------------------- + +def _rewrite_db_images_for_epub(content_html: str, seen: dict[str, str]) -> tuple[str, list[dict]]: + """Replace /library/db-images/... img src with EPUB-internal paths. + + seen: sha256 → epub_path (deduplication across chapters) + Returns (modified_html, new_image_dicts) where dicts have epub_path/data/media_type. + """ + soup = BeautifulSoup(content_html, "html.parser") + new_images: list[dict] = [] + for img in soup.find_all("img"): + src = img.get("src", "") + if not src.startswith("/library/db-images/"): + continue + rel = src[len("/library/db-images/"):] + img_file = IMAGES_DIR / rel + if not img_file.exists(): + img.decompose() + continue + sha256 = img_file.stem + ext = img_file.suffix.lower() + if sha256 not in seen: + epub_path = f"OEBPS/Images/{sha256}{ext}" + seen[sha256] = epub_path + mime = {".jpg": "image/jpeg", ".png": "image/png", + ".webp": "image/webp", ".gif": "image/gif"}.get(ext, "image/jpeg") + new_images.append({"epub_path": epub_path, "data": img_file.read_bytes(), "media_type": mime}) + img["src"] = f"../Images/{sha256}{ext}" + return str(soup), new_images + + +@router.get("/api/library/export-epub/{filename:path}") +async def export_epub(filename: str): + """Export a DB-stored book as an EPUB download (no file written to disk).""" + if not is_db_filename(filename): + return JSONResponse({"error": "Not a DB book"}, status_code=400) + + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """SELECT title, author, publisher, series, series_index, publication_status, + source_url, description, publish_date + FROM library WHERE filename = %s""", + (filename,), + ) + meta_row = cur.fetchone() + if not meta_row: + return JSONResponse({"error": "Not found"}, status_code=404) + + cur.execute( + "SELECT tag, tag_type FROM book_tags WHERE filename = %s ORDER BY tag_type, tag", + (filename,), + ) + tag_rows = cur.fetchall() + + cur.execute( + "SELECT chapter_index, title, content FROM book_chapters " + "WHERE filename = %s ORDER BY chapter_index", + (filename,), + ) + ch_rows = cur.fetchall() + + cur.execute( + "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s", + (filename,), + ) + cover_row = cur.fetchone() + + title, author, publisher, series, series_index, pub_status, source_url, description, pub_date = meta_row + cover_data: bytes | None = bytes(cover_row[0]) if cover_row and cover_row[0] else None + + genres = [t for t, tp in tag_rows if tp == "genre"] + subgenres = [t for t, tp in tag_rows if tp == "subgenre"] + tags = [t for t, tp in tag_rows if tp in ("tag", "subject")] + + book_info = { + "genres": genres, "subgenres": subgenres, "tags": tags, + "description": description or "", + "source_url": source_url or "", + "publisher": publisher or "", + "series": series or "", + "series_index": series_index or 1, +
"publication_status": pub_status or "", + "updated_date": pub_date.isoformat() if pub_date else "", + } + + seen_images: dict[str, str] = {} + chapters = [] + for ch_idx, ch_title, ch_content in ch_rows: + modified_html, new_imgs = _rewrite_db_images_for_epub(ch_content, seen_images) + chapter_xhtml = make_chapter_xhtml(ch_title or f"Chapter {ch_idx + 1}", modified_html, ch_idx + 1) + chapters.append({"title": ch_title or f"Chapter {ch_idx + 1}", "xhtml": chapter_xhtml, "images": new_imgs}) + + try: + break_img_data = open("static/break.png", "rb").read() + except Exception: + break_img_data = b"" + + book_id = str(uuid.uuid4()) + epub_bytes = make_epub( + title or "Untitled", author or "Unknown", chapters, + cover_data, break_img_data, book_id, book_info, + ) + + safe_title = re.sub(r'[^\w\-. ]', '', (title or "book")).strip() or "book" + return Response( + content=epub_bytes, + media_type="application/epub+zip", + headers={"Content-Disposition": f'attachment; filename="{safe_title}.epub"'}, + ) + + @router.get("/library/read/{filename:path}", response_class=HTMLResponse) async def reader_page(filename: str, request: Request): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT title FROM library WHERE filename = %s", (filename,)) + row = cur.fetchone() + + if is_db_filename(filename): + if not row: + return HTMLResponse("Not found", status_code=404) + title = row[0] if row[0] else filename + return templates.TemplateResponse(request, "reader.html", { + "filename": filename, + "title": title, + "format": "epub", + "epub_url": "", + }) + path = resolve_library_path(filename) if path is None: return HTMLResponse("Not found", status_code=404) if not path.exists(): return HTMLResponse("Not found", status_code=404) - with get_db_conn() as conn: - with conn.cursor() as cur: - cur.execute("SELECT title FROM library WHERE filename = %s", (filename,)) - row = cur.fetchone() - title = row[0] if row and row[0] else filename + title = row[0] if row and row[0] else filename fmt = path.suffix.lower().lstrip(".") return templates.TemplateResponse(request, "reader.html", { "filename": filename, diff --git a/containers/novela/routers/search.py b/containers/novela/routers/search.py new file mode 100644 index 0000000..8325bb0 --- /dev/null +++ b/containers/novela/routers/search.py @@ -0,0 +1,63 @@ +"""search.py — Full-text search over DB-stored book chapters.""" + +from fastapi import APIRouter, Request +from fastapi.responses import HTMLResponse, JSONResponse +from fastapi.templating import Jinja2Templates + +from db import get_db_conn + +router = APIRouter() +templates = Jinja2Templates(directory="templates") + + +@router.get("/search", response_class=HTMLResponse) +async def search_page(request: Request): + return templates.TemplateResponse(request, "search.html", {"active": "search"}) + + +@router.get("/api/search") +async def api_search(q: str = ""): + q = q.strip() + if not q or len(q) > 500: + return JSONResponse([]) + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """ + SELECT + l.filename, + l.title, + l.author, + bc.chapter_index, + bc.title AS chapter_title, + ts_headline( + 'simple', bc.content, + plainto_tsquery('simple', %s), + 'MaxFragments=1, MaxWords=25, MinWords=8, StartSel=, StopSel=' + ) AS snippet, + ts_rank(bc.content_tsv, plainto_tsquery('simple', %s)) AS rank + FROM book_chapters bc + JOIN library l ON l.filename = bc.filename + WHERE (bc.content_tsv @@ plainto_tsquery('simple', %s) + OR LOWER(bc.title) LIKE LOWER('%%' || %s || 
'%%')) + AND NOT l.archived + ORDER BY rank DESC, bc.chapter_index ASC + LIMIT 30 + """, + (q, q, q, q), + ) + rows = cur.fetchall() + + results = [ + { + "filename": r[0], + "title": r[1] or "", + "author": r[2] or "", + "chapter_index": r[3], + "chapter_title": r[4] or "", + "snippet": r[5] or "", + "rank": float(r[6]), + } + for r in rows + ] + return JSONResponse(results) diff --git a/containers/novela/scrapers/__init__.py b/containers/novela/scrapers/__init__.py index 3fdb823..18fb974 100644 --- a/containers/novela/scrapers/__init__.py +++ b/containers/novela/scrapers/__init__.py @@ -1,9 +1,11 @@ from .base import BaseScraper +from .archiveofourown import ArchiveOfOurOwnScraper from .awesomedude import AwesomeDudeScraper from .gayauthors import GayAuthorsScraper # Register scrapers in priority order (first match wins) _SCRAPERS: list[type[BaseScraper]] = [ + ArchiveOfOurOwnScraper, AwesomeDudeScraper, GayAuthorsScraper, ] diff --git a/containers/novela/scrapers/archiveofourown.py b/containers/novela/scrapers/archiveofourown.py new file mode 100644 index 0000000..f82b02f --- /dev/null +++ b/containers/novela/scrapers/archiveofourown.py @@ -0,0 +1,206 @@ +import re +from urllib.parse import urljoin + +import httpx +from bs4 import BeautifulSoup + +from .base import BaseScraper + +AO3_BASE = "https://archiveofourown.org" + + +class ArchiveOfOurOwnScraper(BaseScraper): + + @classmethod + def matches(cls, url: str) -> bool: + return "archiveofourown.org" in url + + def _work_base_url(self, url: str) -> str: + """Strip chapter segment and query string; return /works/NNNNNN base URL.""" + m = re.search(r"(https?://[^/]+/works/\d+)", url) + return m.group(1) if m else url.rstrip("/") + + async def login(self, client: httpx.AsyncClient, username: str, password: str) -> bool: + r = await client.get(AO3_BASE + "/users/login") + soup = BeautifulSoup(r.text, "html.parser") + token_el = soup.find("input", {"name": "authenticity_token"}) + token = token_el["value"] if token_el else "" + resp = await client.post( + AO3_BASE + "/users/login", + data={ + "user[login]": username, + "user[password]": password, + "authenticity_token": token, + "commit": "Log in", + }, + ) + # Successful login redirects away from the login page + return "/users/login" not in str(resp.url) + + async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict: + base_url = self._work_base_url(url) + r = await client.get(base_url, params={"view_adult": "true"}) + soup = BeautifulSoup(r.text, "html.parser") + + # Title + title_el = soup.find("h2", class_="title") + book_title = title_el.get_text(strip=True) if title_el else "Unknown title" + + # Author — can be multiple; Anonymous if no author link + byline = soup.find("h3", class_="byline") + if byline: + author_links = byline.find_all("a", rel="author") + author = ", ".join(a.get_text(strip=True) for a in author_links) if author_links else "Anonymous" + else: + author = "Anonymous" + + # Tags from dl.work.meta.group + meta_dl = soup.find("dl", class_="work") + + def _tag_list(dl, css_class: str) -> list[str]: + dd = dl.find("dd", class_=css_class) if dl else None + return [a.get_text(strip=True) for a in dd.find_all("a")] if dd else [] + + fandoms = _tag_list(meta_dl, "fandom") + ratings = _tag_list(meta_dl, "rating") + categories = _tag_list(meta_dl, "category") + relationships = _tag_list(meta_dl, "relationship") + characters = _tag_list(meta_dl, "character") + freeform_tags = _tag_list(meta_dl, "freeform") + + # Series + series = "" + series_index_hint = 0 + if 
meta_dl: + series_dd = meta_dl.find("dd", class_="series") + if series_dd: + series_link = series_dd.find("a") + if series_link: + series = series_link.get_text(strip=True) + pos_span = series_dd.find("span", class_="position") + if pos_span: + m = re.search(r"Part\s+(\d+)", pos_span.get_text(), re.I) + if m: + series_index_hint = int(m.group(1)) + + # Stats (nested dl.stats inside the meta dl) + published = "" + updated_date = "" + publication_status = "" + if meta_dl: + stats_dl = meta_dl.find("dl", class_="stats") + if stats_dl: + pub_dd = stats_dl.find("dd", class_="published") + if pub_dd: + published = pub_dd.get_text(strip=True) + + status_dt = stats_dl.find("dt", class_="status") + status_dd = stats_dl.find("dd", class_="status") + if status_dt and status_dd: + updated_date = status_dd.get_text(strip=True) + if "Completed" in status_dt.get_text(): + publication_status = "Complete" + else: + publication_status = "Ongoing" + else: + # No status entry — determine from chapters count (N/N = complete) + updated_date = published + chapters_dd = stats_dl.find("dd", class_="chapters") + if chapters_dd: + m = re.match(r"(\d+)/(\d+|\?)", chapters_dd.get_text(strip=True)) + if m: + if m.group(2) == "?": + publication_status = "Ongoing" + elif m.group(1) == m.group(2): + publication_status = "Complete" + + # Summary + description = "" + summary_div = soup.find("div", class_="summary") + if summary_div: + userstuff = summary_div.find("blockquote", class_="userstuff") + if userstuff: + paras = [p.get_text(strip=True) for p in userstuff.find_all("p") if p.get_text().strip()] + description = "\n\n".join(paras) if paras else userstuff.get_text(strip=True) + + # Chapter list via /navigate + chapter_links = [] + chapter_method = "html_scan" + try: + nr = await client.get(base_url + "/navigate", params={"view_adult": "true"}) + nsoup = BeautifulSoup(nr.text, "html.parser") + chapter_ol = nsoup.find("ol", class_="chapter") + if chapter_ol: + for li in chapter_ol.find_all("li"): + a = li.find("a", href=True) + if a: + chapter_links.append({ + "url": urljoin(AO3_BASE, a["href"]), + "title": a.get_text(strip=True), + }) + except Exception: + pass + + # Fallback: single-chapter work — the work page itself is the content + if not chapter_links: + chapter_method = "fallback_numeric" + chapter_links.append({"url": base_url, "title": book_title}) + + # Map to Novela tag structure: + # fandoms → genres + # ratings + categories → subgenres + # relationships + characters + freeform → tags + return { + "title": book_title, + "author": author, + "publisher": "Archive of Our Own", + "series": series, + "series_index_hint": series_index_hint, + "genres": fandoms, + "subgenres": ratings + categories, + "tags": relationships + characters + freeform_tags, + "description": description, + "updated_date": updated_date, + "publication_status": publication_status, + "source_url": base_url, + "chapters": chapter_links, + "chapter_method": chapter_method, + } + + async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: + r = await client.get(ch["url"], params={"view_adult": "true"}) + soup = BeautifulSoup(r.text, "html.parser") + + # Chapter title and optional summary from the chapter preface + title = ch["title"] + chapter_summary_bq = None + chapters_div = soup.find("div", id="chapters") + if chapters_div: + chapter_div = chapters_div.find("div", class_="chapter") + if chapter_div: + title_el = chapter_div.find("h3", class_="title") + if title_el: + raw = title_el.get_text(strip=True) + if raw: + title = 
raw + summary_div = chapter_div.find("div", class_="summary") + if summary_div: + chapter_summary_bq = summary_div.find("blockquote", class_="userstuff") + + # Content: div.userstuff inside #chapters (excludes author notes) + content_el = None + if chapters_div: + content_el = chapters_div.find("div", class_="userstuff") + if not content_el: + content_el = soup.find("div", attrs={"role": "article"}) + + # Prepend chapter summary as blockquote before story content + if chapter_summary_bq and content_el: + content_el.insert(0, chapter_summary_bq) + + return { + "title": title, + "content_el": content_el, + "selector_id": content_el.get("id") if content_el else None, + "selector_class": " ".join(content_el.get("class", [])) if content_el else None, + } diff --git a/containers/novela/static/conversion.js b/containers/novela/static/conversion.js index 12ba5ab..7e8b37b 100644 --- a/containers/novela/static/conversion.js +++ b/containers/novela/static/conversion.js @@ -71,9 +71,14 @@ function connectConversionStream(job_id) { document.querySelectorAll('.chapter-item').forEach(el => el.className = 'chapter-item done'); document.getElementById('result-meta').innerHTML = `${esc(d.title)}
${d.chapters} chapters successfully converted`; - document.getElementById('download-btn').onclick = () => { - window.location = `/download/${encodeURIComponent(d.filename)}`; - }; + const dlBtn = document.getElementById('download-btn'); + if (d.storage_type === 'db') { + dlBtn.querySelector('span') && (dlBtn.querySelector('span').textContent = 'Export EPUB'); + dlBtn.onclick = () => { window.location = `/api/library/export-epub/${encodeURIComponent(d.filename)}`; }; + } else { + dlBtn.querySelector('span') && (dlBtn.querySelector('span').textContent = 'Download EPUB'); + dlBtn.onclick = () => { window.location = `/download/${encodeURIComponent(d.filename)}`; }; + } document.getElementById('book-detail-btn').onclick = () => { window.location = `/library/book/${encodeURIComponent(d.filename)}`; }; diff --git a/containers/novela/static/editor.css b/containers/novela/static/editor.css index 21ee86b..e61dd1c 100644 --- a/containers/novela/static/editor.css +++ b/containers/novela/static/editor.css @@ -30,6 +30,13 @@ html, body { height: 100%; background: var(--bg); color: var(--text); font-famil text-align: center; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } +.chapter-title-input { + flex: 1; font-size: 0.72rem; font-family: var(--mono); color: var(--text); + background: var(--surface2); border: 1px solid var(--border); border-radius: var(--radius); + padding: 0.25rem 0.5rem; outline: none; min-width: 0; +} +.chapter-title-input:focus { border-color: var(--accent); } + .header-actions { display: flex; align-items: center; gap: 0.5rem; flex-shrink: 0; } .save-status { diff --git a/containers/novela/static/editor.js b/containers/novela/static/editor.js index 9ca9d63..78c9626 100644 --- a/containers/novela/static/editor.js +++ b/containers/novela/static/editor.js @@ -1,20 +1,26 @@ require.config({ paths: { vs: 'https://cdn.jsdelivr.net/npm/monaco-editor@0.45.0/min/vs' } }); -const { filename } = EDITOR; +const { filename, is_db } = EDITOR; let editor = null; let chapters = []; let currentIndex = -1; let dirty = new Set(); // indices with unsaved changes let pendingContent = new Map(); // index -> modified content not yet saved +let pendingTitles = new Map(); // index -> modified title not yet saved (DB only) let loadingChapter = false; // suppress dirty events during setValue let saving = false; // ── Init Monaco ─────────────────────────────────────────────────────────────── require(['vs/editor/editor.main'], function () { + if (is_db) { + document.getElementById('header-chapter').style.display = 'none'; + document.getElementById('chapter-title-input').style.display = ''; + } + editor = monaco.editor.create(document.getElementById('editor-pane'), { - language: 'xml', + language: is_db ? 
'html' : 'xml', theme: 'vs-dark', wordWrap: 'on', minimap: { enabled: true }, @@ -39,6 +45,19 @@ require(['vs/editor/editor.main'], function () { // Ctrl+S / Cmd+S editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, saveChapter); + if (is_db) { + document.getElementById('chapter-title-input').addEventListener('input', () => { + if (currentIndex >= 0) { + pendingTitles.set(currentIndex, document.getElementById('chapter-title-input').value); + dirty.add(currentIndex); + renderChapterList(); + setStatus('dirty', 'Unsaved changes'); + document.getElementById('btn-save').disabled = false; + updateSaveAll(); + } + }); + } + loadChapterList(); }); @@ -57,6 +76,7 @@ async function loadChapterList(targetIndex = 0) { currentIndex = -1; dirty.clear(); pendingContent.clear(); + pendingTitles.clear(); renderChapterList(); document.getElementById('header-chapter').textContent = 'No chapters'; document.getElementById('btn-save').disabled = true; @@ -94,6 +114,11 @@ async function switchChapter(index) { if (dirty.has(currentIndex) && editor) { pendingContent.set(currentIndex, editor.getValue()); } + // Preserve title input for DB books + if (is_db && currentIndex >= 0) { + const inp = document.getElementById('chapter-title-input'); + if (inp) pendingTitles.set(currentIndex, inp.value); + } loadChapter(index); } @@ -102,19 +127,19 @@ async function loadChapter(index) { document.getElementById('btn-save').disabled = true; document.getElementById('btn-break').disabled = true; document.getElementById('btn-del-page').disabled = true; - document.getElementById('header-chapter').textContent = 'Loading…'; + if (!is_db) document.getElementById('header-chapter').textContent = 'Loading…'; let content, title; if (pendingContent.has(index)) { content = pendingContent.get(index); - title = chapters[index]?.title ?? ''; + title = pendingTitles.has(index) ? pendingTitles.get(index) : (chapters[index]?.title ?? ''); } else { const resp = await fetch(`/api/edit/chapter/${index}/${encodeURIComponent(filename)}`); if (!resp.ok) { setStatus('error', 'Load failed'); return; } const data = await resp.json(); content = data.content; - title = data.title; + title = pendingTitles.has(index) ? pendingTitles.get(index) : data.title; } currentIndex = index; @@ -123,6 +148,7 @@ async function loadChapter(index) { editor.setValue(content); editor.setScrollTop(0); loadingChapter = false; + editor.focus(); // Restore dirty state based on whether we loaded from pending cache if (dirty.has(index)) { @@ -134,7 +160,11 @@ async function loadChapter(index) { } renderChapterList(); - document.getElementById('header-chapter').textContent = title; + if (is_db) { + document.getElementById('chapter-title-input').value = title; + } else { + document.getElementById('header-chapter').textContent = title; + } document.getElementById('btn-break').disabled = false; document.getElementById('btn-del-page').disabled = chapters.length <= 1; updateSaveAll(); @@ -149,18 +179,28 @@ async function saveChapter() { setStatus('saving', 'Saving…'); try { + const saveBody = { content: editor.getValue() }; + if (is_db) { + const inp = document.getElementById('chapter-title-input'); + saveBody.title = inp ? 
inp.value.trim() : (pendingTitles.get(currentIndex) || ''); + } const resp = await fetch( `/api/edit/chapter/${currentIndex}/${encodeURIComponent(filename)}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ content: editor.getValue() }), + body: JSON.stringify(saveBody), } ); const data = await resp.json(); if (data.ok) { dirty.delete(currentIndex); pendingContent.delete(currentIndex); + if (is_db && chapters[currentIndex]) { + const saved = pendingTitles.get(currentIndex) || chapters[currentIndex].title; + chapters[currentIndex].title = saved || chapters[currentIndex].title; + pendingTitles.delete(currentIndex); + } renderChapterList(); setStatus('saved', 'Saved'); setTimeout(() => setStatus('', ''), 2000); @@ -186,9 +226,13 @@ async function saveAllChapters() { if (btn) btn.disabled = true; setStatus('saving', 'Saving all…'); - // Flush current editor content into pendingContent first + // Flush current editor content and title into pending caches first if (currentIndex >= 0 && dirty.has(currentIndex)) { pendingContent.set(currentIndex, editor.getValue()); + if (is_db) { + const inp = document.getElementById('chapter-title-input'); + if (inp) pendingTitles.set(currentIndex, inp.value); + } } const indices = [...dirty]; @@ -196,21 +240,29 @@ async function saveAllChapters() { const content = pendingContent.has(i) ? pendingContent.get(i) : (i === currentIndex ? editor.getValue() : null); - if (!content) continue; + // For DB books, a title-only change has no pendingContent — still need to save + const hasTitleChange = is_db && pendingTitles.has(i); + if (!content && !hasTitleChange) continue; try { + const saveBody = { content: content || '' }; + if (is_db) saveBody.title = pendingTitles.has(i) ? pendingTitles.get(i) : (chapters[i]?.title || ''); const resp = await fetch( `/api/edit/chapter/${i}/${encodeURIComponent(filename)}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ content }), + body: JSON.stringify(saveBody), } ); const data = await resp.json(); if (data.ok) { dirty.delete(i); pendingContent.delete(i); + if (is_db && chapters[i]) { + chapters[i].title = pendingTitles.get(i) || chapters[i].title; + pendingTitles.delete(i); + } } } catch { setStatus('error', `Save failed on chapter ${i + 1}`); @@ -251,10 +303,11 @@ function updateSaveAll() { function insertBreak() { if (!editor || currentIndex < 0) return; + const breakSrc = is_db ? '/static/break.png' : '../Images/break.png'; const pos = editor.getPosition(); editor.executeEdits('insert-break', [{ range: new monaco.Range(pos.lineNumber, pos.column, pos.lineNumber, pos.column), - text: '\n
<img src="../Images/break.png"/>\n',
+ text: `\n<img src="${breakSrc}"/>
\n`, forceMoveMarkers: true, }]); editor.focus(); @@ -286,6 +339,7 @@ async function addChapter() { dirty.clear(); pendingContent.clear(); + pendingTitles.clear(); await loadChapterList(data.index ?? Math.max(currentIndex + 1, 0)); setStatus('saved', 'Page added'); setTimeout(() => setStatus('', ''), 1500); @@ -315,6 +369,7 @@ async function deleteChapter() { dirty.clear(); pendingContent.clear(); + pendingTitles.clear(); await loadChapterList(data.index ?? Math.max(currentIndex - 1, 0)); setStatus('saved', 'Page deleted'); setTimeout(() => setStatus('', ''), 1500); diff --git a/containers/novela/static/sidebar.css b/containers/novela/static/sidebar.css index 47c34aa..df6d039 100644 --- a/containers/novela/static/sidebar.css +++ b/containers/novela/static/sidebar.css @@ -72,8 +72,10 @@ html { text-decoration: none; transition: background 0.12s, color 0.12s; } +.sidebar-nav a:visited { color: var(--text-dim); } .sidebar-nav a:hover { background: var(--surface2); color: var(--text); } -.sidebar-nav a.active { background: var(--surface2); color: var(--accent); } +.sidebar-nav a.active, +.sidebar-nav a.active:visited { background: var(--surface2); color: var(--accent); } .sidebar-nav a svg { flex-shrink: 0; } .sidebar-count { diff --git a/containers/novela/templates/_sidebar.html b/containers/novela/templates/_sidebar.html index 42816a0..6089076 100644 --- a/containers/novela/templates/_sidebar.html +++ b/containers/novela/templates/_sidebar.html @@ -152,6 +152,14 @@ +
• Search
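The new sidebar entry links to `/search`; the JSON endpoint behind that page can also be exercised directly. A minimal sketch, assuming a local instance at `localhost:8000` and the `httpx` client (response fields follow `routers/search.py`):

```python
import httpx

BASE = "http://localhost:8000"  # assumed local Novela instance

def search(query: str) -> None:
    """Print full-text-search hits from GET /api/search."""
    resp = httpx.get(f"{BASE}/api/search", params={"q": query})
    resp.raise_for_status()
    for hit in resp.json():  # fields per routers/search.py
        print(f"{hit['rank']:.3f}  {hit['title']} / {hit['chapter_title']}"
              f"  [{hit['filename']} #{hit['chapter_index']}]")
        print(f"        {hit['snippet']}")

search("second foundation")
```

Empty or over-long queries (more than 500 characters) return an empty list, matching the guard in `api_search`.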
diff --git a/containers/novela/templates/backup.html index 6652299..b621ffb 100644 --- a/containers/novela/templates/backup.html +++ b/containers/novela/templates/backup.html @@ -236,6 +236,43 @@
+ Restore
+ Browse a snapshot and restore individual books from Dropbox back to disk.
    + @@ -503,7 +540,153 @@ } async function refreshAll() { - await Promise.all([loadDropboxSettings(), loadHealth(), loadStatus(), loadHistory()]); + await Promise.all([loadDropboxSettings(), loadHealth(), loadStatus(), loadHistory(), loadSnapshots()]); + } + + // ── Restore ───────────────────────────────────────────────────────────── + + let _restoreFiles = []; + + async function loadSnapshots() { + const sel = document.getElementById('snapshot-select'); + try { + const r = await fetch('/api/backup/snapshots'); + const d = await r.json(); + if (!d.ok || !d.snapshots.length) { + sel.innerHTML = ''; + return; + } + const current = sel.value; + sel.innerHTML = '' + + d.snapshots.map(s => { + const label = s.created_at + ? `${s.name} (${s.created_at.replace('T', ' ').replace('Z', ' UTC')})` + : s.name; + return ``; + }).join(''); + } catch (_) { + sel.innerHTML = ''; + } + } + + async function onSnapshotChange() { + const name = document.getElementById('snapshot-select').value; + const panel = document.getElementById('restore-file-panel'); + const status = document.getElementById('restore-status'); + if (!name) { + panel.style.display = 'none'; + _restoreFiles = []; + status.textContent = ''; + return; + } + status.className = 'status-line warn'; + status.textContent = 'Loading snapshot files…'; + try { + const r = await fetch(`/api/backup/snapshots/${encodeURIComponent(name)}/files`); + const d = await r.json(); + if (!d.ok) throw new Error(d.error || 'failed'); + _restoreFiles = d.files; + document.getElementById('restore-search').value = ''; + panel.style.display = ''; + renderRestoreFiles(); + status.className = 'status-line ok'; + status.textContent = `${d.files.length} file(s) in snapshot.`; + } catch (e) { + status.className = 'status-line err'; + status.textContent = `Failed to load snapshot files: ${e}`; + panel.style.display = 'none'; + } + } + + function fmtBytes(bytes) { + if (!bytes) return '-'; + if (bytes >= 1024 * 1024) return (bytes / 1024 / 1024).toFixed(1) + ' MB'; + if (bytes >= 1024) return Math.round(bytes / 1024) + ' KB'; + return bytes + ' B'; + } + + function renderRestoreFiles() { + const q = (document.getElementById('restore-search').value || '').toLowerCase().trim(); + const body = document.getElementById('restore-file-body'); + const filtered = q ? _restoreFiles.filter(f => f.path.toLowerCase().includes(q)) : _restoreFiles; + if (!filtered.length) { + body.innerHTML = 'No files found.'; + document.getElementById('btn-restore-selected').disabled = true; + return; + } + body.innerHTML = filtered.map(f => { + const ext = f.path.split('.').pop().toUpperCase(); + const parts = f.path.split('/'); + const name = parts[parts.length - 1]; + const dir = parts.slice(0, -1).join('/'); + const onDisk = f.exists_locally + ? '✓ exists' + : 'missing'; + return ` + + ${esc(ext)} + ${esc(name)}
    ${esc(dir)} + ${esc(fmtBytes(f.size))} + ${onDisk} + + `; + }).join(''); + updateRestoreBtn(); + } + + function updateRestoreBtn() { + const checked = document.querySelectorAll('.restore-chk:checked').length; + document.getElementById('btn-restore-selected').disabled = checked === 0; + } + + function selectAllRestoreFiles() { + document.querySelectorAll('.restore-chk').forEach(el => { el.checked = true; }); + updateRestoreBtn(); + } + + function clearRestoreSelection() { + document.querySelectorAll('.restore-chk').forEach(el => { el.checked = false; }); + updateRestoreBtn(); + } + + function restoreRowBtn(btn) { + const snapshotName = document.getElementById('snapshot-select').value; + _doRestore(snapshotName, [btn.dataset.path]); + } + + function restoreSelected() { + const snapshotName = document.getElementById('snapshot-select').value; + const paths = Array.from(document.querySelectorAll('.restore-chk:checked')).map(el => el.dataset.path); + _doRestore(snapshotName, paths); + } + + async function _doRestore(snapshotName, paths) { + if (!paths.length) return; + const status = document.getElementById('restore-status'); + status.className = 'status-line warn'; + status.textContent = `Restoring ${paths.length} file(s)…`; + try { + const r = await fetch('/api/backup/restore', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify({snapshot_name: snapshotName, files: paths}), + }); + const d = await r.json(); + if (!d.ok) throw new Error(d.error || 'failed'); + const failed = (d.results || []).filter(x => !x.ok); + if (failed.length) { + status.className = 'status-line warn'; + status.textContent = `Restored ${d.restored}/${d.total}. Errors: ${failed.map(x => `${x.path}: ${x.error}`).join(' | ')}`; + } else { + status.className = 'status-line ok'; + status.textContent = `Restored ${d.restored}/${d.total} file(s) successfully.`; + } + // Refresh exists_locally state + await onSnapshotChange(); + } catch (e) { + status.className = 'status-line err'; + status.textContent = `Restore failed: ${e}`; + } } refreshAll(); diff --git a/containers/novela/templates/book.html b/containers/novela/templates/book.html index bd78b77..6d3fdcb 100644 --- a/containers/novela/templates/book.html +++ b/containers/novela/templates/book.html @@ -155,6 +155,16 @@ Mark as unread {% endif %} + {% if storage_type == 'db' %} +
    + + + + + + Export EPUB + + {% else %} @@ -163,6 +173,7 @@ Download + {% endif %} - {% if filename.endswith('.epub') %} + {% if filename.endswith('.epub') and storage_type != 'db' %} @@ -193,6 +204,23 @@ Edit EPUB {% endif %} + {% if storage_type == 'db' %} + + + + + + Edit chapters + + {% endif %} + {% if filename.endswith('.epub') and storage_type != 'db' %} + + {% endif %} @@ -330,6 +358,8 @@ + diff --git a/containers/novela/templates/editor.html b/containers/novela/templates/editor.html index 6973b0b..8f44082 100644 --- a/containers/novela/templates/editor.html +++ b/containers/novela/templates/editor.html @@ -24,6 +24,7 @@ {{ (title or filename) | truncate(30, True) }}
+ Save as
    + + + + + + diff --git a/docs/TECHNICAL.md b/docs/TECHNICAL.md index 05c4b40..0e8afcc 100644 --- a/docs/TECHNICAL.md +++ b/docs/TECHNICAL.md @@ -106,9 +106,12 @@ Home read sections are ordered oldest-first: - `novels_read`: `ORDER BY MAX(read_at) ASC` ### `routers/reader.py` +- `GET /library/db-images/{path:path}` — serve image from content-addressed imagestore (`library/images/`); security: path must be under `IMAGES_DIR` +- `POST /api/library/convert-to-db/{filename:path}` — convert on-disk EPUB to a DB-stored book; extracts chapters via `_epub_body_inner` (stores images in imagestore, rewrites src to `/library/db-images/…`), migrates all child tables (INSERT new library row → UPDATE children → DELETE old row), deletes EPUB file; returns `{ok, new_filename}` +- `GET /api/library/export-epub/{filename:path}` — build and stream an EPUB from a DB-stored book; `_rewrite_db_images_for_epub` rewrites `/library/db-images/…` back to `OEBPS/Images/…` paths (dedup by sha256); returns as `Content-Disposition: attachment` - `GET /library/epub/{filename}` — serve EPUB inline (no attachment header) -- `GET /library/chapters/{filename}` — EPUB spine as JSON -- `GET /library/chapter/{index}/{filename}` — single EPUB chapter as HTML fragment +- `GET /library/chapters/{filename}` — EPUB spine as JSON; for `storage_type='db'` books returns chapters from `book_chapters` +- `GET /library/chapter/{index}/{filename}` — single chapter as HTML fragment; for `storage_type='db'` books reads from `book_chapters` - `GET /library/chapter-img/{path}?filename=…` — image extracted from EPUB ZIP; `path` is the full internal ZIP path (e.g. `OEBPS/Images/cover.jpg` or `EPUB/images/cover.jpg`); case-insensitive fallback for mismatched folder names - `GET /library/pdf/{filename}?page=N&dpi=150` — render PDF page as PNG - `GET /api/pdf/info/{filename}` — `{"page_count": N}` @@ -119,7 +122,7 @@ Home read sections are ordered oldest-first: - `POST /library/mark-read/{filename}` — mark as read (with optional date) - `GET /library/book/{filename}` — book detail page - `GET /api/genres` — all tags from `book_tags` (optional `?type=genre|subgenre|tag`) -- `PATCH /library/book/{filename}` — update metadata + tags; moves file if path fields change; DB-only for non-EPUB +- `PATCH /library/book/{filename}` — update metadata + tags; moves file if path fields change; DB-only for non-EPUB; for `storage_type='db'` books: recomputes synthetic `db/…` filename, FK-safe rename (INSERT→UPDATE children→DELETE old), updates `book_chapters` + `bookmarks` as well - `POST /library/rating/{filename}` — set/clear 1–5 star rating; writes to EPUB OPF / CBZ ComicInfo.xml; DB-only for CBR/PDF - `GET /library/read/{filename}` — reader page (EPUB or PDF); supports `?bm_ch=N&bm_scroll=F` to jump to bookmark position - `GET /library/bookmarks/{filename}` — list bookmarks for a book @@ -135,11 +138,11 @@ Home read sections are ordered oldest-first: Filename parsing is done client-side in `bulk_import.html`. The page uses a free-text `%placeholder%` pattern (e.g. `%series% - %volume% - %title% - %year%`). Available placeholders: `%series%` `%volume%` `%title%` `%year%` `%month%` `%day%` `%author%` `%publisher%` `%ignore%`. Colored chips can be clicked (insert at cursor) or dragged onto the input. Pattern is converted to a regex at parse time. Shared metadata fields override filename-parsed values. Files are uploaded in batches of 5 with a progress bar. 
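A worked example of the `%placeholder%` conversion may help. The sketch below is illustrative Python, not the actual client-side JavaScript in `bulk_import.html`; the helper name `pattern_to_regex` and the per-placeholder patterns (numeric for `%volume%`/`%year%`/`%month%`/`%day%`, lazy free text otherwise) are assumptions:

```python
import re

# Placeholders supported by the bulk-import pattern syntax.
PLACEHOLDER = re.compile(
    r"%(series|volume|title|year|month|day|author|publisher|ignore)%"
)

def pattern_to_regex(pattern: str) -> re.Pattern:
    """Turn a '%placeholder%' pattern into a regex with named groups (sketch)."""
    out, pos = [], 0
    for m in PLACEHOLDER.finditer(pattern):
        out.append(re.escape(pattern[pos:m.start()]))  # literal text between placeholders
        name = m.group(1)
        if name == "ignore":
            out.append(r".*?")                   # matched but not captured
        elif name in ("volume", "year", "month", "day"):
            out.append(rf"(?P<{name}>\d+)")      # numeric fields
        else:
            out.append(rf"(?P<{name}>.+?)")      # free-text fields, lazy
        pos = m.end()
    out.append(re.escape(pattern[pos:]))
    return re.compile("^" + "".join(out) + "$")

rx = pattern_to_regex("%series% - %volume% - %title% - %year%")
m = rx.match("Foundation - 3 - Second Foundation - 1953")
print(m.groupdict())
# {'series': 'Foundation', 'volume': '3', 'title': 'Second Foundation', 'year': '1953'}
```

Shared metadata fields entered on the page would then override any values parsed this way, per the behavior described above.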
### `routers/editor.py` -- `GET /library/editor/{filename}` — EPUB chapter editor page -- `GET /api/edit/chapter/{index}/{filename}` — get chapter HTML -- `POST /api/edit/chapter/{index}/{filename}` — save chapter HTML -- `POST /api/edit/chapter/add/{filename}` — add new chapter -- `DELETE /api/edit/chapter/{index}/{filename}` — delete chapter +- `GET /library/editor/{filename}` — chapter editor page; supports both EPUB files and DB-stored books (`db/…` filenames); passes `is_db` flag to template; DB branch queries `library` table directly (no file check) +- `GET /api/edit/chapter/{index}/{filename}` — get chapter content; DB branch reads from `book_chapters` and returns `{index, href, title, content}` +- `POST /api/edit/chapter/{index}/{filename}` — save chapter; DB branch accepts `{content, title}`, calls `upsert_chapter` (updates `content_tsv` too) +- `POST /api/edit/chapter/add/{filename}` — add new chapter after `after_index`; DB branch shifts `chapter_index` up via `UPDATE … SET chapter_index = chapter_index + 1 WHERE chapter_index >= insert_idx` then inserts +- `DELETE /api/edit/chapter/{index}/{filename}` — delete chapter; DB branch deletes and re-indexes via `UPDATE … SET chapter_index = chapter_index - 1 WHERE chapter_index > index` ### `routers/grabber.py` - `GET /grabber` — grabber page @@ -151,8 +154,23 @@ Filename parsing is done client-side in `bulk_import.html`. The page uses a free - `POST /credentials` — save credential - `DELETE /credentials/{site}` — delete credential - `POST /preload` — preload book info from URL -- `POST /convert` — run scrape + convert to EPUB -- `GET /events/{job_id}` — SSE stream for job progress +- `POST /convert` — run scrape; body may include `storage_mode: "db"` (default) or `"epub"` to control output format +- `GET /events/{job_id}` — SSE stream for job progress; `done` event includes `storage_type` (`'db'` or `'file'`) + +Scrape/convert flow (DB storage — default): +1. Fetch book info + chapters via scraper +2. Per chapter: download images → write to `library/images/{sha2}/{sha256}{ext}` (content-addressed) → rewrite `img[src]` to `/library/db-images/...` → build `content_html` via `element_to_xhtml` +3. One DB transaction: `ensure_unique_db_filename` → `upsert_book` (storage_type='db') → `upsert_chapter` for each chapter → `upsert_cover_cache` if cover provided +4. Synthetic filename: `db/{publisher}/{author}/{title}` (or `db/{pub}/{auth}/Series/{series}/{idx} - {title}` for series) + +Scrape/convert flow (EPUB file — `storage_mode: "epub"`): +1–2. Same as DB flow (images downloaded, HTML built) +3. Chapters converted to XHTML via `make_chapter_xhtml`; EPUB file built via `make_epub` and written to `library/epub/…` +4. 
`upsert_book` called with `storage_type='file'` + +### `routers/search.py` +- `GET /search` — full-text search page (`search.html`); Enter-to-search, `?q=` param auto-runs on load +- `GET /api/search?q=…` — FTS over `book_chapters.content_tsv`; uses `plainto_tsquery('simple', q)` with `ts_rank` ordering and `ts_headline` for highlighted snippets; also matches chapters whose `title` contains the query (case-insensitive `ILIKE` fallback); LIMIT 30; excludes archived books; results include `filename`, `title`, `author`, `chapter_index`, `chapter_title`, `snippet`, `rank` ### `routers/settings.py` - `GET /settings` — settings page @@ -200,6 +218,9 @@ URL is stored in the `authors` table (`name` unique, `url`, `created_at`, `updat - `GET /api/backup/history` — backup run history (last 20) - `GET /api/backup/progress` — live progress of running backup `{running, done, total, phase}` - `POST /api/backup/run` — trigger backup (background task) +- `GET /api/backup/snapshots` — list available snapshots `{ok, snapshots: [{name, created_at}]}` +- `GET /api/backup/snapshots/{snapshot_name}/files` — list files in a snapshot with local existence check `{ok, snapshot, files: [{path, size, sha256, exists_locally}]}` +- `POST /api/backup/restore` — restore files from a snapshot: `{snapshot_name, files: [rel_paths]}`; downloads from Dropbox, writes to disk, re-indexes via `scan_media` + `upsert_book`; returns `{ok, restored, total, results: [{path, ok, error?}]}` --- @@ -405,6 +426,58 @@ Loaded by `index.html` (Convert page) and `grabber.html` (Grabber page). Require --- +## DB-Stored Books + +Books scraped via the grabber are stored entirely in PostgreSQL (`storage_type = 'db'`). No EPUB file is written. + +### New tables + +| Table | Key columns | Notes | +|---|---|---| +| `book_chapters` | `filename FK, chapter_index, title, content TEXT, content_tsv TSVECTOR` | Unique on `(filename, chapter_index)`; GIN index on `content_tsv` for FTS; `content_tsv` is `to_tsvector('simple', title || ' ' || stripped_html)` — title included for title-based FTS matches | +| `book_images` | `sha256 PK, ext, media_type, size_bytes` | Content-addressed; files live at `library/images/{sha256[:2]}/{sha256}{ext}` | + +### `library.storage_type` + +| Value | Meaning | +|---|---| +| `'file'` | Book lives on disk (EPUB/PDF/CBR/CBZ); default for all existing books | +| `'db'` | Book content lives in `book_chapters`; no file on disk | + +### Synthetic filename for DB books + +`db/{publisher}/{author}/{title}` — or for series: `db/{publisher}/{author}/Series/{series}/{idx:03d} - {title}` + +Same sanitization rules as file-based paths. Uniqueness enforced via `ensure_unique_db_filename` (DB lookup, not filesystem). + +### Chapter editor for DB books + +`GET /library/editor/{filename}` supports DB-stored books. The Monaco editor shows `language: 'html'` for DB books (vs `'xml'` for EPUB). The header shows a title input instead of a read-only chapter name. Unsaved content and titles are preserved across chapter switches via `pendingContent` and `pendingTitles` maps. `editor.focus()` is called after every content load so the editor is immediately interactive. + +### Imagestore + +Images embedded in chapter HTML are stored content-addressed at `library/images/{sha256[:2]}/{sha256}{ext}`. 
+- Served via `GET /library/db-images/{path:path}` +- URLs embedded in `book_chapters.content` as absolute paths: `/library/db-images/...` +- `book_images` table registers each unique image (auto-deduplication via sha256) + +### EPUB → DB conversion + +`POST /api/library/convert-to-db/{filename}` converts an on-disk EPUB to `storage_type='db'`: +1. Parse EPUB spine → per item: extract body HTML via `_epub_body_inner`, store images in imagestore via `write_image_file`, rewrite `img[src]` to `/library/db-images/…` +2. Compute new synthetic `db/…` filename via `make_rel_path(media_type="db", …)` + `ensure_unique_db_filename` +3. DB transaction: INSERT new library row (storage_type='db') → UPDATE all child tables (book_tags, reading_progress, reading_sessions, bookmarks, library_cover_cache, book_chapters) → DELETE old library row +4. Delete EPUB file from disk + `prune_empty_dirs` + +### DB → EPUB export + +`GET /api/library/export-epub/{filename}` streams an EPUB built from DB content: +1. Query metadata, tags, chapters, cover from DB +2. Per chapter: `_rewrite_db_images_for_epub` strips `/library/db-images/` prefix, reads files from `IMAGES_DIR`, deduplicates by sha256, assigns `OEBPS/Images/{sha256}{ext}` paths, rewrites `img[src]` to `../Images/…` +3. Build EPUB via `make_epub()`; return as `Content-Disposition: attachment` + +--- + ## Known Bugs Fixed - `renderGenreView` and `renderSearchResults` in `library.js` referenced `b.genres` (non-existent). Fixed: use `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`. - `PillInput` in `book.js` did not handle comma as delimiter and did not flush on save. Fixed: comma keydown + `flush()` in `saveEdit()`. diff --git a/docs/changelog-develop.md b/docs/changelog-develop.md index cda1102..4d899d6 100644 --- a/docs/changelog-develop.md +++ b/docs/changelog-develop.md @@ -1,5 +1,60 @@ # Develop Changelog +## 2026-04-03 (3) +- DB chapter editor: Monaco-based editor now supports DB-stored books + - `GET /library/editor/{filename}` handles `db/…` filenames; `is_db` flag passed to template + - `GET /api/edit/chapter/{index}/{filename}` and `POST …`: DB branches query/update `book_chapters` directly; save calls `upsert_chapter` (updates `content_tsv` too) + - `POST /api/edit/chapter/add/{filename}` and `DELETE …`: DB branches insert/delete with `chapter_index` shift via `UPDATE … SET chapter_index = chapter_index ± 1` + - Title editing: header chapter-name replaced with a text input for DB books; `pendingTitles` map preserves unsaved titles across chapter switches (parallel to `pendingContent`); title-only dirty chapters correctly saved in Save All + - `insertBreak`: scene-break image path is `/static/break.png` for DB books (vs `../Images/break.png` for EPUB) + - Fix: `editor.focus()` called after content load so Monaco receives keyboard focus immediately + - Fix: `header-chapter` "Loading…" text suppressed for DB books where that element is hidden + - `book.html`: "Edit chapters" button shown for `storage_type = 'db'` books +- Search: chapter titles now included in FTS + - `upsert_chapter` prepends title to the plain-text input for `to_tsvector`: `title + " " + stripped_html` + - `GET /api/search`: added `OR LOWER(bc.title) LIKE LOWER('%…%')` fallback for chapters whose title matches but content doesn't + - Startup migration `migrate_rebuild_chapter_tsv_with_title()` rebuilds existing `content_tsv` values to include titles +- Grabber: added DB/EPUB storage toggle on the Convert page + - UI toggle above Convert button ("Save as: DB | EPUB file"); 
`storageMode` JS variable sent in POST body + - `POST /convert`: reads `storage_mode` from body; stored in job as `'db'` or `'epub'` + - `_run_scrape`: EPUB path builds chapters via `make_chapter_xhtml`, calls `make_epub`, writes file, calls `upsert_book(storage_type='file')`; DB path unchanged + - `done` SSE event includes `storage_type`; `conversion.js` updates the download button label/action accordingly +- EPUB → DB conversion: fixed double chapter title + - `_epub_body_inner` strips the first `
h1`/`h2`/`h3
    ` heading from each chapter body before storing; the editor prepends its own heading, so storing the EPUB heading too caused it to appear twice + - Fix for `NavigableString` crash: `getattr(child, "name", None) is None` used instead of `hasattr(child, "name")` — `NavigableString` has `name = None` but no `decompose()` method +- Sidebar: Search link styling fixed + - Stray `
Search
` moved inside the Library `