diff --git a/build-and-push.sh b/build-and-push.sh
index 2665bdd..a2e7419 100755
--- a/build-and-push.sh
+++ b/build-and-push.sh
@@ -235,10 +235,11 @@ for svc_path in "${services[@]}"; do
echo "============================================================"
echo "[INFO] Building ${svc} -> tags: ${NEW_VERSION}, latest"
echo "============================================================"
- docker build -t "${IMAGE_BASE}:${NEW_VERSION}" -t "${IMAGE_BASE}:dev" "$svc_path"
+ docker build -t "${IMAGE_BASE}:${NEW_VERSION}" -t "${IMAGE_BASE}:latest" -t "${IMAGE_BASE}:dev" "$svc_path"
docker push "${IMAGE_BASE}:${NEW_VERSION}"
+ docker push "${IMAGE_BASE}:latest"
docker push "${IMAGE_BASE}:dev"
- BUILT_IMAGES+=("${IMAGE_BASE}:${NEW_VERSION}" "${IMAGE_BASE}:dev")
+ BUILT_IMAGES+=("${IMAGE_BASE}:${NEW_VERSION}" "${IMAGE_BASE}:latest" "${IMAGE_BASE}:dev")
else
echo "============================================================"
echo "[INFO] Test build ${svc} -> tag: latest"
diff --git a/containers/novela/changelog.py b/containers/novela/changelog.py
index 479a6ba..b57d900 100644
--- a/containers/novela/changelog.py
+++ b/containers/novela/changelog.py
@@ -3,6 +3,40 @@ Changelog data for Novela
"""
CHANGELOG = [
+ {
+ "version": "v0.1.3",
+ "date": "2026-04-03",
+ "summary": "DB-stored books: chapters stored in PostgreSQL with full-text search, EPUB conversion, export, and a storage toggle in the grabber.",
+ "sections": [
+ {
+ "title": "New feature",
+ "type": "feature",
+ "changes": [
+ "DB-stored books: scraped books are now stored as chapters in PostgreSQL instead of EPUB files on disk — full-text search, content deduplication, and backup coverage are all handled automatically",
+ "Grabber stores chapters in book_chapters and images in a content-addressed imagestore (sha256-based, automatic deduplication across all books)",
+ "EPUB-to-DB conversion: Convert to DB button on any EPUB book detail page — extracts chapters, migrates all metadata and child rows (tags, progress, bookmarks, cover), removes the EPUB file",
+ "DB-to-EPUB export: Export EPUB button on DB-stored books — builds and streams a standards-compliant EPUB without writing a file to disk",
+ "Full-text search (/search): searches across all DB-stored chapter content via PostgreSQL FTS (tsvector / plainto_tsquery), returns highlighted snippets with direct links to the chapter position in the reader",
+ "Chapter editor supports DB-stored books: Monaco-based editor reads and writes book_chapters directly; chapter titles editable inline; title-only changes correctly included in Save All",
+ "Grabber: storage toggle on the Convert page — choose between DB storage and EPUB file before converting",
+ ],
+ },
+ ],
+ },
+ {
+ "version": "v0.1.2",
+ "date": "2026-04-02",
+ "summary": "Restore functionality on the Backup page.",
+ "sections": [
+ {
+ "title": "New feature",
+ "type": "feature",
+ "changes": [
+ "Restore functionality on the Backup page: browse any available Dropbox snapshot, see which files are currently missing from disk, and restore individual books or a selection back to the library — file is written to disk and immediately re-indexed",
+ ],
+ },
+ ],
+ },
{
"version": "v0.1.1",
"date": "2026-03-31",
diff --git a/containers/novela/main.py b/containers/novela/main.py
index b36d4bb..d53f8aa 100644
--- a/containers/novela/main.py
+++ b/containers/novela/main.py
@@ -17,6 +17,7 @@ from routers import (
grabber_router,
library_router,
reader_router,
+ search_router,
settings_router,
)
@@ -46,6 +47,7 @@ app.include_router(builder_router)
app.include_router(bulk_import_router)
app.include_router(following_router)
app.include_router(changelog_router)
+app.include_router(search_router)
@app.get("/")
diff --git a/containers/novela/migrations.py b/containers/novela/migrations.py
index 5fd144f..df01357 100644
--- a/containers/novela/migrations.py
+++ b/containers/novela/migrations.py
@@ -296,6 +296,59 @@ def migrate_rename_hiatus() -> None:
_exec("UPDATE library SET publication_status = 'Long-Term Hold' WHERE publication_status = 'Hiatus'")
+def migrate_add_storage_type() -> None:
+ _exec(
+ "ALTER TABLE library ADD COLUMN IF NOT EXISTS storage_type VARCHAR(10) NOT NULL DEFAULT 'file'"
+ )
+
+
+def migrate_create_book_images() -> None:
+ _exec(
+ """
+ CREATE TABLE IF NOT EXISTS book_images (
+ sha256 CHAR(64) PRIMARY KEY,
+ ext VARCHAR(10) NOT NULL,
+ media_type VARCHAR(100) NOT NULL,
+ size_bytes INTEGER NOT NULL DEFAULT 0
+ )
+ """
+ )
+
+
+def migrate_create_book_chapters() -> None:
+ _exec(
+ """
+ CREATE TABLE IF NOT EXISTS book_chapters (
+ id SERIAL PRIMARY KEY,
+ filename VARCHAR(600) NOT NULL REFERENCES library(filename) ON DELETE CASCADE,
+ chapter_index INTEGER NOT NULL,
+ title VARCHAR(500) NOT NULL DEFAULT '',
+ content TEXT NOT NULL DEFAULT '',
+ content_tsv TSVECTOR,
+ UNIQUE (filename, chapter_index)
+ )
+ """
+ )
+ _exec(
+ "CREATE INDEX IF NOT EXISTS idx_book_chapters_filename ON book_chapters (filename, chapter_index)"
+ )
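+    # The GIN index over content_tsv is what lets /search run full-text queries
+    # without a sequential scan of every chapter.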
+ _exec(
+ "CREATE INDEX IF NOT EXISTS idx_book_chapters_tsv ON book_chapters USING GIN (content_tsv)"
+ )
+
+
+def migrate_rebuild_chapter_tsv_with_title() -> None:
+ """Rebuild content_tsv to include chapter title (safe to run repeatedly)."""
+ _exec(
+ """
+ UPDATE book_chapters
+ SET content_tsv = to_tsvector('simple',
+ COALESCE(title, '') || ' ' ||
+ regexp_replace(COALESCE(content, ''), '<[^>]*>', ' ', 'g'))
+ """
+ )
+
+
def run_migrations() -> None:
migrate_create_library()
migrate_create_book_tags()
@@ -314,3 +367,7 @@ def run_migrations() -> None:
migrate_create_builder_drafts()
migrate_create_authors()
migrate_rename_hiatus()
+ migrate_add_storage_type()
+ migrate_create_book_images()
+ migrate_create_book_chapters()
+ migrate_rebuild_chapter_tsv_with_title()
diff --git a/containers/novela/routers/__init__.py b/containers/novela/routers/__init__.py
index beaae54..0fa2489 100644
--- a/containers/novela/routers/__init__.py
+++ b/containers/novela/routers/__init__.py
@@ -7,6 +7,7 @@ from routers.following import router as following_router
from routers.grabber import router as grabber_router
from routers.library import router as library_router
from routers.reader import router as reader_router
+from routers.search import router as search_router
from routers.settings import router as settings_router
__all__ = [
@@ -20,4 +21,5 @@ __all__ = [
"bulk_import_router",
"following_router",
"changelog_router",
+ "search_router",
]
diff --git a/containers/novela/routers/backup.py b/containers/novela/routers/backup.py
index 9b218a2..430dd09 100644
--- a/containers/novela/routers/backup.py
+++ b/containers/novela/routers/backup.py
@@ -17,6 +17,7 @@ from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from db import get_db_conn
+from routers.common import scan_media, upsert_book
from security import decrypt_value, encrypt_value, is_encrypted_value
templates = Jinja2Templates(directory="templates")
@@ -1196,3 +1197,131 @@ async def run_backup(request: Request):
"message": "Backup started in background.",
"started_at": _now_iso(),
}
+
+
+def _parse_snapshot_date(name: str) -> str:
+ """Parse 'snapshot-20260329-123456.json' → '2026-03-29T12:34:56Z'."""
+ stem = Path(name).stem # snapshot-20260329-123456
+ parts = stem.split("-")
+ if len(parts) >= 3:
+ d, t = parts[1], parts[2]
+ if len(d) == 8 and len(t) == 6:
+ return f"{d[:4]}-{d[4:6]}-{d[6:]}T{t[:2]}:{t[2:4]}:{t[4:]}Z"
+ return ""
+
+
+def _download_and_restore(client: dropbox.Dropbox, objects_root: str, rel: str, info: dict) -> None:
+ sha256 = str(info.get("sha256") or "")
+ if not sha256:
+ raise ValueError("No sha256 in snapshot entry")
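+    # Backup objects are content-addressed by sha256, so the snapshot entry alone
+    # is enough to locate the blob in Dropbox.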
+ obj_path = _object_path(objects_root, sha256)
+ _meta, res = client.files_download(obj_path)
+ data = res.content
+ dest = LIBRARY_DIR / rel
+ dest.parent.mkdir(parents=True, exist_ok=True)
+ dest.write_bytes(data)
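+    # Re-scan the restored file and upsert it so the book is indexed immediately.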
+ meta = scan_media(dest)
+ tags = [(s, "subject") for s in meta.get("subjects", [])]
+ with get_db_conn() as conn:
+ with conn:
+ upsert_book(conn, rel, meta, tags)
+
+
+@router.get("/api/backup/snapshots")
+async def list_snapshots():
+ try:
+ client = await asyncio.to_thread(_dbx)
+ except Exception as e:
+ return {"ok": False, "error": str(e), "snapshots": []}
+
+ dropbox_root = _load_dropbox_root()
+ snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")
+
+ try:
+ paths = await asyncio.to_thread(_list_snapshot_paths, client, snapshots_root)
+ except Exception as e:
+ return {"ok": False, "error": str(e), "snapshots": []}
+
+ snapshots = [
+ {"name": Path(p).name, "created_at": _parse_snapshot_date(Path(p).name)}
+ for p in paths
+ ]
+ return {"ok": True, "snapshots": snapshots}
+
+
+@router.get("/api/backup/snapshots/{snapshot_name}/files")
+async def snapshot_files(snapshot_name: str):
+ try:
+ client = await asyncio.to_thread(_dbx)
+ except Exception as e:
+ return {"ok": False, "error": str(e), "files": []}
+
+ dropbox_root = _load_dropbox_root()
+ snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")
+ snapshot_path = _dropbox_join(snapshots_root, snapshot_name)
+
+ try:
+ snap = await asyncio.to_thread(_load_snapshot_data, client, snapshot_path)
+ except Exception as e:
+ return {"ok": False, "error": str(e), "files": []}
+
+ files_data = snap.get("files", {})
+ result = [
+ {
+ "path": rel,
+ "size": info.get("size", 0),
+ "sha256": info.get("sha256", ""),
+ "exists_locally": (LIBRARY_DIR / rel).exists(),
+ }
+ for rel, info in sorted(files_data.items())
+ if isinstance(info, dict)
+ ]
+ return {"ok": True, "snapshot": snapshot_name, "files": result}
+
+
+@router.post("/api/backup/restore")
+async def restore_files(request: Request):
+ body = {}
+ try:
+ body = await request.json()
+ except Exception:
+ pass
+
+ snapshot_name = (body.get("snapshot_name") or "").strip()
+ files_to_restore: list[str] = body.get("files", [])
+
+ if not snapshot_name:
+ return {"ok": False, "error": "snapshot_name is required"}
+ if not files_to_restore:
+ return {"ok": False, "error": "No files specified"}
+
+ try:
+ client = await asyncio.to_thread(_dbx)
+ except Exception as e:
+ return {"ok": False, "error": str(e)}
+
+ dropbox_root = _load_dropbox_root()
+ snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")
+ objects_root = _dropbox_join(dropbox_root, "library_objects")
+ snapshot_path = _dropbox_join(snapshots_root, snapshot_name)
+
+ try:
+ snap = await asyncio.to_thread(_load_snapshot_data, client, snapshot_path)
+ except Exception as e:
+ return {"ok": False, "error": f"Failed to load snapshot: {e}"}
+
+ files_data = snap.get("files", {})
+
+ results = []
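+    # Restore files one at a time so a single failure doesn't abort the batch.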
+ for rel in files_to_restore:
+ if rel not in files_data:
+ results.append({"path": rel, "ok": False, "error": "Not found in snapshot"})
+ continue
+ try:
+ await asyncio.to_thread(_download_and_restore, client, objects_root, rel, files_data[rel])
+ results.append({"path": rel, "ok": True})
+ except Exception as e:
+ results.append({"path": rel, "ok": False, "error": str(e)})
+
+ ok_count = sum(1 for r in results if r["ok"])
+ return {"ok": True, "restored": ok_count, "total": len(results), "results": results}
diff --git a/containers/novela/routers/common.py b/containers/novela/routers/common.py
index b782195..faaec72 100644
--- a/containers/novela/routers/common.py
+++ b/containers/novela/routers/common.py
@@ -1,4 +1,5 @@
import base64
+import hashlib
import html as _html
import io
import posixpath
@@ -18,10 +19,16 @@ from pdf import pdf_cover_thumb, pdf_page_count, pdf_scan_metadata
LIBRARY_DIR = Path("library")
LIBRARY_DIR.mkdir(exist_ok=True)
LIBRARY_ROOT = LIBRARY_DIR.resolve()
+IMAGES_DIR = LIBRARY_DIR / "images"
COVER_W = 300
COVER_H = 450
+def is_db_filename(filename: str) -> bool:
+ """True if the filename is a synthetic DB-stored book path (no file on disk)."""
+ return (filename or "").startswith("db/")
+
+
def clean_segment(value: str, fallback: str, max_len: int) -> str:
txt = re.sub(r"\s+", " ", (value or "").strip())
txt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", txt)
@@ -78,6 +85,17 @@ def coerce_series_index(value: int | str | None) -> int:
def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, series: str, series_index: int | str | None, series_suffix: str = "", ext: str = "") -> Path:
+ if media_type == "db":
+ pub = clean_segment(publisher, "Unknown Publisher", 80)
+ auth = clean_segment(author, "Unknown Author", 80)
+ ttl = clean_segment(title, "Untitled", 140)
+ series_name = clean_segment(series, "", 80)
+ if series_name:
+ idx = coerce_series_index(series_index)
+ sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5]
+ return Path("db") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx} - {ttl}"
+ return Path("db") / pub / auth / ttl
+
if media_type == "epub":
pub = clean_segment(publisher, "Unknown Publisher", 80)
auth = clean_segment(author, "Unknown Author", 80)
@@ -339,12 +357,13 @@ def upsert_book(conn, filename: str, meta: dict, tags: list[tuple[str, str]] | N
with conn.cursor() as cur:
cur.execute(
"""
- INSERT INTO library (filename, media_type, title, author, publisher, has_cover,
+ INSERT INTO library (filename, media_type, storage_type, title, author, publisher, has_cover,
series, series_index, series_suffix, publication_status, source_url,
publish_date, description, needs_review, want_to_read, rating, updated_at)
- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, %s, NOW())
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, %s, NOW())
ON CONFLICT (filename) DO UPDATE SET
media_type = EXCLUDED.media_type,
+ storage_type = EXCLUDED.storage_type,
title = COALESCE(NULLIF(EXCLUDED.title, ''), library.title),
author = COALESCE(NULLIF(EXCLUDED.author, ''), library.author),
publisher = COALESCE(NULLIF(EXCLUDED.publisher, ''), library.publisher),
@@ -362,6 +381,7 @@ def upsert_book(conn, filename: str, meta: dict, tags: list[tuple[str, str]] | N
(
filename,
meta.get("media_type", "epub"),
+ meta.get("storage_type", "file"),
meta.get("title", ""),
meta.get("author", ""),
meta.get("publisher", ""),
@@ -413,6 +433,7 @@ def list_library_json() -> list[dict]:
(cc.filename IS NOT NULL) AS has_cached_cover,
l.rating,
COALESCE(l.series_suffix, '') AS series_suffix,
+ COALESCE(l.storage_type, 'file') AS storage_type,
json_agg(
json_build_object('tag', bt.tag, 'tag_type', bt.tag_type)
) FILTER (WHERE bt.tag IS NOT NULL) AS tags
@@ -430,7 +451,7 @@ def list_library_json() -> list[dict]:
l.archived, l.needs_review, l.updated_at,
rp.progress, rp.cfi, rp.page,
rs.read_count, rs.last_read,
- cc.filename, l.rating, l.series_suffix
+ cc.filename, l.rating, l.series_suffix, l.storage_type
ORDER BY COALESCE(l.publisher, ''), COALESCE(l.author, ''), COALESCE(l.series, ''), l.series_index, COALESCE(l.title, '')
"""
)
@@ -460,13 +481,92 @@ def list_library_json() -> list[dict]:
"page": r[15],
"read_count": r[16] or 0,
"last_read": r[17].isoformat() if r[17] else None,
- "tags": r[21] or [],
+ "storage_type": r[21] or "file",
+ "tags": r[22] or [],
"rating": r[19] or 0,
}
)
return out
+_IMAGE_EXT_MAP = {
+ "image/jpeg": ".jpg",
+ "image/png": ".png",
+ "image/webp": ".webp",
+ "image/gif": ".gif",
+}
+
+
+def write_image_file(data: bytes, media_type: str) -> tuple[str, str, str]:
+ """Write image bytes to the content-addressed imagestore (no DB).
+
+ Returns (sha256, ext, url).
+ """
+ sha256 = hashlib.sha256(data).hexdigest()
+ ext = _IMAGE_EXT_MAP.get(media_type, ".jpg")
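+    # Shard by the first two hex chars of the digest so no single directory grows
+    # huge; identical bytes hash to the same path, which deduplicates repeated
+    # images across books.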
+ img_path = IMAGES_DIR / sha256[:2] / f"{sha256}{ext}"
+ if not img_path.exists():
+ img_path.parent.mkdir(parents=True, exist_ok=True)
+ img_path.write_bytes(data)
+ url = f"/library/db-images/{sha256[:2]}/{sha256}{ext}"
+ return sha256, ext, url
+
+
+def store_db_image(conn, data: bytes, media_type: str) -> tuple[str, str, str]:
+ """Write image to imagestore and register in book_images table.
+
+ Returns (sha256, ext, url).
+ """
+ sha256, ext, url = write_image_file(data, media_type)
+ with conn.cursor() as cur:
+ cur.execute(
+ """
+ INSERT INTO book_images (sha256, ext, media_type, size_bytes)
+ VALUES (%s, %s, %s, %s)
+ ON CONFLICT (sha256) DO NOTHING
+ """,
+ (sha256, ext, media_type, len(data)),
+ )
+ return sha256, ext, url
+
+
+def html_to_plain(html: str) -> str:
+ """Strip HTML tags for tsvector input."""
+ from bs4 import BeautifulSoup
+ return BeautifulSoup(html, "html.parser").get_text(" ", strip=True)
+
+
+def upsert_chapter(conn, filename: str, chapter_index: int, title: str, content_html: str) -> None:
+ """Insert or replace a chapter in book_chapters and update its tsvector."""
+ plain = html_to_plain(content_html)
+ tsv_input = (title or "") + " " + plain
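+    # Index the title together with the stripped body so /search matches either;
+    # the 'simple' config skips language-specific stemming.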
+ with conn.cursor() as cur:
+ cur.execute(
+ """
+ INSERT INTO book_chapters (filename, chapter_index, title, content, content_tsv)
+ VALUES (%s, %s, %s, %s, to_tsvector('simple', %s))
+ ON CONFLICT (filename, chapter_index) DO UPDATE SET
+ title = EXCLUDED.title,
+ content = EXCLUDED.content,
+ content_tsv = EXCLUDED.content_tsv
+ """,
+ (filename, chapter_index, title, content_html, tsv_input),
+ )
+
+
+def ensure_unique_db_filename(conn, base_filename: str) -> str:
+ """Return a filename that doesn't yet exist in the library table."""
+ candidate = base_filename
+ counter = 2
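+    # Append " (2)", " (3)", ... until the candidate is free.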
+ while True:
+ with conn.cursor() as cur:
+ cur.execute("SELECT 1 FROM library WHERE filename = %s", (candidate,))
+ if not cur.fetchone():
+ return candidate
+ candidate = f"{base_filename} ({counter})"
+ counter += 1
+
+
def normalize_site(raw: str) -> str:
raw = (raw or "").strip()
if "://" in raw:
diff --git a/containers/novela/routers/editor.py b/containers/novela/routers/editor.py
index b3bc9f7..0705629 100644
--- a/containers/novela/routers/editor.py
+++ b/containers/novela/routers/editor.py
@@ -12,7 +12,7 @@ from fastapi.templating import Jinja2Templates
from db import get_db_conn
from epub import read_epub_file, write_epub_file
-from routers.common import LIBRARY_DIR, resolve_library_path
+from routers.common import LIBRARY_DIR, is_db_filename, resolve_library_path, upsert_chapter
router = APIRouter()
templates = Jinja2Templates(directory="templates")
@@ -158,21 +158,40 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat
@router.get("/library/editor/{filename:path}", response_class=HTMLResponse)
async def editor_page(filename: str, request: Request):
- path = resolve_library_path(filename)
- if path is None or not path.exists():
- return HTMLResponse("Not found", status_code=404)
+ if not is_db_filename(filename):
+ path = resolve_library_path(filename)
+ if path is None or not path.exists():
+ return HTMLResponse("Not found", status_code=404)
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute("SELECT title FROM library WHERE filename = %s", (filename,))
row = cur.fetchone()
- title = row[0] if row and row[0] else filename
+ if not row:
+ return HTMLResponse("Not found", status_code=404)
+ title = row[0] if row[0] else filename
- return templates.TemplateResponse(request, "editor.html", {"filename": filename, "title": title})
+ return templates.TemplateResponse(request, "editor.html", {
+ "filename": filename,
+ "title": title,
+ "is_db": is_db_filename(filename),
+ })
@router.get("/api/edit/chapter/{index:int}/{filename:path}")
async def get_edit_chapter(filename: str, index: int):
+ if is_db_filename(filename):
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ "SELECT title, content FROM book_chapters WHERE filename = %s AND chapter_index = %s",
+ (filename, index),
+ )
+ row = cur.fetchone()
+ if not row:
+ return Response(status_code=404)
+ return JSONResponse({"index": index, "href": f"db:{index}", "title": row[0], "content": row[1]})
+
path = resolve_library_path(filename)
if path is None or not path.exists():
return Response(status_code=404)
@@ -186,13 +205,29 @@ async def get_edit_chapter(filename: str, index: int):
@router.post("/api/edit/chapter/{index:int}/{filename:path}")
async def save_edit_chapter(filename: str, index: int, request: Request):
+ body = await request.json()
+ content = body.get("content", "")
+
+ if is_db_filename(filename):
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ "SELECT title FROM book_chapters WHERE filename = %s AND chapter_index = %s",
+ (filename, index),
+ )
+ row = cur.fetchone()
+ if not row:
+ return JSONResponse({"error": "Chapter not found"}, status_code=404)
+ new_title = (body.get("title") or "").strip() or row[0]
+ with conn:
+ upsert_chapter(conn, filename, index, new_title, content)
+ return JSONResponse({"ok": True})
+
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():
return JSONResponse({"error": "File not found"}, status_code=404)
- body = await request.json()
- content = body.get("content", "")
if not content:
return JSONResponse({"error": "No content"}, status_code=400)
spine = _epub_spine(path)
@@ -208,15 +243,42 @@ async def save_edit_chapter(filename: str, index: int, request: Request):
@router.post("/api/edit/chapter/add/{filename:path}")
async def add_edit_chapter(filename: str, request: Request):
+ body = await request.json()
+ title = (body.get("title") or "New chapter").strip() or "New chapter"
+ after_index = body.get("after_index", -1)
+
+ if is_db_filename(filename):
+ try:
+ after_index = int(after_index)
+ except Exception:
+ after_index = -1
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT COUNT(*) FROM book_chapters WHERE filename = %s", (filename,))
+ total = cur.fetchone()[0]
+ cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,))
+ if not cur.fetchone():
+ return JSONResponse({"error": "not found"}, status_code=404)
+ insert_idx = total if after_index < 0 or after_index >= total else after_index + 1
+ with conn:
+ with conn.cursor() as cur:
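+                # Shift later chapters up one slot to open a gap at insert_idx.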
+ cur.execute(
+ "UPDATE book_chapters SET chapter_index = chapter_index + 1 WHERE filename = %s AND chapter_index >= %s",
+ (filename, insert_idx),
+ )
+ upsert_chapter(conn, filename, insert_idx, title, "")
+ return JSONResponse({"ok": True, "index": insert_idx, "count": total + 1})
+
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
if not path.exists():
return JSONResponse({"error": "File not found"}, status_code=404)
- body = await request.json()
- title = (body.get("title") or "New chapter").strip() or "New chapter"
- after_index = body.get("after_index", -1)
try:
after_index = int(after_index)
except Exception:
@@ -339,6 +401,26 @@ async def add_edit_chapter(filename: str, request: Request):
@router.delete("/api/edit/chapter/{index:int}/{filename:path}")
async def delete_edit_chapter(filename: str, index: int):
+ if is_db_filename(filename):
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT COUNT(*) FROM book_chapters WHERE filename = %s", (filename,))
+ total = cur.fetchone()[0]
+ if total <= 1:
+ return JSONResponse({"error": "Cannot delete the last chapter"}, status_code=400)
+ with conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ "DELETE FROM book_chapters WHERE filename = %s AND chapter_index = %s",
+ (filename, index),
+ )
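+                # Renumber the remaining chapters to close the gap.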
+ cur.execute(
+ "UPDATE book_chapters SET chapter_index = chapter_index - 1 WHERE filename = %s AND chapter_index > %s",
+ (filename, index),
+ )
+ new_total = total - 1
+ return JSONResponse({"ok": True, "index": min(index, new_total - 1), "count": new_total})
+
path = resolve_library_path(filename)
if path is None:
return JSONResponse({"error": "not found"}, status_code=404)
diff --git a/containers/novela/routers/grabber.py b/containers/novela/routers/grabber.py
index 102fd12..34ad719 100644
--- a/containers/novela/routers/grabber.py
+++ b/containers/novela/routers/grabber.py
@@ -17,11 +17,16 @@ from db import get_db_conn
from epub import detect_image_format, make_chapter_xhtml, make_epub
from routers.common import (
LIBRARY_DIR,
- ensure_cover_cache_for_book,
+ ensure_unique_db_filename,
ensure_unique_rel_path,
+ make_cover_thumb_webp,
make_rel_path,
normalize_site,
+ store_db_image,
upsert_book,
+ upsert_chapter,
+ upsert_cover_cache,
+ write_image_file,
)
from scrapers import get_scraper
from scrapers.base import HEADERS
@@ -135,22 +140,87 @@ async def debug_run(request: Request):
result: dict = {}
try:
async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client:
+ # Login
+ login_success = False
if username:
- await scraper.login(client, username, password)
+ login_success = await scraper.login(client, username, password)
+ result["login"] = {
+ "attempted": bool(username),
+ "success": login_success,
+ "username": username,
+ }
+
book = await scraper.fetch_book_info(client, url)
- result = {
+ chapters = book.get("chapters", [])
+
+ # Compute output filename
+ series = book.get("series", "")
+ series_index = int(book.get("series_index_hint", 1) or 1)
+ filename = make_rel_path(
+ media_type="epub",
+ publisher=book.get("publisher", ""),
+ author=book.get("author", ""),
+ title=book.get("title", ""),
+ series=series,
+ series_index=series_index,
+ ).as_posix()
+
+ result["meta"] = {
"title": book.get("title", ""),
"author": book.get("author", ""),
"publisher": book.get("publisher", ""),
"series": book.get("series", ""),
- "chapter_count": len(book.get("chapters", [])),
- "chapter_method": book.get("chapter_method", ""),
"genres": book.get("genres", []),
"subgenres": book.get("subgenres", []),
"tags": book.get("tags", []),
"description": book.get("description", ""),
+ "updated_date": book.get("updated_date", ""),
"publication_status": book.get("publication_status", ""),
+ "filename": filename,
}
+
+ result["chapters"] = {
+ "count": len(chapters),
+ "method": book.get("chapter_method", ""),
+ "list": chapters,
+ }
+
+ # Fetch first chapter
+ if chapters:
+ ch = chapters[0]
+ try:
+ _load_break_patterns()
+ ch_data = await scraper.fetch_chapter(client, ch)
+ content_el = ch_data.get("content_el")
+ raw_html = content_el.decode_contents() if content_el else ""
+
+ xhtml_parts = []
+ if content_el:
+ from bs4 import Tag
+ all_p = content_el.find_all("p")
+ empty_p = sum(
+ 1 for p in all_p
+ if not [c for c in p.children if isinstance(c, Tag)]
+ and not p.get_text().replace("\xa0", "").strip()
+ )
+ filled_p = len(all_p) - empty_p
+ empty_p_is_spacer = filled_p > 0 and empty_p >= filled_p * 0.5
+ for child in content_el.children:
+ part = element_to_xhtml(child, empty_p_is_spacer=empty_p_is_spacer)
+ if part.strip():
+ xhtml_parts.append(part)
+
+ result["first_chapter"] = {
+ "title": ch_data.get("title", ch["title"]),
+ "url": ch["url"],
+ "selector_id": ch_data.get("selector_id"),
+ "selector_class": ch_data.get("selector_class"),
+ "raw_html": raw_html[:8000],
+ "converted_xhtml": "\n".join(xhtml_parts)[:8000],
+ }
+ except Exception as e:
+ result["first_chapter"] = {"title": ch["title"], "url": ch["url"], "error": str(e)}
+
except Exception:
result["error"] = traceback.format_exc()
return result
@@ -330,8 +400,8 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send)
}
_load_break_patterns()
- break_img_data = open("static/break.png", "rb").read()
+    # Collect chapters as {title, content_html}; images are written straight to
+    # the imagestore and their src attributes rewritten in place
chapters = []
for i, ch in enumerate(book["chapters"], 1):
send("progress", {"current": i, "total": len(book["chapters"]), "title": ch["title"]})
@@ -339,11 +409,11 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send)
ch_data = await scraper.fetch_chapter(client, ch)
content_el = ch_data["content_el"]
- chapter_images = []
+ # Download images and store to disk (no DB yet); rewrite src to absolute URL
if content_el:
- img_counter = 1
for img_tag in content_el.find_all("img"):
if is_break_element(img_tag):
+ img_tag.decompose()
continue
src = img_tag.get("src", "")
if not src or src.startswith("data:"):
@@ -352,19 +422,16 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send)
try:
img_resp = await client.get(urljoin(ch["url"], src))
if img_resp.status_code == 200:
- img_name, img_mime = detect_image_format(
- img_resp.content, f"ch{i:03d}_img{img_counter:03d}"
+ _, img_mime = detect_image_format(
+ img_resp.content, f"ch{i:03d}_img"
)
- img_tag["src"] = f"../Images/{img_name}"
+                            _sha, _ext, img_url = write_image_file(img_resp.content, img_mime)
+                            img_tag["src"] = img_url
img_tag["alt"] = img_tag.get("alt", "")
- chapter_images.append(
- {
- "epub_path": f"OEBPS/Images/{img_name}",
- "data": img_resp.content,
- "media_type": img_mime,
- }
- )
- img_counter += 1
+ img_tag.attrs = {
+ k: v for k, v in img_tag.attrs.items()
+ if k in ("src", "alt", "width", "height")
+ }
else:
img_tag.decompose()
except Exception:
@@ -386,9 +453,8 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send)
if part.strip():
xhtml_parts.append(part)
- content_xhtml = "\n".join(xhtml_parts)
- chapter_xhtml = make_chapter_xhtml(ch_data["title"], content_xhtml, i)
- chapters.append({"title": ch_data["title"], "xhtml": chapter_xhtml, "images": chapter_images})
+ content_html = "\n".join(xhtml_parts)
+ chapters.append({"title": ch_data["title"], "content_html": content_html})
await asyncio.sleep(0.2)
except Exception as e:
send("warning", {"message": f"Chapter {i} skipped: {e}"})
@@ -398,12 +464,30 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send)
job["done"] = True
return
- send("status", {"message": "Building EPUB..."})
- book_id = str(uuid.uuid4())
- epub_bytes = make_epub(book_title, author, chapters, cover_data, break_img_data, book_id, book_info)
+ storage_mode = job.get("storage_mode", "db")
+ send("status", {"message": "Saving to library..."})
- rel = ensure_unique_rel_path(
- make_rel_path(
+ book_tags = (
+ [(g, "genre") for g in book_info.get("genres", [])]
+ + [(g, "subgenre") for g in book_info.get("subgenres", [])]
+ + [(g, "tag") for g in book_info.get("tags", [])]
+ )
+
+ if storage_mode == "epub":
+ # ── EPUB file on disk ──────────────────────────────────────────
+ epub_chapters = [
+ {"title": ch["title"], "xhtml": make_chapter_xhtml(ch["title"], ch["content_html"], i + 1), "images": []}
+ for i, ch in enumerate(chapters)
+ ]
+ try:
+ break_img_data = open("static/break.png", "rb").read()
+ except Exception:
+ break_img_data = b""
+ epub_bytes = make_epub(
+ book_title, author, epub_chapters, cover_data, break_img_data,
+ str(uuid.uuid4()), book_info,
+ )
+ rel_path = make_rel_path(
media_type="epub",
publisher=book_info.get("publisher", ""),
author=author,
@@ -411,40 +495,78 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send)
series=series,
series_index=series_index,
)
- )
- out_path = LIBRARY_DIR / rel
- out_path.parent.mkdir(parents=True, exist_ok=True)
- out_path.write_bytes(epub_bytes)
+ rel_path = ensure_unique_rel_path(rel_path)
+ out_path = LIBRARY_DIR / rel_path
+ out_path.parent.mkdir(parents=True, exist_ok=True)
+ out_path.write_bytes(epub_bytes)
+ rel_filename = rel_path.as_posix()
+
+ book_meta = {
+ "media_type": "epub",
+ "storage_type": "file",
+ "has_cover": cover_data is not None,
+ "series": series,
+ "series_index": series_index if series else 0,
+ "title": book_title,
+ "publication_status": book_info.get("publication_status", ""),
+ "author": author,
+ "publisher": book_info.get("publisher", ""),
+ "source_url": book_info.get("source_url", ""),
+ "description": book_info.get("description", ""),
+ "publish_date": final_updated_date,
+ "needs_review": False,
+ }
+ with get_db_conn() as conn:
+ with conn:
+ upsert_book(conn, rel_filename, book_meta, book_tags)
+ if cover_data:
+ try:
+ thumb = make_cover_thumb_webp(cover_data)
+ upsert_cover_cache(conn, rel_filename, "image/webp", thumb)
+ except Exception:
+ pass
+
+ else:
+ # ── DB storage (default) ───────────────────────────────────────
+ base_filename = make_rel_path(
+ media_type="db",
+ publisher=book_info.get("publisher", ""),
+ author=author,
+ title=book_title,
+ series=series,
+ series_index=series_index,
+ ).as_posix()
+
+ book_meta = {
+ "media_type": "epub",
+ "storage_type": "db",
+ "has_cover": cover_data is not None,
+ "series": book_info.get("series", ""),
+ "series_index": series_index if book_info.get("series") else 0,
+ "title": book_title,
+ "publication_status": book_info.get("publication_status", ""),
+ "author": author,
+ "publisher": book_info.get("publisher", ""),
+ "source_url": book_info.get("source_url", ""),
+ "description": book_info.get("description", ""),
+ "publish_date": final_updated_date,
+ "needs_review": False,
+ }
+ with get_db_conn() as conn:
+ with conn:
+ rel_filename = ensure_unique_db_filename(conn, base_filename)
+ upsert_book(conn, rel_filename, book_meta, book_tags)
+ for idx, ch in enumerate(chapters):
+ upsert_chapter(conn, rel_filename, idx, ch["title"], ch["content_html"])
+ if cover_data:
+ try:
+ thumb = make_cover_thumb_webp(cover_data)
+ upsert_cover_cache(conn, rel_filename, "image/webp", thumb)
+ except Exception:
+ pass
- rel_filename = rel.as_posix()
job["filename"] = rel_filename
-
- book_meta = {
- "media_type": "epub",
- "has_cover": cover_data is not None,
- "series": book_info.get("series", ""),
- "series_index": series_index if book_info.get("series") else 0,
- "title": book_title,
- "publication_status": book_info.get("publication_status", ""),
- "author": author,
- "publisher": book_info.get("publisher", ""),
- "source_url": book_info.get("source_url", ""),
- "description": book_info.get("description", ""),
- "publish_date": final_updated_date,
- "needs_review": False,
- }
- book_tags = (
- [(g, "genre") for g in book_info.get("genres", [])]
- + [(g, "subgenre") for g in book_info.get("subgenres", [])]
- + [(g, "tag") for g in book_info.get("tags", [])]
- )
-
- with get_db_conn() as conn:
- with conn:
- upsert_book(conn, rel_filename, book_meta, book_tags)
- ensure_cover_cache_for_book(conn, rel_filename, out_path, "epub")
-
- send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters)})
+ send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters), "storage_type": storage_mode})
job["done"] = True
@@ -471,6 +593,7 @@ async def convert(request: Request):
job["series_index"] = int(body.get("series_index", 1) or 1)
job["updated_date_override"] = (body.get("updated_date") or "").strip()
+ job["storage_mode"] = "epub" if body.get("storage_mode") == "epub" else "db"
JOBS[job_id] = job
asyncio.create_task(scrape_book(job_id, url, username, password))
diff --git a/containers/novela/routers/library.py b/containers/novela/routers/library.py
index e0ca3ee..e040dc4 100644
--- a/containers/novela/routers/library.py
+++ b/containers/novela/routers/library.py
@@ -15,6 +15,7 @@ from routers.common import (
LIBRARY_DIR,
ensure_cover_cache_for_book,
ensure_unique_rel_path,
+ is_db_filename,
list_library_json,
make_cover_thumb_webp,
make_rel_path,
@@ -175,6 +176,17 @@ async def library_download(filename: str):
@router.delete("/library/file/{filename:path}")
async def library_delete(filename: str):
+ if is_db_filename(filename):
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,))
+ if not cur.fetchone():
+ return {"error": "Not found"}
+ with conn:
+ with conn.cursor() as cur:
+ cur.execute("DELETE FROM library WHERE filename = %s", (filename,))
+ return {"ok": True}
+
full = resolve_library_path(filename)
if full is None:
return {"error": "Invalid filename"}
@@ -233,9 +245,10 @@ async def library_bulk_delete(request: Request):
@router.get("/library/cover-cached/{filename:path}")
async def library_cover_cached(filename: str):
- full = resolve_library_path(filename)
- if full is None or not full.exists():
- return Response(status_code=404)
+ if not is_db_filename(filename):
+ full = resolve_library_path(filename)
+ if full is None or not full.exists():
+ return Response(status_code=404)
with get_db_conn() as conn:
with conn:
@@ -266,6 +279,19 @@ async def library_cover_cached(filename: str):
@router.get("/library/cover/{filename:path}")
async def library_cover(filename: str):
+ if is_db_filename(filename):
+ # DB books: cover is always served from the cache
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
+ (filename,),
+ )
+ row = cur.fetchone()
+ if row and row[0]:
+ return Response(content=bytes(row[0]), media_type="image/webp")
+ return Response(status_code=404)
+
full = resolve_library_path(filename)
if full is None or not full.exists():
return Response(status_code=404)
diff --git a/containers/novela/routers/reader.py b/containers/novela/routers/reader.py
index e0bb980..8f5b6fc 100644
--- a/containers/novela/routers/reader.py
+++ b/containers/novela/routers/reader.py
@@ -16,9 +16,22 @@ from fastapi.templating import Jinja2Templates
from cbr import cbr_get_page, cbr_page_count
from db import get_db_conn
-from epub import read_epub_file, write_epub_file
+from epub import make_chapter_xhtml, make_epub, read_epub_file, write_epub_file
from pdf import pdf_page_count, pdf_render_page
-from routers.common import LIBRARY_DIR, prune_empty_dirs, resolve_library_path, scan_epub
+from routers.common import (
+ IMAGES_DIR,
+ LIBRARY_DIR,
+ ensure_unique_db_filename,
+ is_db_filename,
+ make_cover_thumb_webp,
+ make_rel_path,
+ prune_empty_dirs,
+ resolve_library_path,
+ scan_epub,
+ upsert_chapter,
+ upsert_cover_cache,
+ write_image_file,
+)
router = APIRouter()
templates = Jinja2Templates(directory="templates")
@@ -459,6 +472,21 @@ def _guard(filename: str) -> bool:
# Routes
# ---------------------------------------------------------------------------
+@router.get("/library/db-images/{path:path}")
+async def serve_db_image(path: str):
+ """Serve an image from the content-addressed imagestore."""
+ img_path = (IMAGES_DIR / path).resolve()
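+    # resolve() + relative_to() rejects any "../" traversal out of the imagestore.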
+ try:
+ img_path.relative_to(IMAGES_DIR.resolve())
+ except ValueError:
+ return Response(status_code=404)
+ if not img_path.exists():
+ return Response(status_code=404)
+ ext = img_path.suffix.lower()
+ mt = {".jpg": "image/jpeg", ".png": "image/png", ".webp": "image/webp", ".gif": "image/gif"}.get(ext, "application/octet-stream")
+ return FileResponse(img_path, media_type=mt)
+
+
@router.get("/library/epub/{filename:path}")
async def library_epub(filename: str):
"""Serve EPUB inline (no Content-Disposition: attachment) for the reader."""
@@ -472,6 +500,18 @@ async def library_epub(filename: str):
@router.get("/library/chapters/{filename:path}")
async def get_chapter_list(filename: str):
+ if is_db_filename(filename):
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ "SELECT chapter_index, title FROM book_chapters WHERE filename = %s ORDER BY chapter_index",
+ (filename,),
+ )
+ rows = cur.fetchall()
+ if not rows:
+ return Response(status_code=404)
+ return [{"index": r[0], "title": r[1], "href": f"db:{r[0]}"} for r in rows]
+
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
@@ -482,7 +522,24 @@ async def get_chapter_list(filename: str):
@router.get("/library/chapter/{index}/{filename:path}")
async def get_chapter_html(filename: str, index: int):
- """Extract a single chapter from the EPUB and return it as an HTML fragment."""
+ """Extract a single chapter from the EPUB (or DB) and return it as an HTML fragment."""
+ if is_db_filename(filename):
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ "SELECT title, content FROM book_chapters WHERE filename = %s AND chapter_index = %s",
+ (filename, index),
+ )
+ row = cur.fetchone()
+ if not row:
+ return Response(status_code=404)
+ title, content = row
+ safe_title = _html.escape(title or "")
+ return Response(
+            f'<h2>{safe_title}</h2>\n{content}\n',
+ media_type="text/html",
+ )
+
path = resolve_library_path(filename)
if path is None:
return Response(status_code=404)
@@ -605,11 +662,16 @@ async def save_progress(filename: str, request: Request):
@router.post("/library/mark-read/{filename:path}")
async def library_mark_read(filename: str, request: Request):
- if resolve_library_path(filename) is None:
- return {"error": "Invalid filename"}
- path = resolve_library_path(filename)
- if path is None or not path.exists():
- return {"error": "File not found"}
+ if not is_db_filename(filename):
+ path = resolve_library_path(filename)
+ if path is None or not path.exists():
+ return {"error": "File not found"}
+ else:
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,))
+ if not cur.fetchone():
+ return {"error": "Not found"}
body = {}
try:
body = await request.json()
@@ -635,18 +697,23 @@ async def library_mark_read(filename: str, request: Request):
@router.get("/library/book/{filename:path}", response_class=HTMLResponse)
async def book_detail_page(filename: str, request: Request):
- path = resolve_library_path(filename)
- if path is None:
- return HTMLResponse("Not found", status_code=404)
- if not path.exists():
- return HTMLResponse("Not found", status_code=404)
+ db_book = is_db_filename(filename)
+ if not db_book:
+ path = resolve_library_path(filename)
+ if path is None:
+ return HTMLResponse("Not found", status_code=404)
+ if not path.exists():
+ return HTMLResponse("Not found", status_code=404)
+ else:
+ path = None
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"""
SELECT title, author, publisher, has_cover, series, series_index,
publication_status, want_to_read, source_url, archived, publish_date, description,
- rating, COALESCE(series_suffix, '') AS series_suffix
+ rating, COALESCE(series_suffix, '') AS series_suffix,
+ COALESCE(storage_type, 'file') AS storage_type
FROM library WHERE filename = %s
""",
(filename,),
@@ -668,9 +735,12 @@ async def book_detail_page(filename: str, request: Request):
"publish_date": lib_row[10].isoformat() if lib_row[10] else "",
"description": lib_row[11] or "",
"rating": lib_row[12] or 0,
+ "storage_type": lib_row[14] or "file",
}
- # Supplement empty fields from EPUB metadata
- if not entry["source_url"] or not entry["publish_date"] or not entry["description"]:
+ # Supplement empty fields from EPUB metadata (file-based books only)
+ if not db_book and path and (
+ not entry["source_url"] or not entry["publish_date"] or not entry["description"]
+ ):
epub_meta = scan_epub(path)
if not entry["source_url"]:
entry["source_url"] = epub_meta.get("source_url", "")
@@ -679,12 +749,15 @@ async def book_detail_page(filename: str, request: Request):
if not entry["description"]:
entry["description"] = epub_meta.get("description", "")
else:
+ if db_book:
+ return HTMLResponse("Not found", status_code=404)
entry = scan_epub(path)
entry.setdefault("want_to_read", False)
entry.setdefault("archived", False)
entry.setdefault("publish_date", "")
entry.setdefault("description", "")
entry.setdefault("rating", 0)
+ entry.setdefault("storage_type", "file")
cur.execute(
"SELECT tag, tag_type FROM book_tags WHERE filename = %s ORDER BY tag_type, tag",
@@ -702,7 +775,7 @@ async def book_detail_page(filename: str, request: Request):
else:
tags_list.append(tag)
- if not rows:
+ if not rows and not db_book and path:
# Fallback for books where tags only exist in OPF after DB loss/rebuild.
epub_meta = scan_epub(path)
for subject in epub_meta.get("subjects", []):
@@ -758,6 +831,7 @@ async def book_detail_page(filename: str, request: Request):
"cfi": cfi,
"rating": entry.get("rating", 0),
"series_is_indexed": series_is_indexed,
+ "storage_type": entry.get("storage_type", "file"),
})
@@ -802,10 +876,6 @@ async def api_suggestions(type: str | None = None):
@router.patch("/library/book/{filename:path}")
async def book_update(filename: str, request: Request):
"""Update book metadata and tags, and rename/move the file when needed."""
- old_path = resolve_library_path(filename)
- if old_path is None or not old_path.exists():
- return JSONResponse({"error": "not found"}, status_code=404)
-
body = await request.json()
title = body.get("title", "")
author = body.get("author", "")
@@ -813,6 +883,89 @@ async def book_update(filename: str, request: Request):
series = body.get("series", "")
from routers.common import parse_volume_str
series_index, series_suffix = parse_volume_str(body.get("series_index", ""))
+
+ # --- DB-stored book branch (no file on disk) ---
+ if is_db_filename(filename):
+ base_new = make_rel_path(
+ media_type="db",
+ publisher=publisher,
+ author=author,
+ title=title,
+ series=series,
+ series_index=series_index,
+ series_suffix=series_suffix,
+ ).as_posix()
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,))
+ if not cur.fetchone():
+ return JSONResponse({"error": "not found"}, status_code=404)
+ new_filename = ensure_unique_db_filename(conn, base_new) if base_new != filename else filename
+ with conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT has_cover FROM library WHERE filename = %s", (filename,))
+ row = cur.fetchone()
+ has_cover = bool(row[0]) if row else False
+ cur.execute(
+ """
+ INSERT INTO library (
+ filename, title, author, publisher, has_cover,
+ series, series_index, series_suffix, publication_status,
+ source_url, publish_date, description,
+ archived, needs_review, storage_type, updated_at
+ )
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, 'db', NOW())
+ ON CONFLICT (filename) DO UPDATE SET
+ title = EXCLUDED.title,
+ author = EXCLUDED.author,
+ publisher = EXCLUDED.publisher,
+ series = EXCLUDED.series,
+ series_index = EXCLUDED.series_index,
+ series_suffix = EXCLUDED.series_suffix,
+ publication_status = EXCLUDED.publication_status,
+ source_url = EXCLUDED.source_url,
+ publish_date = EXCLUDED.publish_date,
+ description = EXCLUDED.description,
+ needs_review = FALSE,
+ updated_at = NOW()
+ """,
+ (
+ new_filename, title, author, publisher, has_cover,
+ series, series_index if series else 0,
+ series_suffix if series else "",
+ body.get("publication_status", ""),
+ body.get("source_url", ""),
+ body.get("publish_date") or None,
+ body.get("description", ""),
+ ),
+ )
+ if new_filename != filename:
+ cur.execute("UPDATE book_tags SET filename = %s WHERE filename = %s", (new_filename, filename))
+ cur.execute("UPDATE reading_progress SET filename = %s WHERE filename = %s", (new_filename, filename))
+ cur.execute("UPDATE reading_sessions SET filename = %s WHERE filename = %s", (new_filename, filename))
+ cur.execute("UPDATE library_cover_cache SET filename = %s WHERE filename = %s", (new_filename, filename))
+ cur.execute("UPDATE book_chapters SET filename = %s WHERE filename = %s", (new_filename, filename))
+ cur.execute("UPDATE bookmarks SET filename = %s WHERE filename = %s", (new_filename, filename))
+ cur.execute("DELETE FROM library WHERE filename = %s", (filename,))
+ cur.execute("DELETE FROM book_tags WHERE filename = %s", (new_filename,))
+ rows = (
+ [(new_filename, g, "genre") for g in body.get("genres", []) if g]
+ + [(new_filename, g, "subgenre") for g in body.get("subgenres", []) if g]
+ + [(new_filename, g, "tag") for g in body.get("tags", []) if g]
+ )
+ if rows:
+ cur.executemany(
+ "INSERT INTO book_tags (filename, tag, tag_type) VALUES (%s, %s, %s)"
+ " ON CONFLICT (filename, tag, tag_type) DO NOTHING",
+ rows,
+ )
+ return JSONResponse({"ok": True, "filename": new_filename, "renamed": new_filename != filename})
+
+ # --- File-based book branch ---
+ old_path = resolve_library_path(filename)
+ if old_path is None or not old_path.exists():
+ return JSONResponse({"error": "not found"}, status_code=404)
+
ext = old_path.suffix.lower()
target_rel = _make_rel_path(
@@ -928,9 +1081,12 @@ async def book_update(filename: str, request: Request):
@router.post("/library/rating/{filename:path}")
async def set_rating(filename: str, request: Request):
"""Set (or clear) a 1-5 star rating for a book. rating=0 removes it."""
- path = resolve_library_path(filename)
- if path is None or not path.exists():
- return JSONResponse({"error": "not found"}, status_code=404)
+ if not is_db_filename(filename):
+ path = resolve_library_path(filename)
+ if path is None or not path.exists():
+ return JSONResponse({"error": "not found"}, status_code=404)
+ else:
+ path = None
body = await request.json()
try:
@@ -938,17 +1094,18 @@ async def set_rating(filename: str, request: Request):
except (TypeError, ValueError):
return JSONResponse({"error": "invalid rating"}, status_code=400)
- ext = path.suffix.lower()
- if ext == ".epub":
- try:
- _write_epub_rating(path, rating)
- except Exception as e:
- return JSONResponse({"error": f"epub write failed: {e}"}, status_code=500)
- elif ext == ".cbz":
- try:
- _write_cbz_rating(path, rating)
- except Exception as e:
- return JSONResponse({"error": f"cbz write failed: {e}"}, status_code=500)
+ if path is not None:
+ ext = path.suffix.lower()
+ if ext == ".epub":
+ try:
+ _write_epub_rating(path, rating)
+ except Exception as e:
+ return JSONResponse({"error": f"epub write failed: {e}"}, status_code=500)
+ elif ext == ".cbz":
+ try:
+ _write_cbz_rating(path, rating)
+ except Exception as e:
+ return JSONResponse({"error": f"cbz write failed: {e}"}, status_code=500)
with get_db_conn() as conn:
with conn:
@@ -961,18 +1118,313 @@ async def set_rating(filename: str, request: Request):
return JSONResponse({"ok": True, "rating": rating})
+# ---------------------------------------------------------------------------
+# Phase 4 — EPUB → DB conversion
+# ---------------------------------------------------------------------------
+
+def _epub_body_inner(xhtml: str, z: zf.ZipFile, href: str) -> tuple[str, list[dict]]:
+ """Parse an EPUB chapter XHTML, rewrite inline images to imagestore URLs.
+
+ Returns (inner_html_without_body_tags, []). Images are written to disk but
+ not registered in book_images here (that happens in the final DB transaction).
+ """
+ soup = BeautifulSoup(xhtml, "lxml")
+ body = soup.find("body")
+ if not body:
+ return "", []
+
+ href_dir = href.rsplit("/", 1)[0] if "/" in href else ""
+ names = z.namelist()
+
+ for img in body.find_all("img"):
+ src = img.get("src", "")
+ if not src or src.startswith("http") or src.startswith("data:"):
+ continue
+ # Resolve relative path inside ZIP
+ parts = (href_dir.split("/") if href_dir else []) + src.split("/")
+ resolved: list[str] = []
+ for p in parts:
+ if p == "..":
+ if resolved:
+ resolved.pop()
+ elif p:
+ resolved.append(p)
+ zip_path = "/".join(resolved)
+ img_data: bytes | None = None
+ if zip_path in names:
+ img_data = z.read(zip_path)
+ else:
+ lo = zip_path.lower()
+ match = next((n for n in names if n.lower() == lo), None)
+ if match:
+ img_data = z.read(match)
+ if img_data:
+ ext_s = zip_path.rsplit(".", 1)[-1].lower() if "." in zip_path else "jpg"
+ mime = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
+ "webp": "image/webp", "gif": "image/gif"}.get(ext_s, "image/jpeg")
+ _, _, url = write_image_file(img_data, mime)
+ img["src"] = url
+ else:
+ img.decompose()
+
+    # Strip the leading heading — EPUB chapters often open with the chapter title
+    # as an <h1>/<h2>/<h3>. The chapter endpoint always prepends its own <h2>,
+    # so keep the stored content heading-free.
+ for child in list(body.children):
+ if getattr(child, "name", None) is None:
+ continue # NavigableString / text node — skip
+ if not child.get_text(strip=True):
+ child.decompose()
+ continue
+        if child.name in ("h1", "h2", "h3"):
+            child.decompose()
+        break  # stop at the first substantive element, heading or not
+
+ return body.decode_contents(), []
+
+
+@router.post("/api/library/convert-to-db/{filename:path}")
+async def convert_to_db(filename: str):
+ """Convert a file-based EPUB to DB storage."""
+ if is_db_filename(filename):
+ return JSONResponse({"error": "Already a DB book"}, status_code=400)
+
+ old_path = resolve_library_path(filename)
+ if old_path is None or not old_path.exists():
+ return JSONResponse({"error": "File not found"}, status_code=404)
+ if old_path.suffix.lower() != ".epub":
+ return JSONResponse({"error": "Only EPUB files can be converted"}, status_code=400)
+
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ "SELECT title, author, publisher, series, series_index, series_suffix "
+ "FROM library WHERE filename = %s",
+ (filename,),
+ )
+ row = cur.fetchone()
+ if not row:
+ return JSONResponse({"error": "Book not in library"}, status_code=404)
+ title, author, publisher, series, series_index, series_suffix = row
+
+ # Extract chapters from EPUB
+ try:
+ spine = _epub_spine(old_path)
+ chapters = []
+ with zf.ZipFile(old_path, "r") as z:
+ for entry in spine:
+ try:
+ xhtml = z.read(entry["href"]).decode("utf-8", errors="replace")
+ except KeyError:
+ continue
+ inner, _ = _epub_body_inner(xhtml, z, entry["href"])
+ if inner.strip():
+ chapters.append({"title": entry["title"], "content_html": inner})
+ except Exception as e:
+ return JSONResponse({"error": f"Failed to extract EPUB: {e}"}, status_code=500)
+
+ if not chapters:
+ return JSONResponse({"error": "No chapters found"}, status_code=400)
+
+ base_fn = make_rel_path(
+ media_type="db",
+ publisher=publisher or "",
+ author=author or "",
+ title=title or "",
+ series=series or "",
+ series_index=series_index or 0,
+ series_suffix=series_suffix or "",
+ ).as_posix()
+
+ with get_db_conn() as conn:
+ with conn:
+ new_fn = ensure_unique_db_filename(conn, base_fn)
+ with conn.cursor() as cur:
+ # Insert new library row
+ cur.execute(
+ """
+ INSERT INTO library (filename, media_type, storage_type, title, author, publisher,
+ has_cover, series, series_index, series_suffix, publication_status,
+ source_url, publish_date, description, archived, want_to_read,
+ needs_review, rating, created_at, updated_at)
+ SELECT %s, media_type, 'db', title, author, publisher,
+ has_cover, series, series_index, series_suffix, publication_status,
+ source_url, publish_date, description, archived, want_to_read,
+ needs_review, rating, created_at, NOW()
+ FROM library WHERE filename = %s
+ """,
+ (new_fn, filename),
+ )
+ # Migrate child tables
+ cur.execute("UPDATE book_tags SET filename = %s WHERE filename = %s", (new_fn, filename))
+ cur.execute("UPDATE reading_progress SET filename = %s WHERE filename = %s", (new_fn, filename))
+ cur.execute(
+ "INSERT INTO reading_sessions (filename, read_at) SELECT %s, read_at FROM reading_sessions WHERE filename = %s",
+ (new_fn, filename),
+ )
+ cur.execute("DELETE FROM reading_sessions WHERE filename = %s", (filename,))
+ cur.execute("UPDATE bookmarks SET filename = %s WHERE filename = %s", (new_fn, filename))
+ cur.execute(
+ "INSERT INTO library_cover_cache (filename, mime_type, thumb_webp, updated_at) "
+ "SELECT %s, mime_type, thumb_webp, updated_at FROM library_cover_cache WHERE filename = %s",
+ (new_fn, filename),
+ )
+ cur.execute("DELETE FROM library_cover_cache WHERE filename = %s", (filename,))
+
+ # Insert chapters
+ for idx, ch in enumerate(chapters):
+ upsert_chapter(conn, new_fn, idx, ch["title"], ch["content_html"])
+
+ with conn.cursor() as cur:
+ cur.execute("DELETE FROM library WHERE filename = %s", (filename,))
+
+ try:
+ old_path.unlink()
+ prune_empty_dirs(old_path.parent)
+ except Exception:
+ pass
+
+ return JSONResponse({"ok": True, "new_filename": new_fn})
+
+
+# ---------------------------------------------------------------------------
+# Phase 5 — DB → EPUB export
+# ---------------------------------------------------------------------------
+
+def _rewrite_db_images_for_epub(content_html: str, seen: dict[str, str]) -> tuple[str, list[dict]]:
+ """Replace /library/db-images/... img src with EPUB-internal paths.
+
+ seen: sha256 → epub_path (deduplication across chapters)
+ Returns (modified_html, new_image_dicts) where dicts have epub_path/data/media_type.
+ """
+ soup = BeautifulSoup(content_html, "html.parser")
+ new_images: list[dict] = []
+ for img in soup.find_all("img"):
+ src = img.get("src", "")
+ if not src.startswith("/library/db-images/"):
+ continue
+ rel = src[len("/library/db-images/"):]
+ img_file = IMAGES_DIR / rel
+ if not img_file.exists():
+ img.decompose()
+ continue
+ sha256 = img_file.stem
+ ext = img_file.suffix.lower()
+ if sha256 not in seen:
+ epub_path = f"OEBPS/Images/{sha256}{ext}"
+ seen[sha256] = epub_path
+ mime = {".jpg": "image/jpeg", ".png": "image/png",
+ ".webp": "image/webp", ".gif": "image/gif"}.get(ext, "image/jpeg")
+ new_images.append({"epub_path": epub_path, "data": img_file.read_bytes(), "media_type": mime})
+ img["src"] = f"../Images/{sha256}{ext}"
+ return str(soup), new_images
+
+
+@router.get("/api/library/export-epub/{filename:path}")
+async def export_epub(filename: str):
+ """Export a DB-stored book as an EPUB download (no file written to disk)."""
+ if not is_db_filename(filename):
+ return JSONResponse({"error": "Not a DB book"}, status_code=400)
+
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ """SELECT title, author, publisher, series, series_index, publication_status,
+ source_url, description, publish_date
+ FROM library WHERE filename = %s""",
+ (filename,),
+ )
+ meta_row = cur.fetchone()
+ if not meta_row:
+ return JSONResponse({"error": "Not found"}, status_code=404)
+
+ cur.execute(
+ "SELECT tag, tag_type FROM book_tags WHERE filename = %s ORDER BY tag_type, tag",
+ (filename,),
+ )
+ tag_rows = cur.fetchall()
+
+ cur.execute(
+ "SELECT chapter_index, title, content FROM book_chapters "
+ "WHERE filename = %s ORDER BY chapter_index",
+ (filename,),
+ )
+ ch_rows = cur.fetchall()
+
+ cur.execute(
+ "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
+ (filename,),
+ )
+ cover_row = cur.fetchone()
+
+ title, author, publisher, series, series_index, pub_status, source_url, description, pub_date = meta_row
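+    # The cached webp thumbnail is the only cover copy kept for a DB-stored book
+    # (conversion deletes the EPUB), so it doubles as the exported cover image.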
+ cover_data: bytes | None = bytes(cover_row[0]) if cover_row and cover_row[0] else None
+
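+    # Split the flat tag rows back into the genre/subgenre/tag buckets that
+    # book_info expects.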
+ genres = [t for t, tp in tag_rows if tp == "genre"]
+ subgenres = [t for t, tp in tag_rows if tp == "subgenre"]
+ tags = [t for t, tp in tag_rows if tp in ("tag", "subject")]
+
+ book_info = {
+ "genres": genres, "subgenres": subgenres, "tags": tags,
+ "description": description or "",
+ "source_url": source_url or "",
+ "publisher": publisher or "",
+ "series": series or "",
+ "series_index": series_index or 1,
+ "publication_status": pub_status or "",
+ "updated_date": pub_date.isoformat() if pub_date else "",
+ }
+
+ seen_images: dict[str, str] = {}
+ chapters = []
+ for ch_idx, ch_title, ch_content in ch_rows:
+ modified_html, new_imgs = _rewrite_db_images_for_epub(ch_content, seen_images)
+ chapter_xhtml = make_chapter_xhtml(ch_title or f"Chapter {ch_idx + 1}", modified_html, ch_idx + 1)
+ chapters.append({"title": ch_title or f"Chapter {ch_idx + 1}", "xhtml": chapter_xhtml, "images": new_imgs})
+
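+    # break.png is the scene-break image packaged into generated EPUBs; fall
+    # back to empty bytes rather than failing the whole export if it's missing.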
+    try:
+        with open("static/break.png", "rb") as f:
+            break_img_data = f.read()
+    except Exception:
+        break_img_data = b""
+
+ book_id = str(uuid.uuid4())
+ epub_bytes = make_epub(
+ title or "Untitled", author or "Unknown", chapters,
+ cover_data, break_img_data, book_id, book_info,
+ )
+
+ safe_title = re.sub(r'[^\w\-. ]', '', (title or "book")).strip() or "book"
+ return Response(
+ content=epub_bytes,
+ media_type="application/epub+zip",
+ headers={"Content-Disposition": f'attachment; filename="{safe_title}.epub"'},
+ )
+
+
@router.get("/library/read/{filename:path}", response_class=HTMLResponse)
async def reader_page(filename: str, request: Request):
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT title FROM library WHERE filename = %s", (filename,))
+ row = cur.fetchone()
+
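+    # DB-stored books reuse the EPUB reader template with an empty epub_url;
+    # the reader is expected to pull chapters through the chapter API instead.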
+ if is_db_filename(filename):
+ if not row:
+ return HTMLResponse("Not found", status_code=404)
+ title = row[0] if row[0] else filename
+ return templates.TemplateResponse(request, "reader.html", {
+ "filename": filename,
+ "title": title,
+ "format": "epub",
+ "epub_url": "",
+ })
+
path = resolve_library_path(filename)
if path is None:
return HTMLResponse("Not found", status_code=404)
if not path.exists():
return HTMLResponse("Not found", status_code=404)
- with get_db_conn() as conn:
- with conn.cursor() as cur:
- cur.execute("SELECT title FROM library WHERE filename = %s", (filename,))
- row = cur.fetchone()
- title = row[0] if row and row[0] else filename
+ title = row[0] if row and row[0] else filename
fmt = path.suffix.lower().lstrip(".")
return templates.TemplateResponse(request, "reader.html", {
"filename": filename,
diff --git a/containers/novela/routers/search.py b/containers/novela/routers/search.py
new file mode 100644
index 0000000..8325bb0
--- /dev/null
+++ b/containers/novela/routers/search.py
@@ -0,0 +1,63 @@
+"""search.py — Full-text search over DB-stored book chapters."""
+
+from fastapi import APIRouter, Request
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.templating import Jinja2Templates
+
+from db import get_db_conn
+
+router = APIRouter()
+templates = Jinja2Templates(directory="templates")
+
+
+@router.get("/search", response_class=HTMLResponse)
+async def search_page(request: Request):
+ return templates.TemplateResponse(request, "search.html", {"active": "search"})
+
+
+@router.get("/api/search")
+async def api_search(q: str = ""):
+ q = q.strip()
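+    # Reject empty queries and cap length before touching the FTS index.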
+ if not q or len(q) > 500:
+ return JSONResponse([])
+ with get_db_conn() as conn:
+ with conn.cursor() as cur:
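+            # content_tsv is the stored tsvector column over chapter text; the
+            # LIKE clause additionally catches matches in chapter titles.
+            # StartSel/StopSel are blanked, presumably so the client highlights
+            # matches itself. Note that ts_headline runs for every matching row
+            # before LIMIT applies, so very broad queries pay a re-parse cost;
+            # a LIMITing subquery would avoid that if it becomes a problem.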
+ cur.execute(
+ """
+ SELECT
+ l.filename,
+ l.title,
+ l.author,
+ bc.chapter_index,
+ bc.title AS chapter_title,
+ ts_headline(
+ 'simple', bc.content,
+ plainto_tsquery('simple', %s),
+ 'MaxFragments=1, MaxWords=25, MinWords=8, StartSel=, StopSel='
+ ) AS snippet,
+ ts_rank(bc.content_tsv, plainto_tsquery('simple', %s)) AS rank
+ FROM book_chapters bc
+ JOIN library l ON l.filename = bc.filename
+ WHERE (bc.content_tsv @@ plainto_tsquery('simple', %s)
+ OR LOWER(bc.title) LIKE LOWER('%%' || %s || '%%'))
+ AND NOT l.archived
+ ORDER BY rank DESC, bc.chapter_index ASC
+ LIMIT 30
+ """,
+ (q, q, q, q),
+ )
+ rows = cur.fetchall()
+
+ results = [
+ {
+ "filename": r[0],
+ "title": r[1] or "",
+ "author": r[2] or "",
+ "chapter_index": r[3],
+ "chapter_title": r[4] or "",
+ "snippet": r[5] or "",
+ "rank": float(r[6]),
+ }
+ for r in rows
+ ]
+ return JSONResponse(results)
diff --git a/containers/novela/scrapers/__init__.py b/containers/novela/scrapers/__init__.py
index 3fdb823..18fb974 100644
--- a/containers/novela/scrapers/__init__.py
+++ b/containers/novela/scrapers/__init__.py
@@ -1,9 +1,11 @@
from .base import BaseScraper
+from .archiveofourown import ArchiveOfOurOwnScraper
from .awesomedude import AwesomeDudeScraper
from .gayauthors import GayAuthorsScraper
# Register scrapers in priority order (first match wins)
_SCRAPERS: list[type[BaseScraper]] = [
+ ArchiveOfOurOwnScraper,
AwesomeDudeScraper,
GayAuthorsScraper,
]
diff --git a/containers/novela/scrapers/archiveofourown.py b/containers/novela/scrapers/archiveofourown.py
new file mode 100644
index 0000000..f82b02f
--- /dev/null
+++ b/containers/novela/scrapers/archiveofourown.py
@@ -0,0 +1,206 @@
+import re
+from urllib.parse import urljoin
+
+import httpx
+from bs4 import BeautifulSoup
+
+from .base import BaseScraper
+
+AO3_BASE = "https://archiveofourown.org"
+
+
+class ArchiveOfOurOwnScraper(BaseScraper):
+
+ @classmethod
+ def matches(cls, url: str) -> bool:
+ return "archiveofourown.org" in url
+
+ def _work_base_url(self, url: str) -> str:
+ """Strip chapter segment and query string; return /works/NNNNNN base URL."""
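+        # e.g. https://archiveofourown.org/works/123456/chapters/789?view_full_work=true
+        #      -> https://archiveofourown.org/works/123456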
+ m = re.search(r"(https?://[^/]+/works/\d+)", url)
+ return m.group(1) if m else url.rstrip("/")
+
+ async def login(self, client: httpx.AsyncClient, username: str, password: str) -> bool:
+ r = await client.get(AO3_BASE + "/users/login")
+ soup = BeautifulSoup(r.text, "html.parser")
+ token_el = soup.find("input", {"name": "authenticity_token"})
+ token = token_el["value"] if token_el else ""
+ resp = await client.post(
+ AO3_BASE + "/users/login",
+ data={
+ "user[login]": username,
+ "user[password]": password,
+ "authenticity_token": token,
+ "commit": "Log in",
+ },
+ )
+ # Successful login redirects away from the login page
+ return "/users/login" not in str(resp.url)
+
+ async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict:
+ base_url = self._work_base_url(url)
+ r = await client.get(base_url, params={"view_adult": "true"})
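+        # view_adult=true skips AO3's adult-content interstitial, which would
+        # otherwise replace the work page body with a confirmation prompt.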
+ soup = BeautifulSoup(r.text, "html.parser")
+
+ # Title
+ title_el = soup.find("h2", class_="title")
+ book_title = title_el.get_text(strip=True) if title_el else "Unknown title"
+
+ # Author — can be multiple; Anonymous if no author link
+ byline = soup.find("h3", class_="byline")
+ if byline:
+ author_links = byline.find_all("a", rel="author")
+ author = ", ".join(a.get_text(strip=True) for a in author_links) if author_links else "Anonymous"
+ else:
+ author = "Anonymous"
+
+ # Tags from dl.work.meta.group
+ meta_dl = soup.find("dl", class_="work")
+
+ def _tag_list(dl, css_class: str) -> list[str]:
+ dd = dl.find("dd", class_=css_class) if dl else None
+ return [a.get_text(strip=True) for a in dd.find_all("a")] if dd else []
+
+ fandoms = _tag_list(meta_dl, "fandom")
+ ratings = _tag_list(meta_dl, "rating")
+ categories = _tag_list(meta_dl, "category")
+ relationships = _tag_list(meta_dl, "relationship")
+ characters = _tag_list(meta_dl, "character")
+ freeform_tags = _tag_list(meta_dl, "freeform")
+
+ # Series
+ series = ""
+ series_index_hint = 0
+ if meta_dl:
+ series_dd = meta_dl.find("dd", class_="series")
+ if series_dd:
+ series_link = series_dd.find("a")
+ if series_link:
+ series = series_link.get_text(strip=True)
+ pos_span = series_dd.find("span", class_="position")
+ if pos_span:
+ m = re.search(r"Part\s+(\d+)", pos_span.get_text(), re.I)
+ if m:
+ series_index_hint = int(m.group(1))
+
+ # Stats (nested dl.stats inside the meta dl)
+ published = ""
+ updated_date = ""
+ publication_status = ""
+ if meta_dl:
+ stats_dl = meta_dl.find("dl", class_="stats")
+ if stats_dl:
+ pub_dd = stats_dl.find("dd", class_="published")
+ if pub_dd:
+ published = pub_dd.get_text(strip=True)
+
+ status_dt = stats_dl.find("dt", class_="status")
+ status_dd = stats_dl.find("dd", class_="status")
+ if status_dt and status_dd:
+ updated_date = status_dd.get_text(strip=True)
+ if "Completed" in status_dt.get_text():
+ publication_status = "Complete"
+ else:
+ publication_status = "Ongoing"
+ else:
+ # No status entry — determine from chapters count (N/N = complete)
+ updated_date = published
+ chapters_dd = stats_dl.find("dd", class_="chapters")
+ if chapters_dd:
+ m = re.match(r"(\d+)/(\d+|\?)", chapters_dd.get_text(strip=True))
+ if m:
+ if m.group(2) == "?":
+ publication_status = "Ongoing"
+ elif m.group(1) == m.group(2):
+ publication_status = "Complete"
+
+ # Summary
+ description = ""
+ summary_div = soup.find("div", class_="summary")
+ if summary_div:
+ userstuff = summary_div.find("blockquote", class_="userstuff")
+ if userstuff:
+ paras = [p.get_text(strip=True) for p in userstuff.find_all("p") if p.get_text().strip()]
+ description = "\n\n".join(paras) if paras else userstuff.get_text(strip=True)
+
+ # Chapter list via /navigate
+ chapter_links = []
+ chapter_method = "html_scan"
+ try:
+ nr = await client.get(base_url + "/navigate", params={"view_adult": "true"})
+ nsoup = BeautifulSoup(nr.text, "html.parser")
+ chapter_ol = nsoup.find("ol", class_="chapter")
+ if chapter_ol:
+ for li in chapter_ol.find_all("li"):
+ a = li.find("a", href=True)
+ if a:
+ chapter_links.append({
+ "url": urljoin(AO3_BASE, a["href"]),
+ "title": a.get_text(strip=True),
+ })
+ except Exception:
+ pass
+
+ # Fallback: single-chapter work — the work page itself is the content
+ if not chapter_links:
+ chapter_method = "fallback_numeric"
+ chapter_links.append({"url": base_url, "title": book_title})
+
+ # Map to Novela tag structure:
+ # fandoms → genres
+ # ratings + categories → subgenres
+ # relationships + characters + freeform → tags
+ return {
+ "title": book_title,
+ "author": author,
+ "publisher": "Archive of Our Own",
+ "series": series,
+ "series_index_hint": series_index_hint,
+ "genres": fandoms,
+ "subgenres": ratings + categories,
+ "tags": relationships + characters + freeform_tags,
+ "description": description,
+ "updated_date": updated_date,
+ "publication_status": publication_status,
+ "source_url": base_url,
+ "chapters": chapter_links,
+ "chapter_method": chapter_method,
+ }
+
+ async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict:
+ r = await client.get(ch["url"], params={"view_adult": "true"})
+ soup = BeautifulSoup(r.text, "html.parser")
+
+ # Chapter title and optional summary from the chapter preface
+ title = ch["title"]
+ chapter_summary_bq = None
+ chapters_div = soup.find("div", id="chapters")
+ if chapters_div:
+ chapter_div = chapters_div.find("div", class_="chapter")
+ if chapter_div:
+ title_el = chapter_div.find("h3", class_="title")
+ if title_el:
+ raw = title_el.get_text(strip=True)
+ if raw:
+ title = raw
+ summary_div = chapter_div.find("div", class_="summary")
+ if summary_div:
+ chapter_summary_bq = summary_div.find("blockquote", class_="userstuff")
+
+ # Content: div.userstuff inside #chapters (excludes author notes)
+ content_el = None
+ if chapters_div:
+ content_el = chapters_div.find("div", class_="userstuff")
+ if not content_el:
+ content_el = soup.find("div", attrs={"role": "article"})
+
+ # Prepend chapter summary as blockquote before story content
+ if chapter_summary_bq and content_el:
+ content_el.insert(0, chapter_summary_bq)
+
+ return {
+ "title": title,
+ "content_el": content_el,
+ "selector_id": content_el.get("id") if content_el else None,
+ "selector_class": " ".join(content_el.get("class", [])) if content_el else None,
+ }
diff --git a/containers/novela/static/conversion.js b/containers/novela/static/conversion.js
index 12ba5ab..7e8b37b 100644
--- a/containers/novela/static/conversion.js
+++ b/containers/novela/static/conversion.js
@@ -71,9 +71,14 @@ function connectConversionStream(job_id) {
document.querySelectorAll('.chapter-item').forEach(el => el.className = 'chapter-item done');
document.getElementById('result-meta').innerHTML =
`${esc(d.title)}<br>${d.chapters} chapters successfully converted`;
- document.getElementById('download-btn').onclick = () => {
- window.location = `/download/${encodeURIComponent(d.filename)}`;
- };
+ const dlBtn = document.getElementById('download-btn');
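+        // DB-stored books have no file on disk, so the button streams the
+        // on-the-fly EPUB export instead of downloading a stored file.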
+        const dlLabel = dlBtn.querySelector('span');
+        if (d.storage_type === 'db') {
+          if (dlLabel) dlLabel.textContent = 'Export EPUB';
+          dlBtn.onclick = () => { window.location = `/api/library/export-epub/${encodeURIComponent(d.filename)}`; };
+        } else {
+          if (dlLabel) dlLabel.textContent = 'Download EPUB';
+          dlBtn.onclick = () => { window.location = `/download/${encodeURIComponent(d.filename)}`; };
+        }
document.getElementById('book-detail-btn').onclick = () => {
window.location = `/library/book/${encodeURIComponent(d.filename)}`;
};
diff --git a/containers/novela/static/editor.css b/containers/novela/static/editor.css
index 21ee86b..e61dd1c 100644
--- a/containers/novela/static/editor.css
+++ b/containers/novela/static/editor.css
@@ -30,6 +30,13 @@ html, body { height: 100%; background: var(--bg); color: var(--text); font-famil
text-align: center; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
}
+.chapter-title-input {
+ flex: 1; font-size: 0.72rem; font-family: var(--mono); color: var(--text);
+ background: var(--surface2); border: 1px solid var(--border); border-radius: var(--radius);
+ padding: 0.25rem 0.5rem; outline: none; min-width: 0;
+}
+.chapter-title-input:focus { border-color: var(--accent); }
+
.header-actions { display: flex; align-items: center; gap: 0.5rem; flex-shrink: 0; }
.save-status {
diff --git a/containers/novela/static/editor.js b/containers/novela/static/editor.js
index 9ca9d63..78c9626 100644
--- a/containers/novela/static/editor.js
+++ b/containers/novela/static/editor.js
@@ -1,20 +1,26 @@
require.config({ paths: { vs: 'https://cdn.jsdelivr.net/npm/monaco-editor@0.45.0/min/vs' } });
-const { filename } = EDITOR;
+const { filename, is_db } = EDITOR;
let editor = null;
let chapters = [];
let currentIndex = -1;
let dirty = new Set(); // indices with unsaved changes
let pendingContent = new Map(); // index -> modified content not yet saved
+let pendingTitles = new Map(); // index -> modified title not yet saved (DB only)
let loadingChapter = false; // suppress dirty events during setValue
let saving = false;
// ── Init Monaco ───────────────────────────────────────────────────────────────
require(['vs/editor/editor.main'], function () {
+ if (is_db) {
+ document.getElementById('header-chapter').style.display = 'none';
+ document.getElementById('chapter-title-input').style.display = '';
+ }
+
editor = monaco.editor.create(document.getElementById('editor-pane'), {
- language: 'xml',
+ language: is_db ? 'html' : 'xml',
theme: 'vs-dark',
wordWrap: 'on',
minimap: { enabled: true },
@@ -39,6 +45,19 @@ require(['vs/editor/editor.main'], function () {
// Ctrl+S / Cmd+S
editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, saveChapter);
+ if (is_db) {
+ document.getElementById('chapter-title-input').addEventListener('input', () => {
+ if (currentIndex >= 0) {
+ pendingTitles.set(currentIndex, document.getElementById('chapter-title-input').value);
+ dirty.add(currentIndex);
+ renderChapterList();
+ setStatus('dirty', 'Unsaved changes');
+ document.getElementById('btn-save').disabled = false;
+ updateSaveAll();
+ }
+ });
+ }
+
loadChapterList();
});
@@ -57,6 +76,7 @@ async function loadChapterList(targetIndex = 0) {
currentIndex = -1;
dirty.clear();
pendingContent.clear();
+ pendingTitles.clear();
renderChapterList();
document.getElementById('header-chapter').textContent = 'No chapters';
document.getElementById('btn-save').disabled = true;
@@ -94,6 +114,11 @@ async function switchChapter(index) {
if (dirty.has(currentIndex) && editor) {
pendingContent.set(currentIndex, editor.getValue());
}
+ // Preserve title input for DB books
+ if (is_db && currentIndex >= 0) {
+ const inp = document.getElementById('chapter-title-input');
+ if (inp) pendingTitles.set(currentIndex, inp.value);
+ }
loadChapter(index);
}
@@ -102,19 +127,19 @@ async function loadChapter(index) {
document.getElementById('btn-save').disabled = true;
document.getElementById('btn-break').disabled = true;
document.getElementById('btn-del-page').disabled = true;
- document.getElementById('header-chapter').textContent = 'Loading…';
+ if (!is_db) document.getElementById('header-chapter').textContent = 'Loading…';
let content, title;
if (pendingContent.has(index)) {
content = pendingContent.get(index);
- title = chapters[index]?.title ?? '';
+ title = pendingTitles.has(index) ? pendingTitles.get(index) : (chapters[index]?.title ?? '');
} else {
const resp = await fetch(`/api/edit/chapter/${index}/${encodeURIComponent(filename)}`);
if (!resp.ok) { setStatus('error', 'Load failed'); return; }
const data = await resp.json();
content = data.content;
- title = data.title;
+ title = pendingTitles.has(index) ? pendingTitles.get(index) : data.title;
}
currentIndex = index;
@@ -123,6 +148,7 @@ async function loadChapter(index) {
editor.setValue(content);
editor.setScrollTop(0);
loadingChapter = false;
+ editor.focus();
// Restore dirty state based on whether we loaded from pending cache
if (dirty.has(index)) {
@@ -134,7 +160,11 @@ async function loadChapter(index) {
}
renderChapterList();
- document.getElementById('header-chapter').textContent = title;
+ if (is_db) {
+ document.getElementById('chapter-title-input').value = title;
+ } else {
+ document.getElementById('header-chapter').textContent = title;
+ }
document.getElementById('btn-break').disabled = false;
document.getElementById('btn-del-page').disabled = chapters.length <= 1;
updateSaveAll();
@@ -149,18 +179,28 @@ async function saveChapter() {
setStatus('saving', 'Saving…');
try {
+ const saveBody = { content: editor.getValue() };
+ if (is_db) {
+ const inp = document.getElementById('chapter-title-input');
+ saveBody.title = inp ? inp.value.trim() : (pendingTitles.get(currentIndex) || '');
+ }
const resp = await fetch(
`/api/edit/chapter/${currentIndex}/${encodeURIComponent(filename)}`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ content: editor.getValue() }),
+ body: JSON.stringify(saveBody),
}
);
const data = await resp.json();
if (data.ok) {
dirty.delete(currentIndex);
pendingContent.delete(currentIndex);
+      if (is_db && chapters[currentIndex]) {
+        chapters[currentIndex].title = pendingTitles.get(currentIndex) || chapters[currentIndex].title;
+        pendingTitles.delete(currentIndex);
+      }
renderChapterList();
setStatus('saved', 'Saved');
setTimeout(() => setStatus('', ''), 2000);
@@ -186,9 +226,13 @@ async function saveAllChapters() {
if (btn) btn.disabled = true;
setStatus('saving', 'Saving all…');
- // Flush current editor content into pendingContent first
+ // Flush current editor content and title into pending caches first
if (currentIndex >= 0 && dirty.has(currentIndex)) {
pendingContent.set(currentIndex, editor.getValue());
+ if (is_db) {
+ const inp = document.getElementById('chapter-title-input');
+ if (inp) pendingTitles.set(currentIndex, inp.value);
+ }
}
const indices = [...dirty];
@@ -196,21 +240,29 @@ async function saveAllChapters() {
const content = pendingContent.has(i)
? pendingContent.get(i)
: (i === currentIndex ? editor.getValue() : null);
- if (!content) continue;
+      // For DB books, a title-only change has no pendingContent — still need to save.
+      // Compare against null explicitly so a chapter cleared to empty text still saves.
+      const hasTitleChange = is_db && pendingTitles.has(i);
+      if (content === null && !hasTitleChange) continue;
try {
+ const saveBody = { content: content || '' };
+ if (is_db) saveBody.title = pendingTitles.has(i) ? pendingTitles.get(i) : (chapters[i]?.title || '');
const resp = await fetch(
`/api/edit/chapter/${i}/${encodeURIComponent(filename)}`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ content }),
+ body: JSON.stringify(saveBody),
}
);
const data = await resp.json();
if (data.ok) {
dirty.delete(i);
pendingContent.delete(i);
+ if (is_db && chapters[i]) {
+ chapters[i].title = pendingTitles.get(i) || chapters[i].title;
+ pendingTitles.delete(i);
+ }
}
} catch {
setStatus('error', `Save failed on chapter ${i + 1}`);
@@ -251,10 +303,11 @@ function updateSaveAll() {
function insertBreak() {
if (!editor || currentIndex < 0) return;
+ const breakSrc = is_db ? '/static/break.png' : '../Images/break.png';
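+  // DB chapters render inside the app, so the break image is served from
+  // /static; EPUB chapters reference the copy packaged under ../Images.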
const pos = editor.getPosition();
editor.executeEdits('insert-break', [{
range: new monaco.Range(pos.lineNumber, pos.column, pos.lineNumber, pos.column),
-      text: '\n<img src="../Images/break.png"/>\n',
+      text: `\n<img src="${breakSrc}"/>\n`,
forceMoveMarkers: true,
}]);
editor.focus();
@@ -286,6 +339,7 @@ async function addChapter() {
dirty.clear();
pendingContent.clear();
+ pendingTitles.clear();
await loadChapterList(data.index ?? Math.max(currentIndex + 1, 0));
setStatus('saved', 'Page added');
setTimeout(() => setStatus('', ''), 1500);
@@ -315,6 +369,7 @@ async function deleteChapter() {
dirty.clear();
pendingContent.clear();
+ pendingTitles.clear();
await loadChapterList(data.index ?? Math.max(currentIndex - 1, 0));
setStatus('saved', 'Page deleted');
setTimeout(() => setStatus('', ''), 1500);
diff --git a/containers/novela/static/sidebar.css b/containers/novela/static/sidebar.css
index 47c34aa..df6d039 100644
--- a/containers/novela/static/sidebar.css
+++ b/containers/novela/static/sidebar.css
@@ -72,8 +72,10 @@ html {
text-decoration: none;
transition: background 0.12s, color 0.12s;
}
+.sidebar-nav a:visited { color: var(--text-dim); }
.sidebar-nav a:hover { background: var(--surface2); color: var(--text); }
-.sidebar-nav a.active { background: var(--surface2); color: var(--accent); }
+.sidebar-nav a.active,
+.sidebar-nav a.active:visited { background: var(--surface2); color: var(--accent); }
.sidebar-nav a svg { flex-shrink: 0; }
.sidebar-count {
diff --git a/containers/novela/templates/_sidebar.html b/containers/novela/templates/_sidebar.html
index 42816a0..6089076 100644
--- a/containers/novela/templates/_sidebar.html
+++ b/containers/novela/templates/_sidebar.html
@@ -152,6 +152,14 @@
+      <a href="/search" class="{% if active == 'search' %}active{% endif %}">
+        <svg><!-- search icon --></svg>
+        Search
+      </a>