diff --git a/build-and-push.sh b/build-and-push.sh index a2e7419..6b4be0e 100755 --- a/build-and-push.sh +++ b/build-and-push.sh @@ -3,75 +3,54 @@ set -euo pipefail # ============================================================================ # build-and-push.sh -# Location: repo root # # Purpose: -# - Automatic version bump: -# 1 = patch, 2 = minor, 3 = major, t = test -# - Test builds: only update :dev (no commit/tag) -# - Release builds: update version.txt, commit, tag, push (to the current branch) -# - Build & push Docker images for each service under ./compose/* -# - Preflight checks: Docker daemon up, logged in to registry, valid names/tags -# - Summary: show all images + tags built and pushed -# - Branch visibility: -# - Shows currently checked out branch (authoritative) -# - Reads .last-branch for info (if present) when BRANCH is not set -# - Writes the current branch back to .last-branch at the end +# - Build & push Docker images for each service under ./containers/* +# - Two modes: +# t (test) = only push :dev +# r (release) = push :, :dev, :latest +# version is read from the top of changelog.md +# +# No git operations: committing and tagging is done manually. # # Usage: -# BRANCH= ./build-and-push.sh [bump] # BRANCH is optional; informative only -# ./build-and-push.sh [bump] -# If [bump] is omitted, you will be prompted (default = t). +# ./build-and-push.sh [mode] +# - mode = t -> test build, push :dev only +# - mode = r -> release build, version taken from changelog.md +# - omitted -> prompt (default: t) +# +# Requirements: +# - docs/changelog.md (relative to repo root), with the most recent release +# at the top as: +# ## vX.Y.Z — YYYY-MM-DD +# (the version is parsed from the first such line) +# - One Dockerfile per service under ./containers//Dockerfile # ============================================================================ DOCKER_REGISTRY="gitea.oskamp.info" DOCKER_NAMESPACE="ivooskamp" -VERSION_FILE="version.txt" -START_VERSION="v0.1.0" -COMPOSE_DIR="containers" -LAST_BRANCH_FILE=".last-branch" # stored in repo root +CHANGELOG_FILE="docs/changelog.md" +CONTAINERS_DIR="containers" # --- Input: prompt if missing ------------------------------------------------ -BUMP="${1:-}" -if [[ -z "${BUMP}" ]]; then - echo "Select bump type: [1] patch, [2] minor, [3] major, [t] test (default: t)" - read -r BUMP - BUMP="${BUMP:-t}" +MODE="${1:-}" +if [[ -z "${MODE}" ]]; then + echo "Select build type: [t] test build (push :dev only), [r] release build (default: t)" + read -r MODE + MODE="${MODE:-t}" fi -if [[ "$BUMP" != "1" && "$BUMP" != "2" && "$BUMP" != "3" && "$BUMP" != "t" ]]; then - echo "[ERROR] Unknown bump type '$BUMP' (use 1, 2, 3, or t)." - exit 1 -fi +case "$MODE" in + t|test) MODE="t" ;; + r|release) MODE="r" ;; + *) + echo "[ERROR] Unknown mode '$MODE' (use 't' for test or 'r' for release)." + exit 1 + ;; +esac # --- Helpers ----------------------------------------------------------------- -read_version() { - if [[ -f "$VERSION_FILE" ]]; then - tr -d ' \t\n\r' < "$VERSION_FILE" - else - echo "$START_VERSION" - fi -} - -write_version() { - echo "$1" > "$VERSION_FILE" -} - -bump_version() { - local cur="$1" - local kind="$2" - local core="${cur#v}" - IFS='.' read -r MA MI PA <<< "$core" - case "$kind" in - 1) PA=$((PA + 1));; - 2) MI=$((MI + 1)); PA=0;; - 3) MA=$((MA + 1)); MI=0; PA=0;; - *) echo "[ERROR] Unknown bump kind"; exit 1;; - esac - echo "v${MA}.${MI}.${PA}" -} - check_docker_ready() { if ! docker info >/dev/null 2>&1; then echo "[ERROR] Docker daemon not reachable. Is Docker running and do you have permission to use it?" @@ -113,14 +92,35 @@ validate_tag() { fi } -# --- Preflight --------------------------------------------------------------- -if [[ ! -d ".git" ]]; then - echo "[ERROR] Not a git repository (.git missing)." - exit 1 -fi +# Parse the first "## vX.Y.Z ..." heading from changelog.md. +# Accepts: ## v1.0.3 — 2026-04-24 +# ## v1.0.3 - 2026-04-24 +# ## v1.0.3 +read_version_from_changelog() { + if [[ ! -f "$CHANGELOG_FILE" ]]; then + echo "[ERROR] $CHANGELOG_FILE not found in $(pwd)." >&2 + exit 1 + fi + local line + # Match lines starting with "## v.." + line="$(grep -m1 -E '^##[[:space:]]+v[0-9]+\.[0-9]+\.[0-9]+' "$CHANGELOG_FILE" || true)" + if [[ -z "$line" ]]; then + echo "[ERROR] No release heading found in $CHANGELOG_FILE (expected e.g. '## v1.0.3 — 2026-04-24' near the top)." >&2 + exit 1 + fi + # Extract the vX.Y.Z token + local version + version="$(echo "$line" | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+' | head -n1)" + if [[ -z "$version" ]]; then + echo "[ERROR] Could not parse version from line: $line" >&2 + exit 1 + fi + echo "$version" +} -if [[ ! -d "$COMPOSE_DIR" ]]; then - echo "[ERROR] '$COMPOSE_DIR' directory missing. Expected ./compose// with a Dockerfile." +# --- Preflight --------------------------------------------------------------- +if [[ ! -d "$CONTAINERS_DIR" ]]; then + echo "[ERROR] '$CONTAINERS_DIR' directory missing. Expected ./${CONTAINERS_DIR}// with a Dockerfile." exit 1 fi @@ -128,90 +128,44 @@ check_docker_ready ensure_registry_login validate_repo_component "$DOCKER_NAMESPACE" -# Detect currently checked out branch (authoritative for this script) -DETECTED_BRANCH="$(git branch --show-current 2>/dev/null || true)" -if [[ -z "$DETECTED_BRANCH" ]]; then - DETECTED_BRANCH="$(git symbolic-ref --quiet --short HEAD 2>/dev/null || true)" +# Informational: show branch and HEAD if this happens to be a git repo. +BRANCH_INFO="" +HEAD_INFO="" +if [[ -d ".git" ]]; then + BRANCH_INFO="$(git branch --show-current 2>/dev/null || echo unknown)" + HEAD_INFO="$(git rev-parse --short HEAD 2>/dev/null || echo unknown)" + echo "[INFO] Repo: $(pwd)" + echo "[INFO] Current branch: $BRANCH_INFO" + echo "[INFO] HEAD (sha): $HEAD_INFO" +else + echo "[INFO] Repo: $(pwd) (not a git checkout)" fi -if [[ -z "$DETECTED_BRANCH" ]]; then - # Try to derive from upstream - UPSTREAM_REF_DERIVED="$(git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null || true)" - if [[ -n "$UPSTREAM_REF_DERIVED" ]]; then - DETECTED_BRANCH="${UPSTREAM_REF_DERIVED#origin/}" + +# --- Determine version (release only) ---------------------------------------- +VERSION="" +if [[ "$MODE" == "r" ]]; then + VERSION="$(read_version_from_changelog)" + echo "[INFO] Release version (from $CHANGELOG_FILE): $VERSION" + validate_tag "$VERSION" + validate_tag "latest" + + # Ask for confirmation so you never accidentally re-push an old version or a wrong one. + read -r -p "Proceed building & pushing as ${VERSION}? [y/N] " CONFIRM + CONFIRM="${CONFIRM:-N}" + if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then + echo "[INFO] Aborted by user." + exit 0 fi -fi -if [[ -z "$DETECTED_BRANCH" ]]; then - DETECTED_BRANCH="main" -fi - -# Optional signals: BRANCH env and .last-branch (informational only) -ENV_BRANCH="${BRANCH:-}" -LAST_BRANCH_FILE_PATH="$(pwd)/$LAST_BRANCH_FILE" -LAST_BRANCH_VALUE="" -if [[ -z "$ENV_BRANCH" && -f "$LAST_BRANCH_FILE_PATH" ]]; then - LAST_BRANCH_VALUE="$(tr -d ' \t\n\r' < "$LAST_BRANCH_FILE_PATH")" -fi - -UPSTREAM_REF="$(git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null || echo "origin/$DETECTED_BRANCH")" -HEAD_SHA="$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")" - -echo "[INFO] Repo: $(pwd)" -echo "[INFO] Current branch: $DETECTED_BRANCH" -echo "[INFO] Upstream: $UPSTREAM_REF" -echo "[INFO] HEAD (sha): $HEAD_SHA" - -if [[ -n "$ENV_BRANCH" && "$ENV_BRANCH" != "$DETECTED_BRANCH" ]]; then - echo "[WARNING] BRANCH='$ENV_BRANCH' differs from checked out branch '$DETECTED_BRANCH'." - echo "[WARNING] This script does not switch branches; continuing on '$DETECTED_BRANCH'." -fi - -if [[ -n "$LAST_BRANCH_VALUE" && "$LAST_BRANCH_VALUE" != "$DETECTED_BRANCH" && -z "$ENV_BRANCH" ]]; then - echo "[INFO] .last-branch suggests '$LAST_BRANCH_VALUE', but current checkout is '$DETECTED_BRANCH'." - echo "[INFO] If you intended to build '$LAST_BRANCH_VALUE', switch branches first (use update-and-build.sh)." -fi - -# --- Versioning -------------------------------------------------------------- -CURRENT_VERSION="$(read_version)" -NEW_VERSION="$CURRENT_VERSION" -DO_TAG_AND_BUMP=true - -if [[ "$BUMP" == "t" ]]; then - echo "[INFO] Test build: keeping version $CURRENT_VERSION; will only update :dev." - DO_TAG_AND_BUMP=false else - NEW_VERSION="$(bump_version "$CURRENT_VERSION" "$BUMP")" - echo "[INFO] New version: $NEW_VERSION" -fi - -if $DO_TAG_AND_BUMP; then - validate_tag "$NEW_VERSION" -fi -validate_tag "latest" - -# --- Version update + VCS ops (release builds only) -------------------------- -if $DO_TAG_AND_BUMP; then - echo "[INFO] Writing $NEW_VERSION to $VERSION_FILE" - write_version "$NEW_VERSION" - - echo "[INFO] Git add + commit (branch: $DETECTED_BRANCH)" - git add "$VERSION_FILE" - git commit -m "Release $NEW_VERSION on branch $DETECTED_BRANCH (bump type $BUMP)" - - echo "[INFO] Git tag $NEW_VERSION" - git tag -a "$NEW_VERSION" -m "Release $NEW_VERSION" - - echo "[INFO] Git push + tags" - git push origin "$DETECTED_BRANCH" - git push --tags -else - echo "[INFO] Skipping commit/tagging (test build)." + echo "[INFO] Test build: only :dev will be pushed." fi +validate_tag "dev" # --- Build & push per service ------------------------------------------------ shopt -s nullglob -services=( "$COMPOSE_DIR"/* ) +services=( "$CONTAINERS_DIR"/* ) if [[ ${#services[@]} -eq 0 ]]; then - echo "[ERROR] No services found under $COMPOSE_DIR" + echo "[ERROR] No services found under $CONTAINERS_DIR" exit 1 fi @@ -231,18 +185,24 @@ for svc_path in "${services[@]}"; do IMAGE_BASE="${DOCKER_REGISTRY}/${DOCKER_NAMESPACE}/${svc}" - if $DO_TAG_AND_BUMP; then + if [[ "$MODE" == "r" ]]; then echo "============================================================" - echo "[INFO] Building ${svc} -> tags: ${NEW_VERSION}, latest" + echo "[INFO] Building ${svc} -> tags: ${VERSION}, dev, latest" echo "============================================================" - docker build -t "${IMAGE_BASE}:${NEW_VERSION}" -t "${IMAGE_BASE}:latest" -t "${IMAGE_BASE}:dev" "$svc_path" - docker push "${IMAGE_BASE}:${NEW_VERSION}" - docker push "${IMAGE_BASE}:latest" + docker build \ + -t "${IMAGE_BASE}:${VERSION}" \ + -t "${IMAGE_BASE}:dev" \ + -t "${IMAGE_BASE}:latest" \ + "$svc_path" + + docker push "${IMAGE_BASE}:${VERSION}" docker push "${IMAGE_BASE}:dev" - BUILT_IMAGES+=("${IMAGE_BASE}:${NEW_VERSION}" "${IMAGE_BASE}:latest" "${IMAGE_BASE}:dev") + docker push "${IMAGE_BASE}:latest" + + BUILT_IMAGES+=("${IMAGE_BASE}:${VERSION}" "${IMAGE_BASE}:dev" "${IMAGE_BASE}:latest") else echo "============================================================" - echo "[INFO] Test build ${svc} -> tag: latest" + echo "[INFO] Test build ${svc} -> tag: dev" echo "============================================================" docker build -t "${IMAGE_BASE}:dev" "$svc_path" docker push "${IMAGE_BASE}:dev" @@ -250,21 +210,27 @@ for svc_path in "${services[@]}"; do fi done -# --- Persist current branch to .last-branch ---------------------------------- -# (This helps script 1 to preselect next time, and is informative if you run script 2 standalone) -echo "$DETECTED_BRANCH" > "$LAST_BRANCH_FILE_PATH" - # --- Summary ----------------------------------------------------------------- echo "" echo "============================================================" -echo "[SUMMARY] Build & push complete (branch: $DETECTED_BRANCH)" -if $DO_TAG_AND_BUMP; then - echo "[INFO] Release version: $NEW_VERSION" +if [[ "$MODE" == "r" ]]; then + echo "[SUMMARY] Release build & push complete: $VERSION" else - echo "[INFO] Test build (no version bump)" + echo "[SUMMARY] Test build & push complete (:dev only)" +fi +if [[ -n "$BRANCH_INFO" ]]; then + echo "[INFO] Branch: $BRANCH_INFO HEAD: $HEAD_INFO" fi echo "[INFO] Images pushed:" for img in "${BUILT_IMAGES[@]}"; do echo " - $img" done echo "============================================================" +echo "" +echo "[REMINDER] No git operations were performed. If this was a release," +echo " commit and tag manually, e.g.:" +if [[ "$MODE" == "r" ]]; then + echo " git add -A && git commit -m \"Release ${VERSION}\"" + echo " git tag -a ${VERSION} -m \"Release ${VERSION}\"" + echo " git push && git push --tags" +fi diff --git a/containers/novela/cbr.py b/containers/novela/cbr.py index f425d12..8788ed3 100644 --- a/containers/novela/cbr.py +++ b/containers/novela/cbr.py @@ -1,3 +1,4 @@ +import functools from io import BytesIO from pathlib import Path import zipfile @@ -27,7 +28,9 @@ def _detect_format(path: Path) -> str: return "zip" if path.suffix.lower() == ".cbz" else "rar" -def cbr_page_list(path: Path) -> list[str]: +@functools.lru_cache(maxsize=64) +def _cbr_page_list_cached(str_path: str, _mtime: float) -> tuple[str, ...]: + path = Path(str_path) fmt = _detect_format(path) if fmt == "zip": with zipfile.ZipFile(path) as zf: @@ -38,7 +41,11 @@ def cbr_page_list(path: Path) -> list[str]: else: with rarfile.RarFile(path) as rf: names = [n for n in rf.namelist() if Path(n).suffix.lower() in SUPPORTED_IMG] - return sorted(names) + return tuple(sorted(names)) + + +def cbr_page_list(path: Path) -> list[str]: + return list(_cbr_page_list_cached(str(path), path.stat().st_mtime)) def cbr_page_count(path: Path) -> int: diff --git a/containers/novela/changelog.py b/containers/novela/changelog.py index 2b40ce7..9ecb9eb 100644 --- a/containers/novela/changelog.py +++ b/containers/novela/changelog.py @@ -3,6 +3,218 @@ Changelog data for Novela """ CHANGELOG = [ + { + "version": "v0.2.9", + "date": "2026-05-09", + "summary": "Reading position is now monotonic across devices — only advances, never rewinds; explicit Mark as read/unread still resets.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Reader: reading position is now monotonic across devices — when the same book is read on multiple devices, the saved position only advances and never rewinds; previously, opening the book on a device with an older stored position would overwrite the further progress made on another device. The progress endpoint compares the incoming (chapterIndex, scrollFrac) to the stored value and only writes when the new position is strictly further. Explicit Mark as read / Mark as unread still clears the row, so deliberate restarts work as before.", + ], + }, + ], + }, + { + "version": "v0.2.8", + "date": "2026-04-22", + "summary": "Newly converted books from the grabber show up in the New view again.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Grabber: newly converted books now appear in the New view again — both the DB-storage and file-EPUB branches in routers/grabber.py now persist needs_review=True on upsert_book (was False); the New view filters on needs_review, so previously grabbed books never showed up there", + ], + }, + ], + }, + { + "version": "v0.2.7", + "date": "2026-04-22", + "summary": "Consecutive scene-break images are now collapsed to a single break, and TECHNICAL.md is brought up to date with recent FlareSolverr, Book Info and editor changes.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Break detection: runs of 2+ consecutive scene-break images are now collapsed to a single break — the reader and exported EPUBs no longer show multiple identical break images directly after each other", + ], + }, + { + "title": "Internal", + "type": "improvement", + "changes": [ + "New helper collapse_consecutive_breaks() in xhtml.py matches 2+ consecutive break-image
lines (with optional whitespace between) and replaces them with one; applied in normalize_wysiwyg_html() (editor save path) and in routers/grabber.py on both the preview converted_xhtml and the per-chapter content_html produced during scraping", + "docs/TECHNICAL.md updated to cover previously missing changes: POST /api/edit/intro/{filename} and the title field on file-EPUB chapter save; FlareSolverr sidecar and BaseScraper.close(); AwesomeDudeScraper uses FlareSolverr; make_epub(include_intro=…) and epub_utils.build_book_info_body_html; grabber DB flow stores Book Info as chapter 0; 'Book Info' h1-strip skip in reader; new env vars (FLARESOLVERR_URL, FLARESOLVERR_TIMEOUT_MS, NOVELA_PORT, ADMINER_PORT); collapse_consecutive_breaks() helper", + ], + }, + ], + }, + { + "version": "v0.2.6", + "date": "2026-04-22", + "summary": "FlareSolverr sidecar lets the scraper bypass Cloudflare 'Just a moment…' challenges (awesomedude.org), with per-book sessions so chapters after the first are much faster.", + "sections": [ + { + "title": "New features", + "type": "feature", + "changes": [ + "Scrapers: Cloudflare-protected sites (e.g. awesomedude.org, now fully behind a 'Just a moment…' JavaScript challenge) can be scraped again via a new FlareSolverr sidecar service that solves the challenge in a headless browser; the novela container uses FlareSolverr for both the book-info page and every chapter fetch", + "Per-book FlareSolverr sessions: the scraper creates one browser session at the start of a book, reuses it across all chapters (Cloudflare cookies stay warm), and destroys it on completion — only the first request pays the full challenge-solve cost and subsequent chapters are much faster", + ], + }, + { + "title": "Internal", + "type": "improvement", + "changes": [ + "stack/stack.yml adds a flaresolverr service (image ghcr.io/flaresolverr/flaresolverr:latest, internal-only, on novela-net); novela gains FLARESOLVERR_URL=http://flaresolverr:8191/v1 and depends_on: flaresolverr", + "Host port mappings in stack/stack.yml are now driven by ${NOVELA_PORT} and ${ADMINER_PORT}, defaulted in stack/novela.env to 8099 / 8098, so production stacks can override without diverging from the repo", + "New helpers in scrapers/base.py: flaresolverr_get(url, timeout_ms=None, session=None) returns a SimpleNamespace(text, url) as a drop-in for httpx.Response attributes; flaresolverr_session_create() and flaresolverr_session_destroy(sid) manage browser sessions; configurable via FLARESOLVERR_URL and FLARESOLVERR_TIMEOUT_MS env vars", + "BaseScraper gained an async close() method (default no-op) so scrapers can release scoped resources", + "scrapers/awesomedude.py creates a FlareSolverr session in fetch_book_info, reuses it in every fetch_chapter call, and destroys it in close()", + "routers/grabber.py wraps all three scraper usages (debug_run, preview, _run_scrape) in try/finally: await scraper.close() so FlareSolverr sessions are always released, even on errors", + ], + }, + ], + }, + { + "version": "v0.2.5", + "date": "2026-04-22", + "summary": "Book Info page generator in the editor, editable chapter titles for file EPUBs, and Book Info page auto-inserted during DB-storage conversions.", + "sections": [ + { + "title": "New features", + "type": "feature", + "changes": [ + "Editor: Info page button in the chapter editor toolbar generates a gayauthors-style book-info page (title, author, genres, sub-genres, tags, description, source, updated) and inserts it as the first chapter; empty metadata fields are skipped; no duplicate detection — clicking it again will add another page", + "Editor: chapter titles are now editable for file-EPUB books (DB books already supported this); the chapter-title input works for both storage types, and for file EPUBs the matching NCX navPoint is updated on save so the table of contents reflects the new title", + "Grabber: DB-storage conversions now persist the Book Info page as a real stored chapter at index 0, so it is visible in the editor and reader (EPUB-storage conversions continue to produce intro.xhtml via make_epub as before)", + ], + }, + { + "title": "Internal", + "type": "improvement", + "changes": [ + "New endpoint POST /api/edit/intro/{filename} — for DB books shifts existing chapter_index values up by one via a two-step negation and inserts 'Book Info' at index 0; for file EPUBs writes a new intro_.xhtml via make_intro_xhtml, adds a manifest item, places the itemref at the start of the spine, and inserts a navPoint at the top of the NCX with renumbered playOrder", + "POST /api/edit/chapter/{index}/{filename} for file EPUBs now accepts a title field alongside content and updates the matching NCX navPoint text when it changes", + "make_epub gained an include_intro: bool = True parameter; DB → EPUB export (reader.py) calls it with include_intro=False because the stored chapter 0 is now the single source of truth for the info page", + "reader.py leading-h-tag stripping (get_chapter_html and DB→EPUB export) is skipped when title == 'Book Info', so the

{book title}

in that chapter's body survives", + "New helper epub_utils.build_book_info_body_html(title, author, info) returns the inner-body HTML fragment for DB storage; skips empty fields and separates description and source/updated blocks with
", + ], + }, + ], + }, + { + "version": "v0.2.4", + "date": "2026-04-21", + "summary": "Backup: separate Scanned vs Uploaded counts, and live phase indicator on the backup page.", + "sections": [ + { + "title": "Improvements", + "type": "improvement", + "changes": [ + "Backup: status and history now clearly distinguish Scanned (library files inspected) from Uploaded (objects actually sent to Dropbox — library + snapshot + pg_dump); previously only the upload count was shown, which was confusing when most files were already deduplicated", + "Backup page: live phase indicator shown under the Run buttons while a backup is running (scanning library, uploading library objects, uploading snapshot, uploading pg_dump), so it is clear the process is not stuck at N/N while snapshot and pg_dump are uploaded", + ], + }, + { + "title": "Internal", + "type": "improvement", + "changes": [ + "Migration backup_log_scanned_files adds a scanned_files column to backup_log; /api/backup/status and /api/backup/history return uploaded_files and scanned_files (the old files_count key was renamed to uploaded_files)", + ], + }, + ], + }, + { + "version": "v0.2.3", + "date": "2026-04-21", + "summary": "Backup: Dropbox upload timeout and chunk size tuned to prevent read-timeout errors.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Backup: Dropbox uploads no longer fail with 'HTTPSConnectionPool ... Read timed out. (read timeout=120)' — the Dropbox client timeout was raised from 120s to 300s and the upload chunk size was reduced from 100 MB to 16 MB so each chunk completes comfortably within the timeout window", + ], + }, + ], + }, + { + "version": "v0.2.2", + "date": "2026-04-16", + "summary": "Four inline formatting buttons in the chapter editor: subheading, chat, indent, and comment block.", + "sections": [ + { + "title": "New features", + "type": "feature", + "changes": [ + "Editor: four inline formatting buttons added to the chapter editor toolbar — S (subheading, red bold), C (chat, orange), →| (indented paragraph), [ ] (comment block with blue left border); each button wraps the selected text or inserts an empty tag at the cursor; wrap logic automatically uses a
when the selection contains block elements to keep the HTML valid", + ], + }, + ], + }, + { + "version": "v0.2.1", + "date": "2026-04-16", + "summary": "Migration progress now visible in Docker logs at startup.", + "sections": [ + { + "title": "Improvements", + "type": "improvement", + "changes": [ + "Startup: migration progress is now visible in Docker logs — each migration logs whether it was skipped or executed (with duration in ms); a summary line shows either 'all already applied' or how many were executed", + ], + }, + ], + }, + { + "version": "v0.2.0", + "date": "2026-04-15", + "summary": "Deferred chapter save in the editor, startup performance, ETag accuracy, scraper encoding fixes, and internal hardening.", + "sections": [ + { + "title": "New features", + "type": "feature", + "changes": [ + "Editor: chapter add and delete are now deferred — structural changes are no longer saved immediately; they are applied in the correct order when the Save button is pressed", + "Operations: GET /health endpoint — returns {\"ok\": true} when the database is reachable; suitable for container health checks and monitoring", + ], + }, + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Editor: adding a chapter to a DB-stored book no longer fails with a UniqueViolation — PostgreSQL was checking the unique constraint on (filename, chapter_index) mid-update; fixed with a two-step index shift", + "Scraper: Codey's World pages now decode correctly — pages are read as Windows-1252 (cp1252), which correctly maps the 0x80–0x9F byte range; characters like …, ', \", — no longer appear as replacement characters", + "XHTML conversion:   followed by a regular space no longer produces a double space — non-breaking spaces are normalized to regular spaces and consecutive spaces are collapsed; applies to all scrapers", + ], + }, + { + "title": "Improvements", + "type": "improvement", + "changes": [ + "Startup: each database migration now runs only once — a schema_migrations tracking table prevents heavy migrations from re-running on every container restart; startup connection overhead reduced from 37 separate connections to 1", + "Library API: ETag now reflects changes to tags and reading progress — tag edits and progress updates correctly invalidate the client cache", + "CBR/CBZ reader: page list is cached per file and modification time — avoids opening the archive twice per page request", + "Grabber and backup: in-memory job dicts are capped at 50 entries to prevent unbounded memory growth", + ], + }, + { + "title": "Internal", + "type": "improvement", + "changes": [ + "Shared epub_utils.py module eliminates near-identical EPUB helper functions that existed across reader.py, editor.py, and common.py; fixes a double-escaped regex in the old OPF path lookup", + "pdf_cover_thumb no longer writes a temporary file — cover thumbnail generated fully in-memory, eliminating a race condition under concurrent requests", + "security.py: hardcoded fallback encryption key removed; raises a clear error at startup when no key is configured; Fernet instance cached per process", + "builder.py: all explicit conn.commit() calls replaced with with conn: context manager", + ], + }, + ], + }, { "version": "v0.1.12", "date": "2026-04-15", diff --git a/containers/novela/epub.py b/containers/novela/epub.py index fef8474..480322a 100644 --- a/containers/novela/epub.py +++ b/containers/novela/epub.py @@ -219,6 +219,7 @@ def make_epub( break_img_data: bytes, book_id: str, book_info: dict | None = None, + include_intro: bool = True, ) -> bytes: """Build a complete EPUB 2.0 in-memory and return the bytes.""" buf = io.BytesIO() @@ -240,7 +241,7 @@ def make_epub( """, ) - css = open("static/epub-style.css", "r", encoding="utf-8").read() + css = Path("static/epub-style.css").read_text(encoding="utf-8") zf.writestr("OEBPS/Styles/style.css", css) zf.writestr("OEBPS/Images/break.png", break_img_data) @@ -257,7 +258,8 @@ def make_epub( cover_filename, cover_media_type = detect_image_format(cover_data, "cover") zf.writestr(f"OEBPS/Images/{cover_filename}", cover_data) - zf.writestr("OEBPS/Text/intro.xhtml", make_intro_xhtml(book_title, author, info)) + if include_intro: + zf.writestr("OEBPS/Text/intro.xhtml", make_intro_xhtml(book_title, author, info)) # Chapter images for ch in chapters: @@ -291,12 +293,13 @@ def make_epub( ) manifest_items.append('') manifest_items.append('') - manifest_items.append('') + if include_intro: + manifest_items.append('') for i, (fname, _) in enumerate(chapter_files, 1): manifest_items.append(f'') manifest_items.append('') - spine_items = [''] + [ + spine_items = ([''] if include_intro else []) + [ f'' for i in range(1, len(chapter_files) + 1) ] @@ -354,15 +357,18 @@ def make_epub( zf.writestr("OEBPS/content.opf", opf) # TOC NCX - nav_points = [ - """ + nav_points = [] + if include_intro: + nav_points.append( + """ Book Info """ - ] + ) + ch_offset = 1 if include_intro else 0 for i, (fname, title) in enumerate(chapter_files, 1): nav_points.append( - f""" + f""" {he(title)} """ @@ -426,7 +432,7 @@ def write_epub_file(epub_path, internal_path: str, content: str) -> None: if not has_break: try: - zout.writestr(break_img_path, open("static/break.png", "rb").read()) + zout.writestr(break_img_path, Path("static/break.png").read_bytes()) except Exception: pass diff --git a/containers/novela/epub_utils.py b/containers/novela/epub_utils.py new file mode 100644 index 0000000..3132a87 --- /dev/null +++ b/containers/novela/epub_utils.py @@ -0,0 +1,224 @@ +"""Shared EPUB utilities — used by routers/reader.py, routers/editor.py, routers/common.py.""" + +import html as _html +import posixpath +import re +import zipfile as zf +from pathlib import Path + +from bs4 import BeautifulSoup + + +def find_opf_path(names: set[str], container_xml: str | None) -> str | None: + """Locate the OPF file path inside an EPUB ZIP.""" + opf_path = "OEBPS/content.opf" + if container_xml: + m = re.search(r"full-path\s*=\s*['\"]([^'\"]+)['\"]", container_xml) + if m: + opf_path = m.group(1) + if opf_path in names: + return opf_path + candidates = sorted(n for n in names if n.lower().endswith(".opf")) + return candidates[0] if candidates else None + + +def norm_href(base_dir: str, rel: str) -> str: + """Resolve a relative EPUB href against a base directory, stripping fragments.""" + rel = (rel or "").split("#", 1)[0].strip() + if not rel: + return "" + return posixpath.normpath(posixpath.join(base_dir, rel)).lstrip("./") + + +def epub_spine(path: Path) -> list[dict]: + """Return an ordered list of ``{index, title, href}`` for all spine items. + + Supports EPUB2 (toc.ncx) and EPUB3 (nav.xhtml). Uses the OPF path from + META-INF/container.xml via :func:`find_opf_path` — fixes the double-escaped + ``\\\\s*`` regex that existed in the old private ``_epub_spine`` copies. + """ + with zf.ZipFile(path, "r") as z: + names = set(z.namelist()) + + container_xml = ( + z.read("META-INF/container.xml").decode("utf-8", errors="replace") + if "META-INF/container.xml" in names + else None + ) + opf_path = find_opf_path(names, container_xml) + if not opf_path: + return [] + + opf_xml = z.read(opf_path).decode("utf-8", errors="replace") + opf = BeautifulSoup(opf_xml, "xml") + opf_dir = posixpath.dirname(opf_path) + + manifest: dict[str, str] = {} + for item in opf.find_all("item"): + iid = item.get("id") + href = item.get("href") + if iid and href: + manifest[iid] = norm_href(opf_dir, href) + + spine_idrefs: list[str] = [] + spine_tag = opf.find("spine") + toc_id = spine_tag.get("toc") if spine_tag else None + if spine_tag: + for ir in spine_tag.find_all("itemref"): + rid = ir.get("idref") + if rid: + spine_idrefs.append(rid) + + hrefs = [manifest[rid] for rid in spine_idrefs if rid in manifest] + href_to_title: dict[str, str] = {} + + # EPUB2: NCX titles + ncx_path = "" + if toc_id and toc_id in manifest: + ncx_path = manifest[toc_id] + elif "toc.ncx" in names: + ncx_path = "toc.ncx" + elif "OEBPS/toc.ncx" in names: + ncx_path = "OEBPS/toc.ncx" + + if ncx_path and ncx_path in names: + try: + ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace") + ncx = BeautifulSoup(ncx_xml, "xml") + ncx_dir = posixpath.dirname(ncx_path) + for np in ncx.find_all("navPoint"): + content = np.find("content") + label_tag = np.find("text") + src = content.get("src") if content else "" + label = label_tag.get_text(strip=True) if label_tag else "" + if src and label: + href_to_title[norm_href(ncx_dir, src)] = _html.unescape(label) + except Exception: + pass + + # EPUB3: nav.xhtml titles (fallback) + if not href_to_title: + nav_item = None + for item in opf.find_all("item"): + props = (item.get("properties") or "").split() + if "nav" in props: + nav_item = item + break + if nav_item and nav_item.get("href"): + nav_path = norm_href(opf_dir, nav_item.get("href")) + if nav_path in names: + try: + nav_xml = z.read(nav_path).decode("utf-8", errors="replace") + nav = BeautifulSoup(nav_xml, "lxml") + nav_dir = posixpath.dirname(nav_path) + for a in nav.select("nav a[href]"): + src = a.get("href", "") + label = a.get_text(" ", strip=True) + if src and label: + href_to_title[norm_href(nav_dir, src)] = _html.unescape(label) + except Exception: + pass + + chapters = [] + for i, href in enumerate(hrefs): + base = posixpath.basename(href) + title = href_to_title.get(href, re.sub(r"\.(xhtml|html|htm)$", "", base, flags=re.I)) + chapters.append({"index": i, "title": title or f"Chapter {i + 1}", "href": href}) + return chapters + + +def build_book_info_body_html(title: str, author: str, info: dict) -> str: + """Return the body-fragment HTML for a 'Book Info' chapter. + + Matches the gayauthors-style layout: title, author line, genres/sub-genres/tags, + description, and source/updated block — separated by ``
``. Fields + that are empty are skipped entirely. + """ + parts: list[str] = [] + t = (title or "").strip() + if t: + parts.append(f"

{_html.escape(t)}

") + a = (author or "").strip() + if a: + parts.append(f'

by {_html.escape(a)}

') + + genres = [g for g in (info.get("genres") or []) if g] + subgenres = [g for g in (info.get("subgenres") or []) if g] + tags = [t for t in (info.get("tags") or []) if t] + description = (info.get("description") or "").strip() + source_url = (info.get("source_url") or "").strip() + updated_date = (info.get("updated_date") or "").strip() + + if genres: + parts.append(f'

Genres: {_html.escape(", ".join(genres))}

') + if subgenres: + parts.append(f'

Sub-genres: {_html.escape(", ".join(subgenres))}

') + if tags: + parts.append(f'

Tags: {_html.escape(", ".join(tags))}

') + + if description: + parts.append("
") + for para in description.split("\n\n"): + p = para.strip() + if p: + parts.append(f"

{_html.escape(p)}

") + + if source_url or updated_date: + parts.append("
") + if source_url: + parts.append(f'

Source: {_html.escape(source_url)}

') + if updated_date: + parts.append(f'

Updated: {_html.escape(updated_date)}

') + + return "\n".join(parts) + + +def make_new_chapter_xhtml(title: str) -> str: + """Return a minimal valid XHTML 1.0 chapter stub.""" + safe_title = _html.escape((title or "New chapter").strip() or "New chapter") + return ( + '\n' + '\n' + '\n' + "\n" + ' \n' + f" {safe_title}\n" + ' \n' + "\n" + "\n" + f'

{safe_title}

\n' + "

\n" + "\n" + "\n" + ) + + +def rewrite_epub_entries( + epub_path: Path, + updates: dict[str, bytes], + remove_paths: set[str] | None = None, +) -> None: + """Rewrite entries in an EPUB ZIP. + + Crash-safe: writes to a ``.tmp.epub`` file first, then atomically replaces + the original. The ``mimetype`` entry is always stored uncompressed + (ZIP_STORED) as required by the EPUB spec. + """ + remove_paths = set(remove_paths or set()) + tmp = epub_path.with_suffix(".tmp.epub") + with zf.ZipFile(epub_path, "r") as zin, zf.ZipFile(tmp, "w", compression=zf.ZIP_DEFLATED) as zout: + existing: set[str] = set() + for item in zin.infolist(): + name = item.filename + existing.add(name) + if name in remove_paths: + continue + data = updates.get(name, zin.read(name)) + ctype = zf.ZIP_STORED if name == "mimetype" else zf.ZIP_DEFLATED + zout.writestr(item, data, compress_type=ctype) + for name, data in updates.items(): + if name not in existing and name not in remove_paths: + ctype = zf.ZIP_STORED if name == "mimetype" else zf.ZIP_DEFLATED + zout.writestr(name, data, compress_type=ctype) + tmp.replace(epub_path) diff --git a/containers/novela/main.py b/containers/novela/main.py index d53f8aa..aa60208 100644 --- a/containers/novela/main.py +++ b/containers/novela/main.py @@ -1,10 +1,13 @@ +import logging from contextlib import asynccontextmanager from fastapi import FastAPI -from fastapi.responses import RedirectResponse + +logging.basicConfig(level=logging.INFO) +from fastapi.responses import JSONResponse, RedirectResponse from fastapi.staticfiles import StaticFiles -from db import close_pool, init_pool +from db import close_pool, get_db_conn, init_pool from migrations import run_migrations from routers.backup import start_backup_scheduler, stop_backup_scheduler from routers import ( @@ -50,6 +53,18 @@ app.include_router(changelog_router) app.include_router(search_router) +@app.get("/health") +async def health(): + try: + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT 1") + db_ok = True + except Exception: + db_ok = False + return JSONResponse({"ok": db_ok}) + + @app.get("/") async def index_redirect(): return RedirectResponse(url="/home", status_code=302) diff --git a/containers/novela/migrations.py b/containers/novela/migrations.py index 5a01cf7..543c171 100644 --- a/containers/novela/migrations.py +++ b/containers/novela/migrations.py @@ -1,7 +1,11 @@ +import logging import re +import time from db import direct_connect +logger = logging.getLogger(__name__) + _DEFAULT_REGEX = [ r"^\s*[\*\-]{3,}\s*$", r"^\s*[·•◦‣⁃]\s*[·•◦‣⁃]\s*[·•◦‣⁃]\s*$", @@ -24,17 +28,32 @@ _DEFAULT_CSS = [ ] -def _exec(sql: str) -> None: - conn = direct_connect() - try: - with conn: - with conn.cursor() as cur: - cur.execute(sql) - finally: - conn.close() +def _exec(sql: str, conn) -> None: + with conn.cursor() as cur: + cur.execute(sql) -def migrate_create_library() -> None: +def _run_once(conn, name: str, fn) -> bool: + """Run fn(conn) only if name has not been recorded in schema_migrations. + Returns True if the migration was executed, False if it was skipped.""" + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM schema_migrations WHERE name = %s", (name,)) + if cur.fetchone(): + logger.info("%s — skipped (already applied)", name) + return False + t0 = time.time() + fn(conn) + with conn.cursor() as cur: + cur.execute( + "INSERT INTO schema_migrations (name) VALUES (%s) ON CONFLICT DO NOTHING", + (name,), + ) + conn.commit() + logger.info("%s — executed in %dms", name, int((time.time() - t0) * 1000)) + return True + + +def migrate_create_library(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS library ( @@ -57,11 +76,12 @@ def migrate_create_library() -> None: created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() ) - """ + """, + conn, ) -def migrate_create_book_tags() -> None: +def migrate_create_book_tags(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS book_tags ( @@ -71,12 +91,13 @@ def migrate_create_book_tags() -> None: tag_type VARCHAR(20) NOT NULL, UNIQUE (filename, tag, tag_type) ) - """ + """, + conn, ) - _exec("CREATE INDEX IF NOT EXISTS idx_book_tags_filename ON book_tags (filename)") + _exec("CREATE INDEX IF NOT EXISTS idx_book_tags_filename ON book_tags (filename)", conn) -def migrate_create_reading_progress() -> None: +def migrate_create_reading_progress(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS reading_progress ( @@ -87,11 +108,12 @@ def migrate_create_reading_progress() -> None: progress INTEGER DEFAULT 0, updated_at TIMESTAMP DEFAULT NOW() ) - """ + """, + conn, ) -def migrate_create_reading_sessions() -> None: +def migrate_create_reading_sessions(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS reading_sessions ( @@ -99,12 +121,13 @@ def migrate_create_reading_sessions() -> None: filename VARCHAR(600) NOT NULL REFERENCES library(filename) ON DELETE CASCADE, read_at TIMESTAMP DEFAULT NOW() ) - """ + """, + conn, ) - _exec("CREATE INDEX IF NOT EXISTS idx_reading_sessions_filename ON reading_sessions (filename)") + _exec("CREATE INDEX IF NOT EXISTS idx_reading_sessions_filename ON reading_sessions (filename)", conn) -def migrate_create_library_cover_cache() -> None: +def migrate_create_library_cover_cache(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS library_cover_cache ( @@ -113,11 +136,12 @@ def migrate_create_library_cover_cache() -> None: thumb_webp BYTEA NOT NULL, updated_at TIMESTAMP DEFAULT NOW() ) - """ + """, + conn, ) -def migrate_create_credentials() -> None: +def migrate_create_credentials(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS credentials ( @@ -127,13 +151,14 @@ def migrate_create_credentials() -> None: password TEXT NOT NULL, updated_at TIMESTAMP DEFAULT NOW() ) - """ + """, + conn, ) - _exec("ALTER TABLE credentials ALTER COLUMN username TYPE TEXT") - _exec("ALTER TABLE credentials ALTER COLUMN password TYPE TEXT") + _exec("ALTER TABLE credentials ALTER COLUMN username TYPE TEXT", conn) + _exec("ALTER TABLE credentials ALTER COLUMN password TYPE TEXT", conn) -def migrate_create_break_patterns() -> None: +def migrate_create_break_patterns(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS break_patterns ( @@ -145,39 +170,35 @@ def migrate_create_break_patterns() -> None: created_at TIMESTAMP DEFAULT NOW(), UNIQUE (pattern_type, pattern) ) - """ + """, + conn, ) -def migrate_seed_break_patterns() -> None: - conn = direct_connect() - try: - with conn: - with conn.cursor() as cur: - for pat in _DEFAULT_REGEX: - re.compile(pat) - cur.execute( - """ - INSERT INTO break_patterns (pattern_type, pattern, is_default) - VALUES ('regex', %s, TRUE) - ON CONFLICT (pattern_type, pattern) DO NOTHING - """, - (pat,), - ) - for pat in _DEFAULT_CSS: - cur.execute( - """ - INSERT INTO break_patterns (pattern_type, pattern, is_default) - VALUES ('css_class', %s, TRUE) - ON CONFLICT (pattern_type, pattern) DO NOTHING - """, - (pat,), - ) - finally: - conn.close() +def migrate_seed_break_patterns(conn) -> None: + with conn.cursor() as cur: + for pat in _DEFAULT_REGEX: + re.compile(pat) + cur.execute( + """ + INSERT INTO break_patterns (pattern_type, pattern, is_default) + VALUES ('regex', %s, TRUE) + ON CONFLICT (pattern_type, pattern) DO NOTHING + """, + (pat,), + ) + for pat in _DEFAULT_CSS: + cur.execute( + """ + INSERT INTO break_patterns (pattern_type, pattern, is_default) + VALUES ('css_class', %s, TRUE) + ON CONFLICT (pattern_type, pattern) DO NOTHING + """, + (pat,), + ) -def migrate_create_backup_log() -> None: +def migrate_create_backup_log(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS backup_log ( @@ -189,15 +210,20 @@ def migrate_create_backup_log() -> None: started_at TIMESTAMP DEFAULT NOW(), finished_at TIMESTAMP ) - """ + """, + conn, ) -def migrate_add_rating() -> None: - _exec("ALTER TABLE library ADD COLUMN IF NOT EXISTS rating SMALLINT NOT NULL DEFAULT 0") +def migrate_backup_log_scanned_files(conn) -> None: + _exec("ALTER TABLE backup_log ADD COLUMN IF NOT EXISTS scanned_files INTEGER", conn) -def migrate_create_bookmarks() -> None: +def migrate_add_rating(conn) -> None: + _exec("ALTER TABLE library ADD COLUMN IF NOT EXISTS rating SMALLINT NOT NULL DEFAULT 0", conn) + + +def migrate_create_bookmarks(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS bookmarks ( @@ -209,17 +235,17 @@ def migrate_create_bookmarks() -> None: note TEXT NOT NULL DEFAULT '', created_at TIMESTAMPTZ DEFAULT NOW() ) - """ + """, + conn, ) - _exec("CREATE INDEX IF NOT EXISTS idx_bookmarks_filename ON bookmarks (filename)") + _exec("CREATE INDEX IF NOT EXISTS idx_bookmarks_filename ON bookmarks (filename)", conn) -def migrate_remove_cover_missing_tag() -> None: - _exec("DELETE FROM book_tags WHERE tag = 'Cover Missing' AND tag_type = 'tag'") +def migrate_remove_cover_missing_tag(conn) -> None: + _exec("DELETE FROM book_tags WHERE tag = 'Cover Missing' AND tag_type = 'tag'", conn) -def migrate_create_perf_indexes() -> None: - # Match library list sorting and common filters. +def migrate_create_perf_indexes(conn) -> None: _exec( """ CREATE INDEX IF NOT EXISTS idx_library_sort_coalesce @@ -230,38 +256,38 @@ def migrate_create_perf_indexes() -> None: series_index, (COALESCE(title, '')) ) - """ + """, + conn, ) - _exec("CREATE INDEX IF NOT EXISTS idx_library_needs_review ON library (needs_review)") - _exec("CREATE INDEX IF NOT EXISTS idx_library_archived ON library (archived)") - - # Speeds grouped reads + recent-read lookups. + _exec("CREATE INDEX IF NOT EXISTS idx_library_needs_review ON library (needs_review)", conn) + _exec("CREATE INDEX IF NOT EXISTS idx_library_archived ON library (archived)", conn) _exec( """ CREATE INDEX IF NOT EXISTS idx_reading_sessions_filename_readat ON reading_sessions (filename, read_at DESC) - """ + """, + conn, ) - - # Helps ORDER BY filename, tag fetch for tag-map construction. _exec( """ CREATE INDEX IF NOT EXISTS idx_book_tags_filename_tag ON book_tags (filename, tag) - """ + """, + conn, ) -def migrate_series_suffix() -> None: +def migrate_series_suffix(conn) -> None: _exec( """ ALTER TABLE library ADD COLUMN IF NOT EXISTS series_suffix VARCHAR(10) NOT NULL DEFAULT '' - """ + """, + conn, ) -def migrate_create_builder_drafts() -> None: +def migrate_create_builder_drafts(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS builder_drafts ( @@ -274,11 +300,12 @@ def migrate_create_builder_drafts() -> None: created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() ) - """ + """, + conn, ) -def migrate_create_authors() -> None: +def migrate_create_authors(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS authors ( @@ -288,21 +315,23 @@ def migrate_create_authors() -> None: created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() ) - """ + """, + conn, ) -def migrate_rename_hiatus() -> None: - _exec("UPDATE library SET publication_status = 'Long-Term Hold' WHERE publication_status = 'Hiatus'") +def migrate_rename_hiatus(conn) -> None: + _exec("UPDATE library SET publication_status = 'Long-Term Hold' WHERE publication_status = 'Hiatus'", conn) -def migrate_add_storage_type() -> None: +def migrate_add_storage_type(conn) -> None: _exec( - "ALTER TABLE library ADD COLUMN IF NOT EXISTS storage_type VARCHAR(10) NOT NULL DEFAULT 'file'" + "ALTER TABLE library ADD COLUMN IF NOT EXISTS storage_type VARCHAR(10) NOT NULL DEFAULT 'file'", + conn, ) -def migrate_create_book_images() -> None: +def migrate_create_book_images(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS book_images ( @@ -311,11 +340,12 @@ def migrate_create_book_images() -> None: media_type VARCHAR(100) NOT NULL, size_bytes INTEGER NOT NULL DEFAULT 0 ) - """ + """, + conn, ) -def migrate_create_book_chapters() -> None: +def migrate_create_book_chapters(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS book_chapters ( @@ -327,29 +357,33 @@ def migrate_create_book_chapters() -> None: content_tsv TSVECTOR, UNIQUE (filename, chapter_index) ) - """ + """, + conn, ) _exec( - "CREATE INDEX IF NOT EXISTS idx_book_chapters_filename ON book_chapters (filename, chapter_index)" + "CREATE INDEX IF NOT EXISTS idx_book_chapters_filename ON book_chapters (filename, chapter_index)", + conn, ) _exec( - "CREATE INDEX IF NOT EXISTS idx_book_chapters_tsv ON book_chapters USING GIN (content_tsv)" + "CREATE INDEX IF NOT EXISTS idx_book_chapters_tsv ON book_chapters USING GIN (content_tsv)", + conn, ) -def migrate_rebuild_chapter_tsv_with_title() -> None: - """Rebuild content_tsv to include chapter title (safe to run repeatedly).""" +def migrate_rebuild_chapter_tsv_with_title(conn) -> None: + """Rebuild content_tsv to include chapter title. Runs once via schema_migrations tracking.""" _exec( """ UPDATE book_chapters SET content_tsv = to_tsvector('simple', COALESCE(title, '') || ' ' || regexp_replace(COALESCE(content, ''), '<[^>]*>', ' ', 'g')) - """ + """, + conn, ) -def migrate_create_app_settings() -> None: +def migrate_create_app_settings(conn) -> None: _exec( """ CREATE TABLE IF NOT EXISTS app_settings ( @@ -357,47 +391,75 @@ def migrate_create_app_settings() -> None: develop_mode BOOLEAN NOT NULL DEFAULT FALSE, CONSTRAINT single_row CHECK (id = 1) ) - """ + """, + conn, ) - _exec("INSERT INTO app_settings (id, develop_mode) VALUES (1, FALSE) ON CONFLICT DO NOTHING") + _exec("INSERT INTO app_settings (id, develop_mode) VALUES (1, FALSE) ON CONFLICT DO NOTHING", conn) -def migrate_app_settings_break_image() -> None: - _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_sha256 VARCHAR(64) DEFAULT NULL") - _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_ext VARCHAR(10) DEFAULT NULL") +def migrate_app_settings_break_image(conn) -> None: + _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_sha256 VARCHAR(64) DEFAULT NULL", conn) + _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_ext VARCHAR(10) DEFAULT NULL", conn) -def migrate_series_volume() -> None: +def migrate_series_volume(conn) -> None: _exec( """ ALTER TABLE library ADD COLUMN IF NOT EXISTS series_volume VARCHAR(20) NOT NULL DEFAULT '' - """ + """, + conn, ) def run_migrations() -> None: - migrate_create_library() - migrate_create_book_tags() - migrate_create_reading_progress() - migrate_create_reading_sessions() - migrate_create_library_cover_cache() - migrate_create_credentials() - migrate_create_break_patterns() - migrate_create_backup_log() - migrate_create_perf_indexes() - migrate_seed_break_patterns() - migrate_add_rating() - migrate_remove_cover_missing_tag() - migrate_create_bookmarks() - migrate_series_suffix() - migrate_create_builder_drafts() - migrate_create_authors() - migrate_rename_hiatus() - migrate_add_storage_type() - migrate_create_book_images() - migrate_create_book_chapters() - migrate_rebuild_chapter_tsv_with_title() - migrate_create_app_settings() - migrate_app_settings_break_image() - migrate_series_volume() + t_start = time.time() + logger.info("Starting migrations...") + conn = direct_connect() + try: + # Bootstrap: create schema_migrations table (always idempotent, no tracking needed). + with conn: + with conn.cursor() as cur: + cur.execute( + """ + CREATE TABLE IF NOT EXISTS schema_migrations ( + name VARCHAR(200) PRIMARY KEY, + applied_at TIMESTAMP DEFAULT NOW() + ) + """ + ) + + executed = sum([ + _run_once(conn, "create_library", migrate_create_library), + _run_once(conn, "create_book_tags", migrate_create_book_tags), + _run_once(conn, "create_reading_progress", migrate_create_reading_progress), + _run_once(conn, "create_reading_sessions", migrate_create_reading_sessions), + _run_once(conn, "create_library_cover_cache", migrate_create_library_cover_cache), + _run_once(conn, "create_credentials", migrate_create_credentials), + _run_once(conn, "create_break_patterns", migrate_create_break_patterns), + _run_once(conn, "create_backup_log", migrate_create_backup_log), + _run_once(conn, "create_perf_indexes", migrate_create_perf_indexes), + _run_once(conn, "seed_break_patterns", migrate_seed_break_patterns), + _run_once(conn, "add_rating", migrate_add_rating), + _run_once(conn, "remove_cover_missing_tag", migrate_remove_cover_missing_tag), + _run_once(conn, "create_bookmarks", migrate_create_bookmarks), + _run_once(conn, "series_suffix", migrate_series_suffix), + _run_once(conn, "create_builder_drafts", migrate_create_builder_drafts), + _run_once(conn, "create_authors", migrate_create_authors), + _run_once(conn, "rename_hiatus", migrate_rename_hiatus), + _run_once(conn, "add_storage_type", migrate_add_storage_type), + _run_once(conn, "create_book_images", migrate_create_book_images), + _run_once(conn, "create_book_chapters", migrate_create_book_chapters), + _run_once(conn, "rebuild_chapter_tsv_with_title", migrate_rebuild_chapter_tsv_with_title), + _run_once(conn, "create_app_settings", migrate_create_app_settings), + _run_once(conn, "app_settings_break_image", migrate_app_settings_break_image), + _run_once(conn, "series_volume", migrate_series_volume), + _run_once(conn, "backup_log_scanned_files", migrate_backup_log_scanned_files), + ]) + finally: + conn.close() + elapsed = time.time() - t_start + if executed == 0: + logger.info("Migrations complete in %.1fs — all already applied", elapsed) + else: + logger.info("Migrations complete in %.1fs — %d executed", elapsed, executed) diff --git a/containers/novela/pdf.py b/containers/novela/pdf.py index d6c2e79..c7f64f5 100644 --- a/containers/novela/pdf.py +++ b/containers/novela/pdf.py @@ -35,19 +35,24 @@ def _webp_thumb_from_image(path: Path) -> bytes: return out.getvalue() +def _webp_thumb_from_pil(im: Image.Image) -> bytes: + if im.mode not in ("RGB", "RGBA"): + im = im.convert("RGB") + thumb = ImageOps.fit(im, (COVER_W, COVER_H), method=Image.Resampling.LANCZOS) + from io import BytesIO + out = BytesIO() + thumb.save(out, format="WEBP", quality=82, method=6) + return out.getvalue() + + def pdf_cover_thumb(path: Path) -> bytes: with fitz.open(path) as doc: if doc.page_count == 0: raise ValueError("PDF has no pages") page = doc.load_page(0) pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5), alpha=False) - tmp = path.with_suffix(".cover.tmp.png") - try: - pix.save(tmp) - return _webp_thumb_from_image(tmp) - finally: - if tmp.exists(): - tmp.unlink(missing_ok=True) + img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples) + return _webp_thumb_from_pil(img) def pdf_scan_metadata(path: Path) -> dict: diff --git a/containers/novela/routers/backup.py b/containers/novela/routers/backup.py index 043d49e..1a1cb2f 100644 --- a/containers/novela/routers/backup.py +++ b/containers/novela/routers/backup.py @@ -408,11 +408,11 @@ def _dbx() -> dropbox.Dropbox: oauth2_refresh_token=token, app_key=app_key, app_secret=app_secret, - timeout=120, + timeout=300, ) else: # Fallback: legacy access token - client = dropbox.Dropbox(token, timeout=120) + client = dropbox.Dropbox(token, timeout=300) client.users_get_current_account() except AuthError as e: @@ -434,8 +434,8 @@ def _ensure_dropbox_dir(client: dropbox.Dropbox, path: str) -> None: pass -_DROPBOX_UPLOAD_CHUNK = 100 * 1024 * 1024 # 100 MB — below the 150 MB files_upload limit -_DROPBOX_UPLOAD_THRESHOLD = 148 * 1024 * 1024 # use session upload above this size +_DROPBOX_UPLOAD_CHUNK = 16 * 1024 * 1024 # 16 MB — keeps each chunk well within request timeout +_DROPBOX_UPLOAD_THRESHOLD = 16 * 1024 * 1024 # use session upload above this size def _dropbox_upload_bytes(client: dropbox.Dropbox, target_path: str, data: bytes) -> int: @@ -621,7 +621,15 @@ def _insert_backup_log_running() -> int: return int(cur.fetchone()[0]) -def _finish_backup_log(log_id: int, *, status: str, files_count: int | None, size_bytes: int | None, error_msg: str | None) -> None: +def _finish_backup_log( + log_id: int, + *, + status: str, + files_count: int | None, + scanned_files: int | None, + size_bytes: int | None, + error_msg: str | None, +) -> None: with get_db_conn() as conn: with conn: with conn.cursor() as cur: @@ -630,12 +638,13 @@ def _finish_backup_log(log_id: int, *, status: str, files_count: int | None, siz UPDATE backup_log SET status = %s, files_count = %s, + scanned_files = %s, size_bytes = %s, error_msg = %s, finished_at = NOW() WHERE id = %s """, - (status, files_count, size_bytes, error_msg, log_id), + (status, files_count, scanned_files, size_bytes, error_msg, log_id), ) @@ -695,7 +704,7 @@ def _prune_orphan_objects(client: dropbox.Dropbox, objects_root: str, referenced return _dropbox_delete_paths(client, to_delete) -def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> tuple[int, int]: +def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> tuple[int, int, int]: def _prog(done: int, total: int, phase: str) -> None: if progress_key is not None: BACKUP_PROGRESS[progress_key] = {"done": done, "total": total, "phase": phase} @@ -792,7 +801,7 @@ def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> t if not dry_run: _save_manifest(new_manifest) - return uploaded_count, uploaded_size + return total_files, uploaded_count, uploaded_size @router.get("/backup", response_class=HTMLResponse) @@ -953,7 +962,7 @@ async def backup_status(): with conn.cursor() as cur: cur.execute( """ - SELECT id, status, files_count, size_bytes, error_msg, started_at, finished_at + SELECT id, status, files_count, scanned_files, size_bytes, error_msg, started_at, finished_at FROM backup_log ORDER BY started_at DESC LIMIT 1 @@ -965,11 +974,12 @@ async def backup_status(): return { "id": row[0], "status": row[1], - "files_count": row[2], - "size_bytes": row[3], - "error_msg": row[4], - "started_at": row[5].isoformat() if row[5] else None, - "finished_at": row[6].isoformat() if row[6] else None, + "uploaded_files": row[2], + "scanned_files": row[3], + "size_bytes": row[4], + "error_msg": row[5], + "started_at": row[6].isoformat() if row[6] else None, + "finished_at": row[7].isoformat() if row[7] else None, } @@ -979,7 +989,7 @@ async def backup_history(): with conn.cursor() as cur: cur.execute( """ - SELECT id, status, files_count, size_bytes, error_msg, started_at, finished_at + SELECT id, status, files_count, scanned_files, size_bytes, error_msg, started_at, finished_at FROM backup_log ORDER BY started_at DESC LIMIT 20 @@ -990,11 +1000,12 @@ async def backup_history(): { "id": r[0], "status": r[1], - "files_count": r[2], - "size_bytes": r[3], - "error_msg": r[4], - "started_at": r[5].isoformat() if r[5] else None, - "finished_at": r[6].isoformat() if r[6] else None, + "uploaded_files": r[2], + "scanned_files": r[3], + "size_bytes": r[4], + "error_msg": r[5], + "started_at": r[6].isoformat() if r[6] else None, + "finished_at": r[7].isoformat() if r[7] else None, } for r in rows ] @@ -1002,6 +1013,10 @@ async def backup_history(): def _start_backup_task(*, dry_run: bool) -> int: log_id = _insert_backup_log_running() + while len(BACKUP_TASKS) >= 50: + oldest = next(iter(BACKUP_TASKS)) + BACKUP_TASKS.pop(oldest, None) + BACKUP_PROGRESS.pop(oldest, None) task = asyncio.create_task(_run_backup_job(log_id, dry_run)) BACKUP_TASKS[log_id] = task return log_id @@ -1062,13 +1077,14 @@ async def stop_backup_scheduler() -> None: async def _run_backup_job(log_id: int, dry_run: bool) -> None: BACKUP_PROGRESS[log_id] = {"done": 0, "total": 0, "phase": "starting"} try: - files_count, size_bytes = await asyncio.to_thread( + scanned_files, files_count, size_bytes = await asyncio.to_thread( _run_backup_internal, dry_run=dry_run, progress_key=log_id ) _finish_backup_log( log_id, status="success", files_count=files_count, + scanned_files=scanned_files, size_bytes=size_bytes, error_msg=None, ) @@ -1077,6 +1093,7 @@ async def _run_backup_job(log_id: int, dry_run: bool) -> None: log_id, status="error", files_count=None, + scanned_files=None, size_bytes=None, error_msg=str(e), ) diff --git a/containers/novela/routers/builder.py b/containers/novela/routers/builder.py index 2382405..9eca6c6 100644 --- a/containers/novela/routers/builder.py +++ b/containers/novela/routers/builder.py @@ -82,14 +82,14 @@ async def create_draft(request: Request): return HTMLResponse("Titel en auteur zijn verplicht", status_code=400) with get_db_conn() as conn: - with conn.cursor() as cur: - cur.execute( - "INSERT INTO builder_drafts (title, author, publisher, source_url, chapters) " - "VALUES (%s, %s, %s, %s, '[]'::jsonb) RETURNING id", - (title, author, publisher, source_url), - ) - draft_id = str(cur.fetchone()[0]) - conn.commit() + with conn: + with conn.cursor() as cur: + cur.execute( + "INSERT INTO builder_drafts (title, author, publisher, source_url, chapters) " + "VALUES (%s, %s, %s, %s, '[]'::jsonb) RETURNING id", + (title, author, publisher, source_url), + ) + draft_id = str(cur.fetchone()[0]) return RedirectResponse(f"/builder/{draft_id}", status_code=303) @@ -97,9 +97,9 @@ async def create_draft(request: Request): @router.delete("/api/builder/{draft_id}") async def delete_draft(draft_id: str): with get_db_conn() as conn: - with conn.cursor() as cur: - cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,)) - conn.commit() + with conn: + with conn.cursor() as cur: + cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,)) return JSONResponse({"ok": True}) @@ -130,13 +130,13 @@ async def add_chapter(draft_id: str, request: Request): insert_at = after_index + 1 if 0 <= after_index < len(chapters) else len(chapters) chapters.insert(insert_at, new_chapter) - with conn.cursor() as cur: - cur.execute( - "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " - "WHERE id = %s", - (json.dumps(chapters), draft_id), - ) - conn.commit() + with conn: + with conn.cursor() as cur: + cur.execute( + "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " + "WHERE id = %s", + (json.dumps(chapters), draft_id), + ) return JSONResponse({"ok": True, "index": insert_at, "count": len(chapters)}) @@ -159,13 +159,13 @@ async def save_chapter(draft_id: str, idx: int, request: Request): if "content" in body: chapters[idx]["content"] = body["content"] - with conn.cursor() as cur: - cur.execute( - "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " - "WHERE id = %s", - (json.dumps(chapters), draft_id), - ) - conn.commit() + with conn: + with conn.cursor() as cur: + cur.execute( + "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " + "WHERE id = %s", + (json.dumps(chapters), draft_id), + ) return JSONResponse({"ok": True}) @@ -187,13 +187,13 @@ async def delete_chapter(draft_id: str, idx: int): chapters.pop(idx) - with conn.cursor() as cur: - cur.execute( - "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " - "WHERE id = %s", - (json.dumps(chapters), draft_id), - ) - conn.commit() + with conn: + with conn.cursor() as cur: + cur.execute( + "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " + "WHERE id = %s", + (json.dumps(chapters), draft_id), + ) return JSONResponse({"ok": True, "index": min(idx, len(chapters) - 1), "count": len(chapters)}) @@ -260,10 +260,9 @@ async def publish_draft(draft_id: str): "needs_review": True, "has_cover": False, } - upsert_book(conn, filename, meta) - - with conn.cursor() as cur: - cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,)) - conn.commit() + with conn: + upsert_book(conn, filename, meta) + with conn.cursor() as cur: + cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,)) return JSONResponse({"ok": True, "filename": filename}) diff --git a/containers/novela/routers/common.py b/containers/novela/routers/common.py index 00a44d3..8aae040 100644 --- a/containers/novela/routers/common.py +++ b/containers/novela/routers/common.py @@ -14,6 +14,7 @@ from PIL import Image, ImageOps, UnidentifiedImageError from cbr import cbr_cover_thumb, cbr_page_count from db import get_db_conn +from epub_utils import find_opf_path from pdf import pdf_cover_thumb, pdf_page_count, pdf_scan_metadata LIBRARY_DIR = Path("library") @@ -221,18 +222,6 @@ def prune_empty_dirs(start_dir: Path) -> None: cur = cur.parent -def _find_opf_path(names: set[str], container_xml: str | None) -> str | None: - opf_path = "OEBPS/content.opf" - if container_xml: - m = re.search(r"full-path\s*=\s*['\"]([^'\"]+)['\"]", container_xml) - if m: - opf_path = m.group(1) - if opf_path in names: - return opf_path - candidates = sorted(n for n in names if n.lower().endswith(".opf")) - return candidates[0] if candidates else None - - def scan_epub(path: Path) -> dict: out = { "has_cover": False, @@ -254,7 +243,7 @@ def scan_epub(path: Path) -> dict: names = set(z.namelist()) out["has_cover"] = extract_cover_from_epub(path) is not None container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None - opf_path = _find_opf_path(names, container_xml) + opf_path = find_opf_path(names, container_xml) if not opf_path or opf_path not in names: return out opf = z.read(opf_path).decode("utf-8", errors="replace") diff --git a/containers/novela/routers/editor.py b/containers/novela/routers/editor.py index c00b1b7..f513926 100644 --- a/containers/novela/routers/editor.py +++ b/containers/novela/routers/editor.py @@ -1,9 +1,6 @@ -import html as _html import posixpath -import re import uuid import zipfile as zf -from pathlib import Path from bs4 import BeautifulSoup from fastapi import APIRouter, Request @@ -11,150 +8,13 @@ from fastapi.responses import HTMLResponse, JSONResponse, Response from shared_templates import templates from db import get_db_conn -from epub import read_epub_file, write_epub_file +from epub import make_intro_xhtml, read_epub_file, write_epub_file +from epub_utils import build_book_info_body_html, epub_spine, find_opf_path, make_new_chapter_xhtml, norm_href, rewrite_epub_entries from routers.common import LIBRARY_DIR, is_db_filename, resolve_library_path, upsert_chapter router = APIRouter() -def _norm(base_dir: str, rel: str) -> str: - rel = (rel or "").split("#", 1)[0].strip() - if not rel: - return "" - joined = posixpath.normpath(posixpath.join(base_dir, rel)) - return joined.lstrip("./") - - -def _epub_spine(path: Path) -> list[dict]: - with zf.ZipFile(path, "r") as z: - names = set(z.namelist()) - - opf_path = "OEBPS/content.opf" - try: - container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") - m = re.search(r"full-path\\s*=\\s*['\"]([^'\"]+)['\"]", container_xml) - if m: - opf_path = m.group(1) - except Exception: - pass - - if opf_path not in names: - candidates = [n for n in names if n.lower().endswith(".opf")] - if not candidates: - return [] - opf_path = sorted(candidates)[0] - - opf_xml = z.read(opf_path).decode("utf-8", errors="replace") - opf = BeautifulSoup(opf_xml, "xml") - opf_dir = posixpath.dirname(opf_path) - - manifest: dict[str, str] = {} - for item in opf.find_all("item"): - iid = item.get("id") - href = item.get("href") - if iid and href: - manifest[iid] = _norm(opf_dir, href) - - spine_idrefs: list[str] = [] - spine_tag = opf.find("spine") - toc_id = spine_tag.get("toc") if spine_tag else None - if spine_tag: - for ir in spine_tag.find_all("itemref"): - rid = ir.get("idref") - if rid: - spine_idrefs.append(rid) - - hrefs = [manifest[rid] for rid in spine_idrefs if rid in manifest] - href_to_title: dict[str, str] = {} - - ncx_path = "" - if toc_id and toc_id in manifest: - ncx_path = manifest[toc_id] - elif "toc.ncx" in names: - ncx_path = "toc.ncx" - elif "OEBPS/toc.ncx" in names: - ncx_path = "OEBPS/toc.ncx" - - if ncx_path and ncx_path in names: - try: - ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace") - ncx = BeautifulSoup(ncx_xml, "xml") - ncx_dir = posixpath.dirname(ncx_path) - for np in ncx.find_all("navPoint"): - content = np.find("content") - label_tag = np.find("text") - src = content.get("src") if content else "" - label = label_tag.get_text(strip=True) if label_tag else "" - if src and label: - href_to_title[_norm(ncx_dir, src)] = _html.unescape(label) - except Exception: - pass - - chapters = [] - for i, href in enumerate(hrefs): - base = posixpath.basename(href) - title = href_to_title.get(href, re.sub(r"\.(xhtml|html|htm)$", "", base, flags=re.I)) - chapters.append({"index": i, "title": title or f"Chapter {i+1}", "href": href}) - return chapters - - -def _norm_href(base_dir: str, rel: str) -> str: - rel = (rel or "").split("#", 1)[0].strip() - if not rel: - return "" - return posixpath.normpath(posixpath.join(base_dir, rel)).lstrip("./") - - -def _find_opf_path(names: set[str], container_xml: str | None) -> str | None: - opf_path = "OEBPS/content.opf" - if container_xml: - m = re.search(r"full-path\s*=\s*['\"]([^'\"]+)['\"]", container_xml) - if m: - opf_path = m.group(1) - if opf_path in names: - return opf_path - candidates = sorted(n for n in names if n.lower().endswith(".opf")) - return candidates[0] if candidates else None - - -def _make_new_chapter_xhtml(title: str) -> str: - safe_title = _html.escape((title or "New chapter").strip() or "New chapter") - return ( - "\n" - "\n" - "\n" - "\n" - " \n" - f" {safe_title}\n" - " \n" - "\n" - "\n" - f"

{safe_title}

\n" - "

\n" - "\n" - "\n" - ) - - -def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_paths: set[str] | None = None) -> None: - remove_paths = set(remove_paths or set()) - tmp = epub_path.with_suffix(".tmp.epub") - with zf.ZipFile(epub_path, "r") as zin, zf.ZipFile(tmp, "w", compression=zf.ZIP_DEFLATED) as zout: - names = zin.namelist() - for name in names: - if name in remove_paths: - continue - if name in updates: - zout.writestr(name, updates[name]) - else: - zout.writestr(name, zin.read(name)) - for name, data in updates.items(): - if name not in names: - zout.writestr(name, data) - tmp.replace(epub_path) - - @router.get("/library/editor/{filename:path}", response_class=HTMLResponse) async def editor_page(filename: str, request: Request): if not is_db_filename(filename): @@ -194,7 +54,7 @@ async def get_edit_chapter(filename: str, index: int): path = resolve_library_path(filename) if path is None or not path.exists(): return Response(status_code=404) - spine = _epub_spine(path) + spine = epub_spine(path) if index < 0 or index >= len(spine): return Response(status_code=404) ch = spine[index] @@ -229,17 +89,77 @@ async def save_edit_chapter(filename: str, index: int, request: Request): return JSONResponse({"error": "File not found"}, status_code=404) if not content: return JSONResponse({"error": "No content"}, status_code=400) - spine = _epub_spine(path) + spine = epub_spine(path) if index < 0 or index >= len(spine): return JSONResponse({"error": "Chapter not found"}, status_code=404) - href = spine[index]["href"] + ch = spine[index] + href = ch["href"] try: write_epub_file(path, href, content) except Exception as e: return JSONResponse({"error": str(e)}, status_code=500) + + new_title = (body.get("title") or "").strip() + if new_title and new_title != (ch["title"] or ""): + try: + _update_epub_navpoint_title(path, href, new_title) + except Exception as e: + return JSONResponse({"error": f"Title update failed: {e}"}, status_code=500) return JSONResponse({"ok": True}) +def _update_epub_navpoint_title(path, target_href: str, new_title: str) -> None: + """Update the NCX navPoint label whose content src resolves to ``target_href``.""" + with zf.ZipFile(path, "r") as z: + names = set(z.namelist()) + container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None + opf_path = find_opf_path(names, container_xml) + if not opf_path: + return + + opf_xml = z.read(opf_path).decode("utf-8", errors="replace") + opf = BeautifulSoup(opf_xml, "xml") + opf_dir = posixpath.dirname(opf_path) + + manifest: dict[str, str] = {} + for item in opf.find_all("item"): + iid = item.get("id") + href = item.get("href") + if iid and href: + manifest[iid] = norm_href(opf_dir, href) + + spine_tag = opf.find("spine") + if not spine_tag: + return + toc_id = spine_tag.get("toc") + ncx_path = manifest.get(toc_id, "") if toc_id else "" + if not ncx_path: + for item in opf.find_all("item"): + mt = (item.get("media-type") or "").lower() + if mt == "application/x-dtbncx+xml" and item.get("href"): + ncx_path = norm_href(opf_dir, item.get("href")) + break + if not ncx_path or ncx_path not in names: + return + + ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace") + + ncx = BeautifulSoup(ncx_xml, "xml") + ncx_dir = posixpath.dirname(ncx_path) + changed = False + for np in ncx.find_all("navPoint"): + content_tag = np.find("content") + src = content_tag.get("src") if content_tag else "" + if src and norm_href(ncx_dir, src) == target_href: + text = np.find("text") + if text is not None: + text.string = new_title + changed = True + break + if changed: + rewrite_epub_entries(path, {ncx_path: str(ncx).encode("utf-8")}) + + @router.post("/api/edit/chapter/add/{filename:path}") async def add_edit_chapter(filename: str, request: Request): body = await request.json() @@ -261,10 +181,18 @@ async def add_edit_chapter(filename: str, request: Request): insert_idx = total if after_index < 0 or after_index >= total else after_index + 1 with conn: with conn.cursor() as cur: + # Two-step increment to avoid unique constraint violations. + # PostgreSQL checks the constraint per-row, so incrementing + # consecutive indices in a single UPDATE (1→2 while 2 exists) + # raises a UniqueViolation. Using negatives as a safe intermediate. cur.execute( - "UPDATE book_chapters SET chapter_index = chapter_index + 1 WHERE filename = %s AND chapter_index >= %s", + "UPDATE book_chapters SET chapter_index = -(chapter_index + 1) WHERE filename = %s AND chapter_index >= %s", (filename, insert_idx), ) + cur.execute( + "UPDATE book_chapters SET chapter_index = -chapter_index WHERE filename = %s AND chapter_index < 0", + (filename,), + ) upsert_chapter(conn, filename, insert_idx, title, "") return JSONResponse({"ok": True, "index": insert_idx, "count": total + 1}) @@ -274,10 +202,6 @@ async def add_edit_chapter(filename: str, request: Request): if not path.exists(): return JSONResponse({"error": "File not found"}, status_code=404) - try: - after_index = int(after_index) - except Exception: - after_index = -1 try: after_index = int(after_index) except Exception: @@ -286,7 +210,7 @@ async def add_edit_chapter(filename: str, request: Request): with zf.ZipFile(path, "r") as z: names = set(z.namelist()) container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None - opf_path = _find_opf_path(names, container_xml) + opf_path = find_opf_path(names, container_xml) if not opf_path: return JSONResponse({"error": "OPF not found"}, status_code=400) @@ -299,7 +223,7 @@ async def add_edit_chapter(filename: str, request: Request): iid = item.get("id") href = item.get("href") if iid and href: - manifest[iid] = _norm_href(opf_dir, href) + manifest[iid] = norm_href(opf_dir, href) spine_tag = opf.find("spine") if not spine_tag: @@ -323,7 +247,7 @@ async def add_edit_chapter(filename: str, request: Request): while True: stem = f"chapter_added_{uuid.uuid4().hex[:8]}" rel = posixpath.join(ref_dir_rel, f"{stem}.xhtml") if ref_dir_rel else f"{stem}.xhtml" - abs_path = _norm_href(opf_dir, rel) + abs_path = norm_href(opf_dir, rel) if abs_path not in names: break @@ -357,7 +281,7 @@ async def add_edit_chapter(filename: str, request: Request): for item in opf.find_all("item"): mt = (item.get("media-type") or "").lower() if mt == "application/x-dtbncx+xml" and item.get("href"): - ncx_path = _norm_href(opf_dir, item.get("href")) + ncx_path = norm_href(opf_dir, item.get("href")) break updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")} @@ -390,14 +314,191 @@ async def add_edit_chapter(filename: str, request: Request): updates[ncx_path] = str(ncx).encode("utf-8") - updates[abs_path] = _make_new_chapter_xhtml(title).encode("utf-8") - _rewrite_epub_entries(path, updates) + updates[abs_path] = make_new_chapter_xhtml(title).encode("utf-8") + rewrite_epub_entries(path, updates) - new_spine = _epub_spine(path) + new_spine = epub_spine(path) new_index = min(max(after_index + 1, 0), max(len(new_spine) - 1, 0)) return JSONResponse({"ok": True, "index": new_index, "count": len(new_spine)}) +def _load_book_info_from_db(filename: str) -> tuple[dict, str, str] | None: + """Return (book_info, title, author) for the book, or None if unknown.""" + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """SELECT title, author, publisher, series, series_index, publication_status, + source_url, description, publish_date + FROM library WHERE filename = %s""", + (filename,), + ) + meta_row = cur.fetchone() + if not meta_row: + return None + cur.execute( + "SELECT tag, tag_type FROM book_tags WHERE filename = %s ORDER BY tag_type, tag", + (filename,), + ) + tag_rows = cur.fetchall() + + (title, author, publisher, series, series_index, + pub_status, source_url, description, pub_date) = meta_row + genres = [t for t, tp in tag_rows if tp == "genre"] + subgenres = [t for t, tp in tag_rows if tp == "subgenre"] + tags = [t for t, tp in tag_rows if tp in ("tag", "subject")] + info = { + "genres": genres, + "subgenres": subgenres, + "tags": tags, + "description": description or "", + "source_url": source_url or "", + "publisher": publisher or "", + "series": series or "", + "series_index": series_index or 1, + "publication_status": pub_status or "", + "updated_date": pub_date.isoformat() if pub_date else "", + } + return info, title or "", author or "" + + +@router.post("/api/edit/intro/{filename:path}") +async def add_intro_chapter(filename: str): + """Insert a generated 'Book Info' page as the first chapter/spine item.""" + loaded = _load_book_info_from_db(filename) + if loaded is None: + return JSONResponse({"error": "not found"}, status_code=404) + info, title, author = loaded + + if is_db_filename(filename): + body_html = build_book_info_body_html(title, author, info) + with get_db_conn() as conn: + with conn: + with conn.cursor() as cur: + # Shift existing chapter indices up by 1 via two-step + # negation to avoid unique-constraint violations. + cur.execute( + "UPDATE book_chapters SET chapter_index = -(chapter_index + 1) WHERE filename = %s", + (filename,), + ) + cur.execute( + "UPDATE book_chapters SET chapter_index = -chapter_index WHERE filename = %s AND chapter_index < 0", + (filename,), + ) + upsert_chapter(conn, filename, 0, "Book Info", body_html) + return JSONResponse({"ok": True, "index": 0}) + + path = resolve_library_path(filename) + if path is None or not path.exists(): + return JSONResponse({"error": "File not found"}, status_code=404) + + intro_xhtml = make_intro_xhtml(title, author, info) + + with zf.ZipFile(path, "r") as z: + names = set(z.namelist()) + container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None + opf_path = find_opf_path(names, container_xml) + if not opf_path: + return JSONResponse({"error": "OPF not found"}, status_code=400) + + opf_xml = z.read(opf_path).decode("utf-8", errors="replace") + opf = BeautifulSoup(opf_xml, "xml") + opf_dir = posixpath.dirname(opf_path) + + manifest: dict[str, str] = {} + for item in opf.find_all("item"): + iid = item.get("id") + href = item.get("href") + if iid and href: + manifest[iid] = norm_href(opf_dir, href) + + spine_tag = opf.find("spine") + if not spine_tag: + return JSONResponse({"error": "Invalid OPF spine"}, status_code=400) + itemrefs = spine_tag.find_all("itemref") + + ref_dir_rel = "Text" + if itemrefs: + ref_idref = itemrefs[0].get("idref", "") + ref_abs = manifest.get(ref_idref, "") + if ref_abs: + ref_rel = posixpath.relpath(ref_abs, opf_dir) + ref_dir_rel = posixpath.dirname(ref_rel) or "" + + while True: + stem = f"intro_{uuid.uuid4().hex[:8]}" + rel = posixpath.join(ref_dir_rel, f"{stem}.xhtml") if ref_dir_rel else f"{stem}.xhtml" + abs_path = norm_href(opf_dir, rel) + if abs_path not in names: + break + + existing_ids = {item.get("id") for item in opf.find_all("item") if item.get("id")} + i = 1 + new_id = f"intro_add_{i:03d}" + while new_id in existing_ids: + i += 1 + new_id = f"intro_add_{i:03d}" + + manifest_tag = opf.find("manifest") + if not manifest_tag: + return JSONResponse({"error": "Invalid OPF manifest"}, status_code=400) + + new_item = opf.new_tag("item") + new_item["id"] = new_id + new_item["href"] = rel + new_item["media-type"] = "application/xhtml+xml" + manifest_tag.append(new_item) + + new_itemref = opf.new_tag("itemref") + new_itemref["idref"] = new_id + if itemrefs: + itemrefs[0].insert_before(new_itemref) + else: + spine_tag.append(new_itemref) + + toc_id = spine_tag.get("toc") + ncx_path = manifest.get(toc_id, "") if toc_id else "" + if not ncx_path: + for item in opf.find_all("item"): + mt = (item.get("media-type") or "").lower() + if mt == "application/x-dtbncx+xml" and item.get("href"): + ncx_path = norm_href(opf_dir, item.get("href")) + break + + updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")} + if ncx_path and ncx_path in names: + ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace") + ncx = BeautifulSoup(ncx_xml, "xml") + nav_map = ncx.find("navMap") + if nav_map: + np = ncx.new_tag("navPoint") + np["id"] = f"{new_id}_nav" + label = ncx.new_tag("navLabel") + text = ncx.new_tag("text") + text.string = "Book Info" + label.append(text) + content = ncx.new_tag("content") + ncx_dir = posixpath.dirname(ncx_path) + content["src"] = posixpath.relpath(abs_path, ncx_dir) + np.append(label) + np.append(content) + + first_nav = nav_map.find("navPoint") + if first_nav: + first_nav.insert_before(np) + else: + nav_map.append(np) + + for idx, node in enumerate(nav_map.find_all("navPoint"), 1): + node["playOrder"] = str(idx) + + updates[ncx_path] = str(ncx).encode("utf-8") + + updates[abs_path] = intro_xhtml.encode("utf-8") + rewrite_epub_entries(path, updates) + + return JSONResponse({"ok": True, "index": 0}) + + @router.delete("/api/edit/chapter/{index:int}/{filename:path}") async def delete_edit_chapter(filename: str, index: int): if is_db_filename(filename): @@ -429,7 +530,7 @@ async def delete_edit_chapter(filename: str, index: int): with zf.ZipFile(path, "r") as z: names = set(z.namelist()) container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None - opf_path = _find_opf_path(names, container_xml) + opf_path = find_opf_path(names, container_xml) if not opf_path: return JSONResponse({"error": "OPF not found"}, status_code=400) @@ -442,7 +543,7 @@ async def delete_edit_chapter(filename: str, index: int): iid = item.get("id") href = item.get("href") if iid and href: - manifest[iid] = _norm_href(opf_dir, href) + manifest[iid] = norm_href(opf_dir, href) spine_tag = opf.find("spine") if not spine_tag: @@ -474,7 +575,7 @@ async def delete_edit_chapter(filename: str, index: int): for item in opf.find_all("item"): mt = (item.get("media-type") or "").lower() if mt == "application/x-dtbncx+xml" and item.get("href"): - ncx_path = _norm_href(opf_dir, item.get("href")) + ncx_path = norm_href(opf_dir, item.get("href")) break updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")} @@ -489,13 +590,13 @@ async def delete_edit_chapter(filename: str, index: int): for np in nav_map.find_all("navPoint"): content = np.find("content") src = content.get("src") if content else "" - if src and _norm_href(ncx_dir, src) == target_href: + if src and norm_href(ncx_dir, src) == target_href: np.decompose() for idx, node in enumerate(nav_map.find_all("navPoint"), 1): node["playOrder"] = str(idx) updates[ncx_path] = str(ncx).encode("utf-8") - _rewrite_epub_entries(path, updates, remove_paths) - new_spine = _epub_spine(path) + rewrite_epub_entries(path, updates, remove_paths) + new_spine = epub_spine(path) new_index = min(index, max(len(new_spine) - 1, 0)) return JSONResponse({"ok": True, "index": new_index, "count": len(new_spine)}) diff --git a/containers/novela/routers/grabber.py b/containers/novela/routers/grabber.py index e5c8279..6cc4382 100644 --- a/containers/novela/routers/grabber.py +++ b/containers/novela/routers/grabber.py @@ -15,6 +15,7 @@ from shared_templates import templates from db import get_db_conn from epub import detect_image_format, make_chapter_xhtml, make_epub +from epub_utils import build_book_info_body_html from routers.common import ( LIBRARY_DIR, ensure_unique_db_filename, @@ -31,7 +32,7 @@ from routers.common import ( from scrapers import get_scraper from scrapers.base import HEADERS from security import decrypt_value, encrypt_value, is_encrypted_value -from xhtml import configure_break_patterns, element_to_xhtml, is_break_element +from xhtml import collapse_consecutive_breaks, configure_break_patterns, element_to_xhtml, is_break_element router = APIRouter() @@ -215,13 +216,15 @@ async def debug_run(request: Request): "selector_id": ch_data.get("selector_id"), "selector_class": ch_data.get("selector_class"), "raw_html": raw_html[:8000], - "converted_xhtml": "\n".join(xhtml_parts)[:8000], + "converted_xhtml": collapse_consecutive_breaks("\n".join(xhtml_parts))[:8000], } except Exception as e: result["first_chapter"] = {"title": ch["title"], "url": ch["url"], "error": str(e)} except Exception: result["error"] = traceback.format_exc() + finally: + await scraper.close() return result @@ -279,14 +282,15 @@ async def preload(request: Request): except ValueError as e: return {"error": str(e)} - async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client: - if username: - await scraper.login(client, username, password) - book = await scraper.fetch_book_info(client, url) - series = book.get("series", "") - hint = int(book.get("series_index_hint", 0) or 0) - title = book.get("title", "") - author = book.get("author", "") + try: + async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client: + if username: + await scraper.login(client, username, password) + book = await scraper.fetch_book_info(client, url) + series = book.get("series", "") + hint = int(book.get("series_index_hint", 0) or 0) + title = book.get("title", "") + author = book.get("author", "") existing_books = [] if title or author: @@ -328,6 +332,8 @@ async def preload(request: Request): "already_exists": bool(existing_books), "existing_books": existing_books, } + finally: + await scraper.close() async def scrape_book(job_id: str, url: str, username: str, password: str) -> None: @@ -349,242 +355,247 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) scraper = get_scraper(url) - async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client: - if username: - send("status", {"message": "Logging in..."}) - await scraper.login(client, username, password) + try: + async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client: + if username: + send("status", {"message": "Logging in..."}) + await scraper.login(client, username, password) - book = await scraper.fetch_book_info(client, url) - book_title = book["title"] - author = book["author"] - send("meta", {"title": book_title, "author": author}) + book = await scraper.fetch_book_info(client, url) + book_title = book["title"] + author = book["author"] + send("meta", {"title": book_title, "author": author}) - if not book.get("chapters"): - send("error", {"message": "No chapters found. Check the URL or credentials."}) - job["done"] = True - return + if not book.get("chapters"): + send("error", {"message": "No chapters found. Check the URL or credentials."}) + job["done"] = True + return - send("chapters", {"chapters": [c["title"] for c in book["chapters"]]}) - send("status", {"message": f"Found {len(book['chapters'])} chapters. Downloading..."}) + send("chapters", {"chapters": [c["title"] for c in book["chapters"]]}) + send("status", {"message": f"Found {len(book['chapters'])} chapters. Downloading..."}) - cover_data: bytes | None = job.pop("cover_upload", None) + cover_data: bytes | None = job.pop("cover_upload", None) - tags = list(book.get("tags", [])) - if len(book["chapters"]) < 4 and "Shorts" not in tags: - tags.append("Shorts") - status_map = {"Temporary-Hold": "Temporary Hold"} - pub_status = status_map.get(book.get("publication_status", ""), book.get("publication_status", "")) + tags = list(book.get("tags", [])) + if len(book["chapters"]) < 4 and "Shorts" not in tags: + tags.append("Shorts") + status_map = {"Temporary-Hold": "Temporary Hold"} + pub_status = status_map.get(book.get("publication_status", ""), book.get("publication_status", "")) - series = book.get("series", "") - series_index = int(job.get("series_index", 1) or 1) + series = book.get("series", "") + series_index = int(job.get("series_index", 1) or 1) - updated_date_override = (job.pop("updated_date_override", "") or "").strip() - final_updated_date = ( - updated_date_override - or book.get("updated_date", "") - or datetime.now(timezone.utc).strftime("%Y-%m-%d") - ) - - book_info = { - "genres": book.get("genres", []), - "subgenres": book.get("subgenres", []), - "tags": tags, - "description": book.get("description", ""), - "updated_date": final_updated_date, - "source_url": book.get("source_url", ""), - "publisher": book.get("publisher", ""), - "series": series, - "series_index": series_index, - "publication_status": pub_status, - } - - _load_break_patterns() - - storage_mode = job.get("storage_mode", "db") - # Break image path depends on storage mode: - # - EPUB: relative path inside the EPUB ZIP (break.png is embedded) - # - DB: absolute URL served by the static files handler - if storage_mode == "epub": - break_img_path = "../Images/break.png" - else: - break_img_path = "/static/break.png" - - # Collect chapters as {title, content_html, images: [(sha256, ext, media_type, size, data)]} - chapters = [] - for i, ch in enumerate(book["chapters"], 1): - send("progress", {"current": i, "total": len(book["chapters"]), "title": ch["title"]}) - try: - ch_data = await scraper.fetch_chapter(client, ch) - content_el = ch_data["content_el"] - - # Download images and store to disk (no DB yet); rewrite src to absolute URL - if content_el: - for img_tag in content_el.find_all("img"): - if is_break_element(img_tag): - # Replace the parent with
if it contains only - # this image, so element_to_xhtml can detect the break. - parent = img_tag.parent - meaningful = [ - c for c in parent.children - if not (isinstance(c, NavigableString) and not c.strip()) - ] - if len(meaningful) == 1 and parent is not content_el: - parent.replace_with(BeautifulSoup("
", "html.parser").hr) - else: - img_tag.decompose() - continue - src = img_tag.get("src", "") - if not src or src.startswith("data:"): - img_tag.decompose() - continue - try: - img_resp = await client.get(urljoin(ch["url"], src)) - if img_resp.status_code == 200: - _, img_mime = detect_image_format( - img_resp.content, f"ch{i:03d}_img" - ) - sha, ext_i, url = write_image_file(img_resp.content, img_mime) - img_tag["src"] = url - img_tag["alt"] = img_tag.get("alt", "") - img_tag.attrs = { - k: v for k, v in img_tag.attrs.items() - if k in ("src", "alt", "width", "height") - } - else: - img_tag.decompose() - except Exception: - img_tag.decompose() - - xhtml_parts = [] - if content_el: - all_p = content_el.find_all("p") - empty_p = sum( - 1 - for p in all_p - if not [c for c in p.children if isinstance(c, Tag)] - and not p.get_text().replace("\xa0", "").strip() - ) - filled_p = len(all_p) - empty_p - empty_p_is_spacer = filled_p > 0 and empty_p >= filled_p * 0.5 - for child in content_el.children: - part = element_to_xhtml(child, break_img_path=break_img_path, empty_p_is_spacer=empty_p_is_spacer) - if part.strip(): - xhtml_parts.append(part) - - content_html = "\n".join(xhtml_parts) - chapters.append({"title": ch_data["title"], "content_html": content_html}) - await asyncio.sleep(0.2) - except Exception as e: - send("warning", {"message": f"Chapter {i} skipped: {e}"}) - - if not chapters: - send("error", {"message": "No chapters could be processed."}) - job["done"] = True - return - - send("status", {"message": "Saving to library..."}) - - book_tags = ( - [(g, "genre") for g in book_info.get("genres", [])] - + [(g, "subgenre") for g in book_info.get("subgenres", [])] - + [(g, "tag") for g in book_info.get("tags", [])] - ) - - if storage_mode == "epub": - # ── EPUB file on disk ────────────────────────────────────────── - epub_chapters = [ - {"title": ch["title"], "xhtml": make_chapter_xhtml(ch["title"], ch["content_html"], i + 1), "images": []} - for i, ch in enumerate(chapters) - ] - try: - break_img_data = open("static/break.png", "rb").read() - except Exception: - break_img_data = b"" - epub_bytes = make_epub( - book_title, author, epub_chapters, cover_data, break_img_data, - str(uuid.uuid4()), book_info, + updated_date_override = (job.pop("updated_date_override", "") or "").strip() + final_updated_date = ( + updated_date_override + or book.get("updated_date", "") + or datetime.now(timezone.utc).strftime("%Y-%m-%d") ) - rel_path = make_rel_path( - media_type="epub", - publisher=book_info.get("publisher", ""), - author=author, - title=book_title, - series=series, - series_index=series_index, - ) - rel_path = ensure_unique_rel_path(rel_path) - out_path = LIBRARY_DIR / rel_path - out_path.parent.mkdir(parents=True, exist_ok=True) - out_path.write_bytes(epub_bytes) - rel_filename = rel_path.as_posix() - book_meta = { - "media_type": "epub", - "storage_type": "file", - "has_cover": cover_data is not None, + book_info = { + "genres": book.get("genres", []), + "subgenres": book.get("subgenres", []), + "tags": tags, + "description": book.get("description", ""), + "updated_date": final_updated_date, + "source_url": book.get("source_url", ""), + "publisher": book.get("publisher", ""), "series": series, - "series_index": series_index if series else 0, - "title": book_title, - "publication_status": book_info.get("publication_status", ""), - "author": author, - "publisher": book_info.get("publisher", ""), - "source_url": book_info.get("source_url", ""), - "description": book_info.get("description", ""), - "publish_date": final_updated_date, - "needs_review": False, + "series_index": series_index, + "publication_status": pub_status, } - with get_db_conn() as conn: - with conn: - upsert_book(conn, rel_filename, book_meta, book_tags) - if cover_data: - try: - thumb = make_cover_thumb_webp(cover_data) - upsert_cover_cache(conn, rel_filename, "image/webp", thumb) - except Exception: - pass - else: - # ── DB storage (default) ─────────────────────────────────────── - base_filename = make_rel_path( - media_type="db", - publisher=book_info.get("publisher", ""), - author=author, - title=book_title, - series=series, - series_index=series_index, - ).as_posix() + _load_break_patterns() - book_meta = { - "media_type": "epub", - "storage_type": "db", - "has_cover": cover_data is not None, - "series": book_info.get("series", ""), - "series_index": series_index if book_info.get("series") else 0, - "title": book_title, - "publication_status": book_info.get("publication_status", ""), - "author": author, - "publisher": book_info.get("publisher", ""), - "source_url": book_info.get("source_url", ""), - "description": book_info.get("description", ""), - "publish_date": final_updated_date, - "needs_review": False, - } - with get_db_conn() as conn: - with conn: - rel_filename = ensure_unique_db_filename(conn, base_filename) - upsert_book(conn, rel_filename, book_meta, book_tags) - for idx, ch in enumerate(chapters): - upsert_chapter(conn, rel_filename, idx, ch["title"], ch["content_html"]) - if cover_data: - try: - thumb = make_cover_thumb_webp(cover_data) - upsert_cover_cache(conn, rel_filename, "image/webp", thumb) - except Exception: - pass + storage_mode = job.get("storage_mode", "db") + # Break image path depends on storage mode: + # - EPUB: relative path inside the EPUB ZIP (break.png is embedded) + # - DB: absolute URL served by the static files handler + if storage_mode == "epub": + break_img_path = "../Images/break.png" + else: + break_img_path = "/static/break.png" - job["filename"] = rel_filename - send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters), "storage_type": storage_mode}) - job["done"] = True + # Collect chapters as {title, content_html, images: [(sha256, ext, media_type, size, data)]} + chapters = [] + for i, ch in enumerate(book["chapters"], 1): + send("progress", {"current": i, "total": len(book["chapters"]), "title": ch["title"]}) + try: + ch_data = await scraper.fetch_chapter(client, ch) + content_el = ch_data["content_el"] + + # Download images and store to disk (no DB yet); rewrite src to absolute URL + if content_el: + for img_tag in content_el.find_all("img"): + if is_break_element(img_tag): + # Replace the parent with
if it contains only + # this image, so element_to_xhtml can detect the break. + parent = img_tag.parent + meaningful = [ + c for c in parent.children + if not (isinstance(c, NavigableString) and not c.strip()) + ] + if len(meaningful) == 1 and parent is not content_el: + parent.replace_with(BeautifulSoup("
", "html.parser").hr) + else: + img_tag.decompose() + continue + src = img_tag.get("src", "") + if not src or src.startswith("data:"): + img_tag.decompose() + continue + try: + img_resp = await client.get(urljoin(ch["url"], src)) + if img_resp.status_code == 200: + _, img_mime = detect_image_format( + img_resp.content, f"ch{i:03d}_img" + ) + sha, ext_i, url = write_image_file(img_resp.content, img_mime) + img_tag["src"] = url + img_tag["alt"] = img_tag.get("alt", "") + img_tag.attrs = { + k: v for k, v in img_tag.attrs.items() + if k in ("src", "alt", "width", "height") + } + else: + img_tag.decompose() + except Exception: + img_tag.decompose() + + xhtml_parts = [] + if content_el: + all_p = content_el.find_all("p") + empty_p = sum( + 1 + for p in all_p + if not [c for c in p.children if isinstance(c, Tag)] + and not p.get_text().replace("\xa0", "").strip() + ) + filled_p = len(all_p) - empty_p + empty_p_is_spacer = filled_p > 0 and empty_p >= filled_p * 0.5 + for child in content_el.children: + part = element_to_xhtml(child, break_img_path=break_img_path, empty_p_is_spacer=empty_p_is_spacer) + if part.strip(): + xhtml_parts.append(part) + + content_html = collapse_consecutive_breaks("\n".join(xhtml_parts)) + chapters.append({"title": ch_data["title"], "content_html": content_html}) + await asyncio.sleep(0.2) + except Exception as e: + send("warning", {"message": f"Chapter {i} skipped: {e}"}) + + if not chapters: + send("error", {"message": "No chapters could be processed."}) + job["done"] = True + return + + send("status", {"message": "Saving to library..."}) + + book_tags = ( + [(g, "genre") for g in book_info.get("genres", [])] + + [(g, "subgenre") for g in book_info.get("subgenres", [])] + + [(g, "tag") for g in book_info.get("tags", [])] + ) + + if storage_mode == "epub": + # ── EPUB file on disk ────────────────────────────────────────── + epub_chapters = [ + {"title": ch["title"], "xhtml": make_chapter_xhtml(ch["title"], ch["content_html"], i + 1), "images": []} + for i, ch in enumerate(chapters) + ] + try: + break_img_data = open("static/break.png", "rb").read() + except Exception: + break_img_data = b"" + epub_bytes = make_epub( + book_title, author, epub_chapters, cover_data, break_img_data, + str(uuid.uuid4()), book_info, + ) + rel_path = make_rel_path( + media_type="epub", + publisher=book_info.get("publisher", ""), + author=author, + title=book_title, + series=series, + series_index=series_index, + ) + rel_path = ensure_unique_rel_path(rel_path) + out_path = LIBRARY_DIR / rel_path + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_bytes(epub_bytes) + rel_filename = rel_path.as_posix() + + book_meta = { + "media_type": "epub", + "storage_type": "file", + "has_cover": cover_data is not None, + "series": series, + "series_index": series_index if series else 0, + "title": book_title, + "publication_status": book_info.get("publication_status", ""), + "author": author, + "publisher": book_info.get("publisher", ""), + "source_url": book_info.get("source_url", ""), + "description": book_info.get("description", ""), + "publish_date": final_updated_date, + "needs_review": True, + } + with get_db_conn() as conn: + with conn: + upsert_book(conn, rel_filename, book_meta, book_tags) + if cover_data: + try: + thumb = make_cover_thumb_webp(cover_data) + upsert_cover_cache(conn, rel_filename, "image/webp", thumb) + except Exception: + pass + + else: + # ── DB storage (default) ─────────────────────────────────────── + base_filename = make_rel_path( + media_type="db", + publisher=book_info.get("publisher", ""), + author=author, + title=book_title, + series=series, + series_index=series_index, + ).as_posix() + + book_meta = { + "media_type": "epub", + "storage_type": "db", + "has_cover": cover_data is not None, + "series": book_info.get("series", ""), + "series_index": series_index if book_info.get("series") else 0, + "title": book_title, + "publication_status": book_info.get("publication_status", ""), + "author": author, + "publisher": book_info.get("publisher", ""), + "source_url": book_info.get("source_url", ""), + "description": book_info.get("description", ""), + "publish_date": final_updated_date, + "needs_review": True, + } + info_body = build_book_info_body_html(book_title, author, book_info) + with get_db_conn() as conn: + with conn: + rel_filename = ensure_unique_db_filename(conn, base_filename) + upsert_book(conn, rel_filename, book_meta, book_tags) + upsert_chapter(conn, rel_filename, 0, "Book Info", info_body) + for idx, ch in enumerate(chapters): + upsert_chapter(conn, rel_filename, idx + 1, ch["title"], ch["content_html"]) + if cover_data: + try: + thumb = make_cover_thumb_webp(cover_data) + upsert_cover_cache(conn, rel_filename, "image/webp", thumb) + except Exception: + pass + + job["filename"] = rel_filename + send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters), "storage_type": storage_mode}) + job["done"] = True + finally: + await scraper.close() @router.post("/convert") @@ -612,6 +623,8 @@ async def convert(request: Request): job["updated_date_override"] = (body.get("updated_date") or "").strip() job["storage_mode"] = "epub" if body.get("storage_mode") == "epub" else "db" + while len(JOBS) >= 50: + JOBS.pop(next(iter(JOBS))) JOBS[job_id] = job asyncio.create_task(scrape_book(job_id, url, username, password)) return {"job_id": job_id, "using_credentials": bool(username)} diff --git a/containers/novela/routers/library.py b/containers/novela/routers/library.py index 3adcc67..b7559a6 100644 --- a/containers/novela/routers/library.py +++ b/containers/novela/routers/library.py @@ -80,12 +80,25 @@ async def api_library( if rescan: _sync_disk_to_db() - # ETag based on row count + latest updated_at — cheap query before full load. + # ETag based on row count + latest updated_at across library, reading_progress, and book_tags. with get_db_conn() as conn: with conn.cursor() as cur: - cur.execute("SELECT COUNT(*), MAX(updated_at) FROM library") - _count, _max_ts = cur.fetchone() - etag = f'"{_count}-{int(_max_ts.timestamp()) if _max_ts else 0}"' + cur.execute( + """ + SELECT + (SELECT COUNT(*) FROM library), + (SELECT MAX(updated_at) FROM library), + (SELECT MAX(updated_at) FROM reading_progress), + (SELECT MAX(id) FROM book_tags) + """ + ) + _count, _max_ts, _rp_ts, _bt_id = cur.fetchone() + etag = ( + f'"{_count}' + f'-{int(_max_ts.timestamp()) if _max_ts else 0}' + f'-{int(_rp_ts.timestamp()) if _rp_ts else 0}' + f'-{_bt_id or 0}"' + ) if request and request.headers.get("if-none-match") == etag: return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"}) @@ -216,6 +229,9 @@ async def library_bulk_delete(request: Request): for filename in filenames: if not isinstance(filename, str): continue + if is_db_filename(filename): + deleted.append(filename) + continue full = resolve_library_path(filename) if full is None: skipped.append(filename) @@ -403,8 +419,7 @@ async def library_add_cover(filename: str, request: Request): @router.post("/library/want-to-read/{filename:path}") async def library_want_to_read(filename: str): - full = resolve_library_path(filename) - if full is None: + if not is_db_filename(filename) and resolve_library_path(filename) is None: return {"error": "Invalid filename"} with get_db_conn() as conn: with conn: @@ -423,8 +438,7 @@ async def library_want_to_read(filename: str): @router.post("/library/archive/{filename:path}") async def library_archive(filename: str): - full = resolve_library_path(filename) - if full is None: + if not is_db_filename(filename) and resolve_library_path(filename) is None: return {"error": "Invalid filename"} with get_db_conn() as conn: with conn: @@ -474,8 +488,7 @@ async def library_mark_new_reviewed(request: Request): name = raw.strip() if not name or name in seen: continue - full = resolve_library_path(name) - if full is None: + if not is_db_filename(name) and resolve_library_path(name) is None: continue cleaned.append(name) seen.add(name) diff --git a/containers/novela/routers/reader.py b/containers/novela/routers/reader.py index 91d0a53..8390259 100644 --- a/containers/novela/routers/reader.py +++ b/containers/novela/routers/reader.py @@ -2,7 +2,6 @@ import html as _html import io -import posixpath import re import uuid import zipfile as zf @@ -17,10 +16,13 @@ from shared_templates import templates from cbr import cbr_get_page, cbr_page_count from db import get_db_conn from epub import make_chapter_xhtml, make_epub, read_epub_file, write_epub_file +from epub_utils import epub_spine, find_opf_path, rewrite_epub_entries from pdf import pdf_page_count, pdf_render_page from routers.common import ( IMAGES_DIR, LIBRARY_DIR, + clean_segment, + coerce_series_index, ensure_unique_db_filename, is_db_filename, make_cover_thumb_webp, @@ -35,159 +37,6 @@ from routers.common import ( router = APIRouter() -# --------------------------------------------------------------------------- -# EPUB helpers -# --------------------------------------------------------------------------- - -def _epub_spine(path: Path) -> list[dict]: - """Return ordered list of {index, title, href} for all spine items. - - Supports both EPUB2 (toc.ncx) and EPUB3 (nav.xhtml), and respects - the OPF location declared in META-INF/container.xml. - """ - def _norm(base_dir: str, rel: str) -> str: - rel = (rel or '').split('#', 1)[0].strip() - if not rel: - return '' - joined = posixpath.normpath(posixpath.join(base_dir, rel)) - return joined.lstrip('./') - - with zf.ZipFile(path, 'r') as z: - names = set(z.namelist()) - - opf_path = 'OEBPS/content.opf' - try: - container_xml = z.read('META-INF/container.xml').decode('utf-8', errors='replace') - m = re.search(r"full-path\\s*=\\s*['\"]([^'\"]+)['\"]", container_xml) - if m: - opf_path = m.group(1) - except Exception: - pass - if opf_path not in names: - # fallback for malformed books - candidates = [n for n in names if n.lower().endswith('.opf')] - if not candidates: - return [] - opf_path = sorted(candidates)[0] - - opf_xml = z.read(opf_path).decode('utf-8', errors='replace') - opf = BeautifulSoup(opf_xml, 'xml') - opf_dir = posixpath.dirname(opf_path) - - manifest: dict[str, str] = {} - for item in opf.find_all('item'): - iid = item.get('id') - href = item.get('href') - if iid and href: - manifest[iid] = _norm(opf_dir, href) - - spine_idrefs: list[str] = [] - spine_tag = opf.find('spine') - toc_id = spine_tag.get('toc') if spine_tag else None - if spine_tag: - for ir in spine_tag.find_all('itemref'): - rid = ir.get('idref') - if rid: - spine_idrefs.append(rid) - - hrefs = [manifest[rid] for rid in spine_idrefs if rid in manifest] - - href_to_title: dict[str, str] = {} - - # EPUB2: NCX titles - ncx_path = '' - if toc_id and toc_id in manifest: - ncx_path = manifest[toc_id] - elif 'toc.ncx' in names: - ncx_path = 'toc.ncx' - elif 'OEBPS/toc.ncx' in names: - ncx_path = 'OEBPS/toc.ncx' - - if ncx_path and ncx_path in names: - try: - ncx_xml = z.read(ncx_path).decode('utf-8', errors='replace') - ncx = BeautifulSoup(ncx_xml, 'xml') - ncx_dir = posixpath.dirname(ncx_path) - for np in ncx.find_all('navPoint'): - content = np.find('content') - label_tag = np.find('text') - src = content.get('src') if content else '' - label = label_tag.get_text(strip=True) if label_tag else '' - if src and label: - href_to_title[_norm(ncx_dir, src)] = _html.unescape(label) - except Exception: - pass - - # EPUB3: nav.xhtml titles (fallback) - if not href_to_title: - nav_item = None - for item in opf.find_all('item'): - props = (item.get('properties') or '').split() - if 'nav' in props: - nav_item = item - break - if nav_item and nav_item.get('href'): - nav_path = _norm(opf_dir, nav_item.get('href')) - if nav_path in names: - try: - nav_xml = z.read(nav_path).decode('utf-8', errors='replace') - nav = BeautifulSoup(nav_xml, 'lxml') - nav_dir = posixpath.dirname(nav_path) - for a in nav.select('nav a[href]'): - src = a.get('href', '') - label = a.get_text(' ', strip=True) - if src and label: - href_to_title[_norm(nav_dir, src)] = _html.unescape(label) - except Exception: - pass - - chapters = [] - for i, href in enumerate(hrefs): - base = posixpath.basename(href) - title = href_to_title.get(href, re.sub(r'\.(xhtml|html|htm)$', '', base, flags=re.I)) - chapters.append({'index': i, 'title': title or f'Chapter {i+1}', 'href': href}) - return chapters - - - -def _norm_href(base_dir: str, rel: str) -> str: - rel = (rel or '').split('#', 1)[0].strip() - if not rel: - return '' - return posixpath.normpath(posixpath.join(base_dir, rel)).lstrip('./') - - -def _find_opf_path(names: set[str], container_xml: str | None) -> str | None: - opf_path = 'OEBPS/content.opf' - if container_xml: - m = re.search(r'full-path\s*=\s*[\'"]([^\'"]+)[\'"]', container_xml) - if m: - opf_path = m.group(1) - if opf_path in names: - return opf_path - candidates = sorted(n for n in names if n.lower().endswith('.opf')) - return candidates[0] if candidates else None - - -def _make_new_chapter_xhtml(title: str) -> str: - safe_title = _html.escape((title or 'New chapter').strip() or 'New chapter') - return ( - '\n' - '\n' - '\n' - '\n' - ' \n' - f' {safe_title}\n' - ' \n' - '\n' - '\n' - f'

{safe_title}

\n' - '

\n' - '\n' - '\n' - ) - def _tag_local(name: str | None) -> str: if not name: @@ -200,7 +49,7 @@ def _write_epub_rating(epub_path: Path, rating: int) -> None: with zf.ZipFile(epub_path, "r") as z: names = set(z.namelist()) container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None - opf_path = _find_opf_path(names, container_xml) + opf_path = find_opf_path(names, container_xml) if not opf_path or opf_path not in names: return opf_xml = z.read(opf_path).decode("utf-8", errors="replace") @@ -220,7 +69,7 @@ def _write_epub_rating(epub_path: Path, rating: int) -> None: nt["content"] = str(rating) metadata.append(nt) - _rewrite_epub_entries(epub_path, {opf_path: str(opf).encode("utf-8")}) + rewrite_epub_entries(epub_path, {opf_path: str(opf).encode("utf-8")}) def _write_cbz_rating(cbz_path: Path, rating: int) -> None: @@ -287,7 +136,7 @@ def _sync_epub_metadata( with zf.ZipFile(epub_path, 'r') as z: names = set(z.namelist()) container_xml = z.read('META-INF/container.xml').decode('utf-8', errors='replace') if 'META-INF/container.xml' in names else None - opf_path = _find_opf_path(names, container_xml) + opf_path = find_opf_path(names, container_xml) if not opf_path or opf_path not in names: return opf_xml = z.read(opf_path).decode('utf-8', errors='replace') @@ -357,7 +206,7 @@ def _sync_epub_metadata( series_val = (series or '').strip() set_named_meta('calibre:series', series_val) if series_val: - set_named_meta('calibre:series_index', str(_coerce_series_index(series_index))) + set_named_meta('calibre:series_index', str(coerce_series_index(series_index))) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] set_named_meta('novela:series_suffix', sfx) set_named_meta('novela:series_volume', (series_volume or '').strip()[:20]) @@ -366,53 +215,7 @@ def _sync_epub_metadata( set_named_meta('novela:series_suffix', '') set_named_meta('novela:series_volume', '') - _rewrite_epub_entries(epub_path, {opf_path: str(opf).encode('utf-8')}) - - -def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_paths: set[str] | None = None) -> None: - remove_paths = remove_paths or set() - with open(epub_path, 'rb') as f: - original = f.read() - - out = io.BytesIO() - with zf.ZipFile(io.BytesIO(original), 'r') as zin, zf.ZipFile(out, 'w', zf.ZIP_DEFLATED) as zout: - existing = set() - for item in zin.infolist(): - name = item.filename - existing.add(name) - if name in remove_paths: - continue - data = updates.get(name) - if data is None: - data = zin.read(name) - ctype = zf.ZIP_STORED if name == 'mimetype' else zf.ZIP_DEFLATED - zout.writestr(name, data, compress_type=ctype) - - for name, data in updates.items(): - if name in existing or name in remove_paths: - continue - ctype = zf.ZIP_STORED if name == 'mimetype' else zf.ZIP_DEFLATED - zout.writestr(name, data, compress_type=ctype) - - with open(epub_path, 'wb') as f: - f.write(out.getvalue()) - - - -def _clean_segment(value: str, fallback: str, max_len: int = 100) -> str: - txt = re.sub(r"\s+", "_", (value or "").strip()) - txt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", txt) - txt = re.sub(r"\.+$", "", txt).strip("_") - if not txt: - txt = fallback - return txt[:max_len] - - -def _coerce_series_index(value: int | str | None) -> int: - try: - return max(0, min(999, int(value or 0))) - except (TypeError, ValueError): - return 0 + rewrite_epub_entries(epub_path, {opf_path: str(opf).encode('utf-8')}) def _make_rel_path( @@ -425,27 +228,27 @@ def _make_rel_path( series_suffix: str = "", ext: str = ".epub", ) -> Path: - auth = _clean_segment(author, "Unknown Author", 80) - ttl = _clean_segment(title, "Untitled", 140) + auth = clean_segment(author, "Unknown Author", 80) + ttl = clean_segment(title, "Untitled", 140) if ext == ".epub": - pub = _clean_segment(publisher, "Unknown Publisher", 80) - series_name = _clean_segment(series, "", 120) + pub = clean_segment(publisher, "Unknown Publisher", 80) + series_name = clean_segment(series, "", 120) if series_name: - idx = _coerce_series_index(series_index) + idx = coerce_series_index(series_index) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] return Path("epub") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}.epub" return Path("epub") / pub / auth / "Stories" / f"{ttl}.epub" if ext == ".pdf": - pub = _clean_segment(publisher, "Unknown Publisher", 80) + pub = clean_segment(publisher, "Unknown Publisher", 80) return Path("pdf") / pub / auth / f"{ttl}.pdf" # .cbr / .cbz - pub = _clean_segment(publisher, "Unknown Publisher", 80) - series_name = _clean_segment(series, "", 80) + pub = clean_segment(publisher, "Unknown Publisher", 80) + series_name = clean_segment(series, "", 80) if series_name: - idx = _coerce_series_index(series_index) + idx = coerce_series_index(series_index) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] return Path("comics") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}{ext}" return Path("comics") / pub / auth / f"{ttl}{ext}" @@ -519,7 +322,7 @@ async def get_chapter_list(filename: str): return Response(status_code=404) if not path.exists(): return Response(status_code=404) - return _epub_spine(path) + return epub_spine(path) @router.get("/library/chapter/{index}/{filename:path}") @@ -543,8 +346,9 @@ async def get_chapter_html(filename: str, index: int): # Handles two layouts: # 1.

at the very start of content # 2.
\n

(pandoc-style wrapping) - content = re.sub(r'(?si)^(\s*]*)?>.*?)+\s*', '', content) - content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)]*>.*?\s*', r'\1', content, count=1) + if (title or "").strip() != "Book Info": + content = re.sub(r'(?si)^(\s*]*)?>.*?)+\s*', '', content) + content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)]*>.*?\s*', r'\1', content, count=1) return Response( f'

{safe_title}

\n{content}\n', media_type="text/html", @@ -555,7 +359,7 @@ async def get_chapter_html(filename: str, index: int): return Response(status_code=404) if not path.exists(): return Response(status_code=404) - spine = _epub_spine(path) + spine = epub_spine(path) if index < 0 or index >= len(spine): return Response(status_code=404) href = spine[index]["href"] @@ -612,7 +416,7 @@ async def get_chapter_image(path: str, filename: str): @router.get("/library/progress/{filename:path}") async def get_progress(filename: str): - if resolve_library_path(filename) is None: + if not is_db_filename(filename) and resolve_library_path(filename) is None: return {"error": "Invalid filename"} with get_db_conn() as conn: with conn.cursor() as cur: @@ -630,7 +434,7 @@ async def clear_progress(filename: str): Reading sessions (mark-as-read history) are intentionally left intact. """ - if resolve_library_path(filename) is None: + if not is_db_filename(filename) and resolve_library_path(filename) is None: return {"error": "Invalid filename"} with get_db_conn() as conn: with conn: @@ -641,7 +445,7 @@ async def clear_progress(filename: str): @router.post("/library/progress/{filename:path}") async def save_progress(filename: str, request: Request): - if resolve_library_path(filename) is None: + if not is_db_filename(filename) and resolve_library_path(filename) is None: return {"error": "Invalid filename"} body = await request.json() cfi = body.get("cfi", "") @@ -652,9 +456,41 @@ async def save_progress(filename: str, request: Request): except Exception: page = None progress = max(0, min(100, int(body.get("progress", 0)))) + + def _parse_cfi(value): + # cfi format: "chapterIndex:scrollFrac" (scrollFrac optional) + if not value: + return (-1, 0.0) + parts = str(value).split(":", 1) + try: + idx = int(parts[0]) + except Exception: + return (-1, 0.0) + frac = 0.0 + if len(parts) > 1: + try: + frac = float(parts[1]) + except Exception: + frac = 0.0 + return (idx, frac) + + new_pos = _parse_cfi(cfi) with get_db_conn() as conn: with conn: with conn.cursor() as cur: + cur.execute( + "SELECT cfi, page, progress FROM reading_progress WHERE filename = %s", + (filename,), + ) + row = cur.fetchone() + if row is not None: + cur_pos = _parse_cfi(row[0]) + # Monotonic guard: only advance. cfi encodes (chapterIndex, + # scrollFrac) for all formats (epub/pdf/cbr), so a single + # tuple comparison is authoritative. Reset happens only via + # the explicit Read/Unread actions (which clear the row). + if new_pos <= cur_pos: + return {"ok": True, "skipped": True} cur.execute( """ INSERT INTO reading_progress (filename, cfi, page, progress, updated_at) @@ -1275,7 +1111,7 @@ async def convert_to_db(filename: str): # Extract chapters from EPUB try: - spine = _epub_spine(old_path) + spine = epub_spine(old_path) chapters = [] with zf.ZipFile(old_path, "r") as z: for entry in spine: @@ -1446,8 +1282,11 @@ async def export_epub(filename: str): for ch_idx, ch_title, ch_content in ch_rows: # Strip leading h-tags from stored content (same logic as chapter endpoint) # to prevent double titles when make_chapter_xhtml prepends its own heading. - ch_content = re.sub(r'(?si)^(\s*]*)?>.*?)+\s*', '', ch_content) - ch_content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)]*>.*?\s*', r'\1', ch_content, count=1) + # The "Book Info" chapter intentionally carries an

book title that + # must survive — so skip stripping for that one. + if (ch_title or "").strip() != "Book Info": + ch_content = re.sub(r'(?si)^(\s*]*)?>.*?)+\s*', '', ch_content) + ch_content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)]*>.*?\s*', r'\1', ch_content, count=1) modified_html, new_imgs = _rewrite_db_images_for_epub(ch_content, seen_images) chapter_xhtml = make_chapter_xhtml(ch_title or f"Chapter {ch_idx + 1}", modified_html, ch_idx + 1) chapters.append({"title": ch_title or f"Chapter {ch_idx + 1}", "xhtml": chapter_xhtml, "images": new_imgs}) @@ -1461,6 +1300,7 @@ async def export_epub(filename: str): epub_bytes = make_epub( title or "Untitled", author or "Unknown", chapters, cover_data, break_img_data, book_id, book_info, + include_intro=False, ) safe_title = re.sub(r'[^\w\-. ]', '', (title or "book")).strip() or "book" @@ -1569,7 +1409,7 @@ async def library_cbr_page(filename: str, page: int = 0): @router.get("/library/bookmarks/{filename:path}") async def get_bookmarks(filename: str): - if resolve_library_path(filename) is None: + if not is_db_filename(filename) and resolve_library_path(filename) is None: return JSONResponse({"error": "Invalid filename"}, status_code=400) with get_db_conn() as conn: with conn.cursor() as cur: @@ -1597,7 +1437,7 @@ async def get_bookmarks(filename: str): @router.post("/library/bookmarks/{filename:path}") async def add_bookmark(filename: str, request: Request): - if resolve_library_path(filename) is None: + if not is_db_filename(filename) and resolve_library_path(filename) is None: return JSONResponse({"error": "Invalid filename"}, status_code=400) body = await request.json() chapter_index = int(body.get("chapter_index", 0)) diff --git a/containers/novela/scrapers/awesomedude.py b/containers/novela/scrapers/awesomedude.py index b190c5f..06f6054 100644 --- a/containers/novela/scrapers/awesomedude.py +++ b/containers/novela/scrapers/awesomedude.py @@ -4,7 +4,12 @@ from urllib.parse import urljoin, urlparse import httpx from bs4 import BeautifulSoup -from .base import BaseScraper +from .base import ( + BaseScraper, + flaresolverr_get, + flaresolverr_session_create, + flaresolverr_session_destroy, +) LAYOUT_RE = re.compile( r"nav|menu|sidebar|header|footer|breadcrumb|pagination|" @@ -16,6 +21,9 @@ GENERIC_PAGE_TITLES = {"awesomedude home"} class AwesomeDudeScraper(BaseScraper): + def __init__(self) -> None: + self._fs_session: str | None = None + @classmethod def matches(cls, url: str) -> bool: return "awesomedude.org" in url @@ -23,8 +31,19 @@ class AwesomeDudeScraper(BaseScraper): async def login(self, client: httpx.AsyncClient, username: str, password: str) -> bool: return True # no login required + async def _ensure_session(self) -> str: + if self._fs_session is None: + self._fs_session = await flaresolverr_session_create() + return self._fs_session + + async def close(self) -> None: + if self._fs_session: + await flaresolverr_session_destroy(self._fs_session) + self._fs_session = None + async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict: - r = await client.get(url) + sid = await self._ensure_session() + r = await flaresolverr_get(url, session=sid) soup = BeautifulSoup(r.text, "html.parser") actual_url = str(r.url) @@ -147,7 +166,8 @@ class AwesomeDudeScraper(BaseScraper): } async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: - cr = await client.get(ch["url"]) + sid = await self._ensure_session() + cr = await flaresolverr_get(ch["url"], session=sid) csoup = BeautifulSoup(cr.text, "html.parser") title = ch["title"] book_title_lc = ch.get("book_title", "").lower() diff --git a/containers/novela/scrapers/base.py b/containers/novela/scrapers/base.py index 9570e86..00b80b5 100644 --- a/containers/novela/scrapers/base.py +++ b/containers/novela/scrapers/base.py @@ -1,4 +1,6 @@ +import os from abc import ABC, abstractmethod +from types import SimpleNamespace import httpx @@ -6,6 +8,56 @@ HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" } +FLARESOLVERR_URL = os.environ.get("FLARESOLVERR_URL", "http://flaresolverr:8191/v1") +FLARESOLVERR_TIMEOUT_MS = int(os.environ.get("FLARESOLVERR_TIMEOUT_MS", "60000")) + + +async def _flaresolverr_call(payload: dict, http_timeout: float) -> dict: + async with httpx.AsyncClient(timeout=http_timeout) as fs: + r = await fs.post(FLARESOLVERR_URL, json=payload) + r.raise_for_status() + return r.json() + + +async def flaresolverr_get( + url: str, + timeout_ms: int | None = None, + session: str | None = None, +) -> SimpleNamespace: + """Fetch ``url`` through a FlareSolverr instance that solves Cloudflare challenges. + + If ``session`` is given, the request reuses that FlareSolverr session so the + browser + Cloudflare cookies stay warm across multiple calls (much faster for + per-chapter scraping). Returns an object with ``.text`` and ``.url``. + """ + max_timeout = timeout_ms if timeout_ms is not None else FLARESOLVERR_TIMEOUT_MS + payload = {"cmd": "request.get", "url": url, "maxTimeout": max_timeout} + if session: + payload["session"] = session + data = await _flaresolverr_call(payload, (max_timeout / 1000) + 10) + if data.get("status") != "ok": + raise RuntimeError( + f"FlareSolverr could not fetch {url}: {data.get('message') or data}" + ) + sol = data.get("solution") or {} + return SimpleNamespace(text=sol.get("response", ""), url=sol.get("url", url)) + + +async def flaresolverr_session_create() -> str: + """Create a FlareSolverr session and return its id.""" + data = await _flaresolverr_call({"cmd": "sessions.create"}, 30) + if data.get("status") != "ok": + raise RuntimeError(f"FlareSolverr session create failed: {data.get('message') or data}") + return data["session"] + + +async def flaresolverr_session_destroy(session: str) -> None: + """Destroy a FlareSolverr session. Errors are swallowed (best-effort cleanup).""" + try: + await _flaresolverr_call({"cmd": "sessions.destroy", "session": session}, 30) + except Exception: + pass + class BaseScraper(ABC): """Abstract base class for all site scrapers. @@ -44,6 +96,10 @@ class BaseScraper(ABC): Note: cover is not scraped. It is supplied by the user at convert time. """ + async def close(self) -> None: + """Release any scraper-scoped resources (e.g. FlareSolverr sessions). Default: no-op.""" + return None + @abstractmethod async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: """Fetch a chapter page and extract its content element. diff --git a/containers/novela/scrapers/codeysworld.py b/containers/novela/scrapers/codeysworld.py index 65d8f62..4c51f55 100644 --- a/containers/novela/scrapers/codeysworld.py +++ b/containers/novela/scrapers/codeysworld.py @@ -26,7 +26,9 @@ class CodeysWorldScraper(BaseScraper): async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict: r = await client.get(url) - soup = BeautifulSoup(r.text, "html.parser") + # Codey's World is a Windows-hosted legacy site; cp1252 correctly maps + # the 0x80–0x9F range (…, ', ", —, etc.) that iso-8859-1 leaves undefined. + soup = BeautifulSoup(r.content.decode("cp1252", errors="replace"), "html.parser") actual_url = str(r.url) # Title:

@@ -116,7 +118,7 @@ class CodeysWorldScraper(BaseScraper): async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: cr = await client.get(ch["url"]) - csoup = BeautifulSoup(cr.text, "html.parser") + csoup = BeautifulSoup(cr.content.decode("cp1252", errors="replace"), "html.parser") title = ch["title"] book_title_lc = ch.get("book_title", "").lower() author_lc = ch.get("author", "").lower() diff --git a/containers/novela/security.py b/containers/novela/security.py index d5dc3f7..5f9ce4e 100644 --- a/containers/novela/security.py +++ b/containers/novela/security.py @@ -1,4 +1,5 @@ import base64 +import functools import hashlib import os @@ -8,13 +9,16 @@ _PREFIX = "enc$" def _master_secret() -> str: - return ( - os.environ.get("NOVELA_MASTER_KEY") - or os.environ.get("POSTGRES_PASSWORD") - or "novela-default-key-change-me" - ) + secret = os.environ.get("NOVELA_MASTER_KEY") or os.environ.get("POSTGRES_PASSWORD") + if not secret: + raise RuntimeError( + "No encryption key configured. Set NOVELA_MASTER_KEY " + "(or POSTGRES_PASSWORD as fallback)." + ) + return secret +@functools.lru_cache(maxsize=1) def _fernet() -> Fernet: digest = hashlib.sha256(_master_secret().encode("utf-8")).digest() key = base64.urlsafe_b64encode(digest) diff --git a/containers/novela/static/editor.css b/containers/novela/static/editor.css index e61dd1c..4325122 100644 --- a/containers/novela/static/editor.css +++ b/containers/novela/static/editor.css @@ -77,7 +77,8 @@ html, body { height: 100%; background: var(--bg); color: var(--text); font-famil .btn-break:disabled { opacity: 0.3; cursor: not-allowed; } .btn-break:not(:disabled):hover { color: var(--text); border-color: var(--text-faint); } -.btn-replace { +.btn-replace, +.btn-info-page { display: flex; align-items: center; gap: 0.35rem; padding: 0.3rem 0.7rem; background: none; border: 1px solid var(--border); border-radius: var(--radius); @@ -85,7 +86,48 @@ html, body { height: 100%; background: var(--bg); color: var(--text); font-famil color: var(--text-dim); cursor: pointer; transition: color 0.12s, border-color 0.12s; } -.btn-replace:hover { color: var(--text); border-color: var(--text-faint); } +.btn-replace:hover, +.btn-info-page:hover { color: var(--text); border-color: var(--text-faint); } + +.btn-subheading { + padding: 0.3rem 0.6rem; + background: none; border: 1px solid rgba(224,62,45,0.4); border-radius: var(--radius); + font-family: var(--mono); font-size: 0.68rem; font-weight: bold; + color: rgb(224,62,45); cursor: pointer; + transition: background 0.12s, border-color 0.12s; +} +.btn-subheading:disabled { opacity: 0.3; cursor: not-allowed; } +.btn-subheading:not(:disabled):hover { background: rgba(224,62,45,0.1); border-color: rgb(224,62,45); } + +.btn-chat { + padding: 0.3rem 0.6rem; + background: none; border: 1px solid rgba(230,126,35,0.4); border-radius: var(--radius); + font-family: var(--mono); font-size: 0.68rem; + color: rgb(230,126,35); cursor: pointer; + transition: background 0.12s, border-color 0.12s; +} +.btn-chat:disabled { opacity: 0.3; cursor: not-allowed; } +.btn-chat:not(:disabled):hover { background: rgba(230,126,35,0.1); border-color: rgb(230,126,35); } + +.btn-indent { + padding: 0.3rem 0.6rem; + background: none; border: 1px solid var(--border); border-radius: var(--radius); + font-family: var(--mono); font-size: 0.68rem; + color: var(--text-dim); cursor: pointer; + transition: color 0.12s, border-color 0.12s; +} +.btn-indent:disabled { opacity: 0.3; cursor: not-allowed; } +.btn-indent:not(:disabled):hover { color: var(--text); border-color: var(--text-faint); } + +.btn-comment { + padding: 0.3rem 0.6rem; + background: none; border: 1px solid rgba(107,157,214,0.4); border-radius: var(--radius); + font-family: var(--mono); font-size: 0.68rem; + color: #6b9dd6; cursor: pointer; + transition: background 0.12s, border-color 0.12s; +} +.btn-comment:disabled { opacity: 0.3; cursor: not-allowed; } +.btn-comment:not(:disabled):hover { background: rgba(107,157,214,0.1); border-color: #6b9dd6; } .btn-add-page, diff --git a/containers/novela/static/editor.js b/containers/novela/static/editor.js index 78c9626..ef4a058 100644 --- a/containers/novela/static/editor.js +++ b/containers/novela/static/editor.js @@ -2,22 +2,27 @@ require.config({ paths: { vs: 'https://cdn.jsdelivr.net/npm/monaco-editor@0.45.0 const { filename, is_db } = EDITOR; -let editor = null; -let chapters = []; -let currentIndex = -1; -let dirty = new Set(); // indices with unsaved changes -let pendingContent = new Map(); // index -> modified content not yet saved -let pendingTitles = new Map(); // index -> modified title not yet saved (DB only) -let loadingChapter = false; // suppress dirty events during setValue -let saving = false; +// ── State ───────────────────────────────────────────────────────────────────── + +let editor = null; +let chapters = []; // [{title, href, _id, _new, _serverIndex}, ...] +let nextLocalId = 0; +let pendingDeletes = []; // [{_serverIndex, title}, ...] to be deleted on save +let currentIndex = -1; // index into chapters[] +let dirty = new Set(); // chapter _ids with unsaved content/title changes +let pendingContent = new Map(); // _id -> content string +let pendingTitles = new Map(); // _id -> title string (DB only) +let structureDirty = false; // pending adds or deletes not yet on server +let loadingChapter = false; +let saving = false; + +function currentCh() { return currentIndex >= 0 ? chapters[currentIndex] : null; } // ── Init Monaco ─────────────────────────────────────────────────────────────── require(['vs/editor/editor.main'], function () { - if (is_db) { - document.getElementById('header-chapter').style.display = 'none'; - document.getElementById('chapter-title-input').style.display = ''; - } + document.getElementById('header-chapter').style.display = 'none'; + document.getElementById('chapter-title-input').style.display = ''; editor = monaco.editor.create(document.getElementById('editor-pane'), { language: is_db ? 'html' : 'xml', @@ -33,8 +38,9 @@ require(['vs/editor/editor.main'], function () { editor.onDidChangeModelContent(() => { if (loadingChapter) return; - if (currentIndex >= 0) { - dirty.add(currentIndex); + const ch = currentCh(); + if (ch) { + dirty.add(ch._id); renderChapterList(); setStatus('dirty', 'Unsaved changes'); document.getElementById('btn-save').disabled = false; @@ -45,18 +51,17 @@ require(['vs/editor/editor.main'], function () { // Ctrl+S / Cmd+S editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, saveChapter); - if (is_db) { - document.getElementById('chapter-title-input').addEventListener('input', () => { - if (currentIndex >= 0) { - pendingTitles.set(currentIndex, document.getElementById('chapter-title-input').value); - dirty.add(currentIndex); - renderChapterList(); - setStatus('dirty', 'Unsaved changes'); - document.getElementById('btn-save').disabled = false; - updateSaveAll(); - } - }); - } + document.getElementById('chapter-title-input').addEventListener('input', () => { + const ch = currentCh(); + if (ch) { + pendingTitles.set(ch._id, document.getElementById('chapter-title-input').value); + dirty.add(ch._id); + renderChapterList(); + setStatus('dirty', 'Unsaved changes'); + document.getElementById('btn-save').disabled = false; + updateSaveAll(); + } + }); loadChapterList(); }); @@ -69,18 +74,26 @@ async function loadChapterList(targetIndex = 0) { setStatus('error', 'Failed to load chapters'); return; } - chapters = await resp.json(); - if (!Array.isArray(chapters)) chapters = []; + const raw = await resp.json(); + chapters = Array.isArray(raw) + ? raw.map((ch, i) => ({ ...ch, _id: nextLocalId++, _new: false, _serverIndex: i })) + : []; + pendingDeletes = []; + dirty.clear(); + pendingContent.clear(); + pendingTitles.clear(); + structureDirty = false; if (chapters.length === 0) { currentIndex = -1; - dirty.clear(); - pendingContent.clear(); - pendingTitles.clear(); renderChapterList(); document.getElementById('header-chapter').textContent = 'No chapters'; document.getElementById('btn-save').disabled = true; document.getElementById('btn-break').disabled = true; + document.getElementById('btn-subheading').disabled = true; + document.getElementById('btn-chat').disabled = true; + document.getElementById('btn-indent').disabled = true; + document.getElementById('btn-comment').disabled = true; document.getElementById('btn-del-page').disabled = true; if (editor) { loadingChapter = true; editor.setValue(''); loadingChapter = false; } updateSaveAll(); @@ -98,9 +111,8 @@ function renderChapterList() { chapters.forEach((ch, i) => { const item = document.createElement('div'); item.className = 'chapter-item' + (i === currentIndex ? ' active' : ''); - item.innerHTML = - (dirty.has(i) ? '' : '') + - `${esc(ch.title)}`; + const dot = dirty.has(ch._id) ? '' : ''; + item.innerHTML = dot + `${esc(ch.title)}`; item.onclick = () => switchChapter(i); el.appendChild(item); }); @@ -110,36 +122,43 @@ function renderChapterList() { async function switchChapter(index) { if (index === currentIndex) return; - // Preserve current editor content in pending cache before switching (never lose changes) - if (dirty.has(currentIndex) && editor) { - pendingContent.set(currentIndex, editor.getValue()); - } - // Preserve title input for DB books - if (is_db && currentIndex >= 0) { + // Flush current content/title to pending cache before switching + const ch = currentCh(); + if (ch) { + if (dirty.has(ch._id) && editor) pendingContent.set(ch._id, editor.getValue()); const inp = document.getElementById('chapter-title-input'); - if (inp) pendingTitles.set(currentIndex, inp.value); + if (inp) pendingTitles.set(ch._id, inp.value); } - loadChapter(index); + await loadChapter(index); } async function loadChapter(index) { setStatus('', ''); document.getElementById('btn-save').disabled = true; document.getElementById('btn-break').disabled = true; + document.getElementById('btn-subheading').disabled = true; + document.getElementById('btn-chat').disabled = true; document.getElementById('btn-del-page').disabled = true; if (!is_db) document.getElementById('header-chapter').textContent = 'Loading…'; + const ch = chapters[index]; + if (!ch) return; + let content, title; - if (pendingContent.has(index)) { - content = pendingContent.get(index); - title = pendingTitles.has(index) ? pendingTitles.get(index) : (chapters[index]?.title ?? ''); + if (pendingContent.has(ch._id)) { + content = pendingContent.get(ch._id); + title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : ch.title; + } else if (ch._new) { + // New chapter not yet on server — starts empty + content = ''; + title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : ch.title; } else { - const resp = await fetch(`/api/edit/chapter/${index}/${encodeURIComponent(filename)}`); + const resp = await fetch(`/api/edit/chapter/${ch._serverIndex}/${encodeURIComponent(filename)}`); if (!resp.ok) { setStatus('error', 'Load failed'); return; } const data = await resp.json(); content = data.content; - title = pendingTitles.has(index) ? pendingTitles.get(index) : data.title; + title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : data.title; } currentIndex = index; @@ -150,22 +169,17 @@ async function loadChapter(index) { loadingChapter = false; editor.focus(); - // Restore dirty state based on whether we loaded from pending cache - if (dirty.has(index)) { - document.getElementById('btn-save').disabled = false; - setStatus('dirty', 'Unsaved changes'); - } else { - document.getElementById('btn-save').disabled = true; - setStatus('', ''); - } + const hasChanges = dirty.has(ch._id) || structureDirty; + document.getElementById('btn-save').disabled = !hasChanges; + if (hasChanges) setStatus('dirty', 'Unsaved changes'); renderChapterList(); - if (is_db) { - document.getElementById('chapter-title-input').value = title; - } else { - document.getElementById('header-chapter').textContent = title; - } + document.getElementById('chapter-title-input').value = title; document.getElementById('btn-break').disabled = false; + document.getElementById('btn-subheading').disabled = false; + document.getElementById('btn-chat').disabled = false; + document.getElementById('btn-indent').disabled = false; + document.getElementById('btn-comment').disabled = false; document.getElementById('btn-del-page').disabled = chapters.length <= 1; updateSaveAll(); } @@ -173,34 +187,49 @@ async function loadChapter(index) { // ── Save (current chapter) ──────────────────────────────────────────────────── async function saveChapter() { - if (currentIndex < 0 || saving) return; + if (saving) return; saving = true; document.getElementById('btn-save').disabled = true; - setStatus('saving', 'Saving…'); + // Apply structural changes (add/delete) before saving content + if (structureDirty) { + setStatus('saving', 'Applying changes…'); + try { + await applyStructuralChanges(); + } catch (e) { + setStatus('error', e.message || 'Failed to apply changes'); + document.getElementById('btn-save').disabled = false; + saving = false; + return; + } + renderChapterList(); + } + + const ch = currentCh(); + if (!ch || !dirty.has(ch._id)) { + // Structural changes saved, no content changes for this chapter + setStatus('saved', 'Saved'); + setTimeout(() => setStatus('', ''), 2000); + saving = false; + updateSaveAll(); + return; + } + + setStatus('saving', 'Saving…'); try { const saveBody = { content: editor.getValue() }; - if (is_db) { - const inp = document.getElementById('chapter-title-input'); - saveBody.title = inp ? inp.value.trim() : (pendingTitles.get(currentIndex) || ''); - } + const inp = document.getElementById('chapter-title-input'); + saveBody.title = inp ? inp.value.trim() : (pendingTitles.get(ch._id) || ''); const resp = await fetch( - `/api/edit/chapter/${currentIndex}/${encodeURIComponent(filename)}`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(saveBody), - } + `/api/edit/chapter/${ch._serverIndex}/${encodeURIComponent(filename)}`, + { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(saveBody) } ); const data = await resp.json(); if (data.ok) { - dirty.delete(currentIndex); - pendingContent.delete(currentIndex); - if (is_db && chapters[currentIndex]) { - const saved = pendingTitles.get(currentIndex) || chapters[currentIndex].title; - chapters[currentIndex].title = saved || chapters[currentIndex].title; - pendingTitles.delete(currentIndex); - } + dirty.delete(ch._id); + pendingContent.delete(ch._id); + ch.title = pendingTitles.get(ch._id) || ch.title; + pendingTitles.delete(ch._id); renderChapterList(); setStatus('saved', 'Saved'); setTimeout(() => setStatus('', ''), 2000); @@ -227,58 +256,64 @@ async function saveAllChapters() { setStatus('saving', 'Saving all…'); // Flush current editor content and title into pending caches first - if (currentIndex >= 0 && dirty.has(currentIndex)) { - pendingContent.set(currentIndex, editor.getValue()); - if (is_db) { - const inp = document.getElementById('chapter-title-input'); - if (inp) pendingTitles.set(currentIndex, inp.value); - } + const ch = currentCh(); + if (ch && dirty.has(ch._id)) { + pendingContent.set(ch._id, editor.getValue()); + const inp = document.getElementById('chapter-title-input'); + if (inp) pendingTitles.set(ch._id, inp.value); } - const indices = [...dirty]; - for (const i of indices) { - const content = pendingContent.has(i) - ? pendingContent.get(i) - : (i === currentIndex ? editor.getValue() : null); - // For DB books, a title-only change has no pendingContent — still need to save - const hasTitleChange = is_db && pendingTitles.has(i); + // Apply structural changes first + if (structureDirty) { + setStatus('saving', 'Applying changes…'); + try { + await applyStructuralChanges(); + } catch (e) { + setStatus('error', e.message || 'Failed to apply changes'); + saving = false; + updateSaveAll(); + return; + } + renderChapterList(); + } + + const ids = [...dirty]; + for (const id of ids) { + const chapter = chapters.find(c => c._id === id); + if (!chapter) { dirty.delete(id); continue; } + + const content = pendingContent.has(id) + ? pendingContent.get(id) + : (chapter._id === currentCh()?._id ? editor.getValue() : null); + const hasTitleChange = pendingTitles.has(id); if (!content && !hasTitleChange) continue; try { const saveBody = { content: content || '' }; - if (is_db) saveBody.title = pendingTitles.has(i) ? pendingTitles.get(i) : (chapters[i]?.title || ''); + saveBody.title = pendingTitles.has(id) ? pendingTitles.get(id) : (chapter.title || ''); const resp = await fetch( - `/api/edit/chapter/${i}/${encodeURIComponent(filename)}`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(saveBody), - } + `/api/edit/chapter/${chapter._serverIndex}/${encodeURIComponent(filename)}`, + { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(saveBody) } ); const data = await resp.json(); if (data.ok) { - dirty.delete(i); - pendingContent.delete(i); - if (is_db && chapters[i]) { - chapters[i].title = pendingTitles.get(i) || chapters[i].title; - pendingTitles.delete(i); - } + dirty.delete(id); + pendingContent.delete(id); + chapter.title = pendingTitles.get(id) || chapter.title; + pendingTitles.delete(id); } } catch { - setStatus('error', `Save failed on chapter ${i + 1}`); + setStatus('error', 'Save failed'); saving = false; updateSaveAll(); return; } } - // Reload current chapter display to reflect saved state - if (currentIndex >= 0) { - loadingChapter = true; - editor.setValue(editor.getValue()); // no-op, just clears dirty for display - loadingChapter = false; - document.getElementById('btn-save').disabled = true; - } + loadingChapter = true; + editor.setValue(editor.getValue()); // refresh display + loadingChapter = false; + document.getElementById('btn-save').disabled = true; renderChapterList(); setStatus('saved', 'All saved'); @@ -294,11 +329,65 @@ function updateSaveAll() { if (count > 1) { btn.style.display = 'flex'; btn.textContent = `Save all (${count})`; + btn.disabled = false; } else { btn.style.display = 'none'; } } +// ── Apply structural changes (adds/deletes) ─────────────────────────────────── + +async function applyStructuralChanges() { + // Step 1: apply deletes in descending server-index order so earlier indices stay valid + const sorted = [...pendingDeletes].sort((a, b) => b._serverIndex - a._serverIndex); + for (const del of sorted) { + const resp = await fetch( + `/api/edit/chapter/${del._serverIndex}/${encodeURIComponent(filename)}`, + { method: 'DELETE' } + ); + if (!resp.ok) { + const data = await resp.json().catch(() => ({})); + throw new Error(data.error || 'Delete failed'); + } + // Shift server indices for remaining chapters + chapters.forEach(c => { + if (c._serverIndex !== null && c._serverIndex > del._serverIndex) c._serverIndex--; + }); + } + pendingDeletes = []; + + // Step 2: apply adds in order of appearance in chapters[] + for (const ch of chapters.filter(c => c._new)) { + const localIdx = chapters.indexOf(ch); + // Find nearest preceding chapter that already has a server index + let afterServerIndex = -1; + for (let j = localIdx - 1; j >= 0; j--) { + if (chapters[j]._serverIndex !== null) { + afterServerIndex = chapters[j]._serverIndex; + break; + } + } + const title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : ch.title; + const resp = await fetch( + `/api/edit/chapter/add/${encodeURIComponent(filename)}`, + { method: 'POST', headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ title, after_index: afterServerIndex }) } + ); + const data = await resp.json(); + if (!resp.ok || !data.ok) throw new Error(data.error || 'Add chapter failed'); + + const addedIdx = data.index; + // Shift server indices for chapters inserted after this position + chapters.forEach(c => { + if (c._id !== ch._id && c._serverIndex !== null && c._serverIndex >= addedIdx) c._serverIndex++; + }); + ch._serverIndex = addedIdx; + ch._new = false; + } + + structureDirty = false; +} + // ── Insert break ────────────────────────────────────────────────────────────── function insertBreak() { @@ -313,66 +402,174 @@ function insertBreak() { editor.focus(); } +// ── Wrap selection in tag ───────────────────────────────────────────────────── +function wrapTag(tag, attrs) { + if (!editor || currentIndex < 0) return; + const sel = editor.getSelection(); + const open = attrs ? `<${tag} ${attrs}>` : `<${tag}>`; + const close = ``; + const selectedText = editor.getModel().getValueInRange(sel); + const isEmpty = sel.startLineNumber === sel.endLineNumber && sel.startColumn === sel.endColumn; + editor.executeEdits('wrap-tag', [{ + range: sel, + text: open + selectedText + close, + forceMoveMarkers: true, + }]); + if (isEmpty) { + const pos = editor.getPosition(); + editor.setPosition({ lineNumber: pos.lineNumber, column: pos.column - close.length }); + } + editor.focus(); +} -// ── Add / delete chapter ───────────────────────────────────────────────────── +// Wraps selected text in a span (inline). If the selection contains block +// elements (

,

, ) the span is replaced by a
so the result +// stays valid HTML. +function wrapSpan(cls) { + if (!editor || currentIndex < 0) return; + const sel = editor.getSelection(); + const selectedText = editor.getModel().getValueInRange(sel); + const hasBlock = /<(p|div|h[1-6]|blockquote|ul|ol|li)[\s>]/i.test(selectedText); + const tag = hasBlock ? 'div' : 'span'; + wrapTag(tag, `class="${cls}"`); +} + +function insertIndent() { + if (!editor || currentIndex < 0) return; + const sel = editor.getSelection(); + const selectedText = editor.getModel().getValueInRange(sel); + const hasBlock = /<(p|div|h[1-6]|blockquote|ul|ol|li)[\s>]/i.test(selectedText); + // If selection contains block elements wrap in a div, else in a p + const tag = hasBlock ? 'div' : 'p'; + wrapTag(tag, 'style="padding-left: 40px;"'); +} + +function insertComment() { wrapTag('div', 'class="novela-comment"'); } + +// ── Add / delete chapter ────────────────────────────────────────────────────── async function addChapter() { if (saving) return; - if (dirty.size > 0) { - alert('Save pending changes before adding a page.'); - return; - } const title = prompt('Title for new page:', `New chapter ${Math.max(chapters.length + 1, 1)}`); if (title === null) return; - const resp = await fetch(`/api/edit/chapter/add/${encodeURIComponent(filename)}`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ title, after_index: currentIndex }), - }); - const data = await resp.json(); - if (!resp.ok || !data.ok) { - setStatus('error', data.error || 'Add page failed'); - return; - } + const insertIdx = currentIndex < 0 ? chapters.length : currentIndex + 1; + const newCh = { + title: title.trim() || 'New chapter', + href: null, + _id: nextLocalId++, + _new: true, + _serverIndex: null, + }; + chapters.splice(insertIdx, 0, newCh); + structureDirty = true; + currentIndex = insertIdx; - dirty.clear(); - pendingContent.clear(); - pendingTitles.clear(); - await loadChapterList(data.index ?? Math.max(currentIndex + 1, 0)); - setStatus('saved', 'Page added'); - setTimeout(() => setStatus('', ''), 1500); + renderChapterList(); + + loadingChapter = true; + editor.setValue(''); + editor.setScrollTop(0); + loadingChapter = false; + editor.focus(); + + document.getElementById('btn-save').disabled = false; + document.getElementById('btn-break').disabled = false; + document.getElementById('btn-subheading').disabled = false; + document.getElementById('btn-chat').disabled = false; + document.getElementById('btn-indent').disabled = false; + document.getElementById('btn-comment').disabled = false; + document.getElementById('btn-del-page').disabled = chapters.length <= 1; + setStatus('dirty', 'Unsaved changes'); + document.getElementById('chapter-title-input').value = newCh.title; + updateSaveAll(); } async function deleteChapter() { if (saving || currentIndex < 0) return; + saving = true; if (chapters.length <= 1) { alert('Cannot delete the last page.'); + saving = false; return; } - if (dirty.size > 0) { - alert('Save pending changes before deleting a page.'); - return; - } - const chTitle = chapters[currentIndex]?.title || `chapter ${currentIndex + 1}`; - if (!confirm(`Delete page "${chTitle}"?`)) return; + const ch = chapters[currentIndex]; + const chTitle = ch.title || `chapter ${currentIndex + 1}`; + if (!confirm(`Delete page "${chTitle}"?`)) { saving = false; return; } - const resp = await fetch(`/api/edit/chapter/${currentIndex}/${encodeURIComponent(filename)}`, { - method: 'DELETE', - }); - const data = await resp.json(); - if (!resp.ok || !data.ok) { - setStatus('error', data.error || 'Delete page failed'); + // Clean up pending state for this chapter + dirty.delete(ch._id); + pendingContent.delete(ch._id); + pendingTitles.delete(ch._id); + + const removedIndex = currentIndex; + + if (ch._new) { + // Never reached the server — just remove locally + chapters.splice(removedIndex, 1); + structureDirty = pendingDeletes.length > 0 || chapters.some(c => c._new); + } else { + pendingDeletes.push({ _serverIndex: ch._serverIndex, title: chTitle }); + chapters.splice(removedIndex, 1); + structureDirty = true; + } + + const newCount = chapters.length; + if (newCount === 0) { + currentIndex = -1; + renderChapterList(); + if (!is_db) document.getElementById('header-chapter').textContent = 'No chapters'; + document.getElementById('btn-save').disabled = !structureDirty; + if (structureDirty) setStatus('dirty', 'Unsaved changes'); + document.getElementById('btn-break').disabled = true; + document.getElementById('btn-subheading').disabled = true; + document.getElementById('btn-chat').disabled = true; + document.getElementById('btn-indent').disabled = true; + document.getElementById('btn-comment').disabled = true; + document.getElementById('btn-del-page').disabled = true; + if (editor) { loadingChapter = true; editor.setValue(''); loadingChapter = false; } + saving = false; + updateSaveAll(); return; } - dirty.clear(); - pendingContent.clear(); - pendingTitles.clear(); - await loadChapterList(data.index ?? Math.max(currentIndex - 1, 0)); - setStatus('saved', 'Page deleted'); - setTimeout(() => setStatus('', ''), 1500); + const newIdx = Math.min(removedIndex, newCount - 1); + renderChapterList(); + await loadChapter(newIdx); + setStatus('dirty', 'Unsaved changes'); + saving = false; +} + +// ── Generate Book Info page ─────────────────────────────────────────────────── + +async function generateIntroPage() { + if (saving) return; + if (structureDirty || dirty.size > 0) { + alert('Please save pending changes before generating the info page.'); + return; + } + saving = true; + setStatus('saving', 'Generating info page…'); + try { + const resp = await fetch( + `/api/edit/intro/${encodeURIComponent(filename)}`, + { method: 'POST' } + ); + const data = await resp.json().catch(() => ({})); + if (!resp.ok || !data.ok) { + setStatus('error', data.error || 'Failed to generate info page'); + saving = false; + return; + } + setStatus('saved', 'Info page added'); + setTimeout(() => setStatus('', ''), 2000); + saving = false; + await loadChapterList(0); + } catch { + setStatus('error', 'Failed to generate info page'); + saving = false; + } } // ── Find & Replace all chapters ─────────────────────────────────────────────── @@ -410,7 +607,7 @@ async function replaceInAllChapters() { ? new RegExp(searchVal, caseSens ? 'g' : 'gi') : new RegExp(searchVal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), caseSens ? 'g' : 'gi'); } catch (e) { - prog.className = 'modal-progress error'; + prog.className = 'modal-progress error'; prog.textContent = 'Invalid regex: ' + e.message; runBtn.disabled = false; return; @@ -420,20 +617,22 @@ async function replaceInAllChapters() { let chaptersChanged = 0; // Flush current editor content into pending before we start - if (currentIndex >= 0) { - pendingContent.set(currentIndex, editor.getValue()); - } + const curCh = currentCh(); + if (curCh) pendingContent.set(curCh._id, editor.getValue()); for (let i = 0; i < chapters.length; i++) { - prog.className = 'modal-progress'; + const ch = chapters[i]; + prog.className = 'modal-progress'; prog.textContent = `Checking chapter ${i + 1} / ${chapters.length}…`; let original; - if (pendingContent.has(i)) { - original = pendingContent.get(i); + if (pendingContent.has(ch._id)) { + original = pendingContent.get(ch._id); + } else if (ch._new) { + original = ''; } else { try { - const resp = await fetch(`/api/edit/chapter/${i}/${encodeURIComponent(filename)}`); + const resp = await fetch(`/api/edit/chapter/${ch._serverIndex}/${encodeURIComponent(filename)}`); if (!resp.ok) continue; const data = await resp.json(); original = data.content; @@ -442,21 +641,21 @@ async function replaceInAllChapters() { } } - // Count occurrences let count = 0; const updated = original.replace(pattern, m => { count++; return replaceVal; }); if (count === 0) continue; totalOccurrences += count; chaptersChanged++; - pendingContent.set(i, updated); - dirty.add(i); + pendingContent.set(ch._id, updated); + dirty.add(ch._id); } // Reload current chapter from pending cache if it was changed - if (dirty.has(currentIndex) && pendingContent.has(currentIndex)) { + const cur = currentCh(); + if (cur && dirty.has(cur._id) && pendingContent.has(cur._id)) { loadingChapter = true; - editor.setValue(pendingContent.get(currentIndex)); + editor.setValue(pendingContent.get(cur._id)); loadingChapter = false; document.getElementById('btn-save').disabled = false; setStatus('dirty', 'Unsaved changes'); @@ -465,7 +664,7 @@ async function replaceInAllChapters() { renderChapterList(); updateSaveAll(); - prog.className = totalOccurrences > 0 ? 'modal-progress ok' : 'modal-progress'; + prog.className = totalOccurrences > 0 ? 'modal-progress ok' : 'modal-progress'; prog.textContent = totalOccurrences > 0 ? `${totalOccurrences} replacement${totalOccurrences !== 1 ? 's' : ''} in ${chaptersChanged} chapter${chaptersChanged !== 1 ? 's' : ''} — not saved yet.` : 'No matches found.'; @@ -476,6 +675,6 @@ async function replaceInAllChapters() { function setStatus(cls, text) { const el = document.getElementById('save-status'); - el.className = 'save-status' + (cls ? ' ' + cls : ''); + el.className = 'save-status' + (cls ? ' ' + cls : ''); el.textContent = text; } diff --git a/containers/novela/templates/backup.html b/containers/novela/templates/backup.html index fb7a42e..f18eea1 100644 --- a/containers/novela/templates/backup.html +++ b/containers/novela/templates/backup.html @@ -205,6 +205,7 @@
+

@@ -225,7 +226,8 @@ ID Status - Files + Scanned + Uploaded Bytes Started Finished @@ -317,7 +319,8 @@ el.innerHTML = [ rowHtml('ID', d.id), rowHtml('Status', d.status), - rowHtml('Files', d.files_count ?? '-'), + rowHtml('Scanned', d.scanned_files ?? '-'), + rowHtml('Uploaded', d.uploaded_files ?? '-'), rowHtml('Bytes', d.size_bytes ?? '-'), rowHtml('Started', d.started_at ?? '-'), rowHtml('Finished', d.finished_at ?? '-'), @@ -327,18 +330,19 @@ async function loadHistory() { const body = document.getElementById('history-body'); - body.innerHTML = 'Loading...'; + body.innerHTML = 'Loading...'; const r = await fetch('/api/backup/history'); const rows = await r.json(); if (!rows.length) { - body.innerHTML = 'No backup history yet.'; + body.innerHTML = 'No backup history yet.'; return; } body.innerHTML = rows.map((x) => ` ${esc(x.id)} ${esc(x.status)} - ${esc(x.files_count ?? '-')} + ${esc(x.scanned_files ?? '-')} + ${esc(x.uploaded_files ?? '-')} ${esc(x.size_bytes ?? '-')} ${esc(x.started_at ?? '-')} ${esc(x.finished_at ?? '-')} @@ -520,8 +524,9 @@ out.className = 'status-line ok'; if (d.status === 'running') { out.textContent = `Backup started in background. id=${d.backup_id}, dry_run=${d.dry_run}`; - // Immediately kick off sidebar progress polling + // Immediately kick off sidebar progress polling and page progress polling if (typeof loadBackupProgress === 'function') loadBackupProgress(); + pollRunProgress(); } else { out.textContent = `Backup ${d.status}. id=${d.backup_id}, files=${d.files_count}, bytes=${d.size_bytes}, dry_run=${d.dry_run}`; } @@ -541,6 +546,34 @@ async function refreshAll() { await Promise.all([loadDropboxSettings(), loadHealth(), loadStatus(), loadHistory(), loadSnapshots()]); + pollRunProgress(); + } + + let _runProgressTimer = null; + async function pollRunProgress() { + if (_runProgressTimer) { clearTimeout(_runProgressTimer); _runProgressTimer = null; } + const el = document.getElementById('run-progress'); + if (!el) return; + try { + const r = await fetch('/api/backup/progress'); + const d = await r.json(); + if (!d.running) { + el.style.display = 'none'; + el.textContent = ''; + return; + } + const phase = d.phase || ''; + const phaseLbl = phase === 'scanning' ? 'scanning library' + : phase === 'uploading' ? 'uploading library objects' + : phase === 'snapshot' ? 'uploading snapshot' + : phase === 'pg_dump' ? 'uploading pg_dump (may take minutes)' + : phase || 'running'; + const counter = d.total > 0 ? `${d.done} / ${d.total}` : ''; + el.className = 'status-line warn'; + el.style.display = ''; + el.textContent = counter ? `Phase: ${phaseLbl} · ${counter} files scanned` : `Phase: ${phaseLbl}`; + } catch (_) { /* ignore */ } + _runProgressTimer = setTimeout(pollRunProgress, 3000); } // ── Restore ───────────────────────────────────────────────────────────── diff --git a/containers/novela/templates/editor.html b/containers/novela/templates/editor.html index c996b63..fca0712 100644 --- a/containers/novela/templates/editor.html +++ b/containers/novela/templates/editor.html @@ -46,6 +46,18 @@ Break + + + + +