Release v0.2.9

Reader: monotonic reading progress across devices — saved position only
advances, never rewinds (explicit Mark as read/unread still resets).

Plus the previously uncommitted v0.2.0–v0.2.8 work (FlareSolverr scraping,
Book Info pages, deferred chapter add/delete, scanned/uploaded backup
counters, Dropbox upload tuning, four inline editor formatting buttons,
migration logging, "New view" needs_review fix, consecutive break-image
collapsing, and the related TECHNICAL.md updates).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ivo Oskamp 2026-05-09 11:50:49 +02:00
parent 6083744b3f
commit 91f8380a1f
30 changed files with 2489 additions and 1203 deletions

View File

@ -3,75 +3,54 @@ set -euo pipefail
# ============================================================================ # ============================================================================
# build-and-push.sh # build-and-push.sh
# Location: repo root
# #
# Purpose: # Purpose:
# - Automatic version bump: # - Build & push Docker images for each service under ./containers/*
# 1 = patch, 2 = minor, 3 = major, t = test # - Two modes:
# - Test builds: only update :dev (no commit/tag) # t (test) = only push :dev
# - Release builds: update version.txt, commit, tag, push (to the current branch) # r (release) = push :<version>, :dev, :latest
# - Build & push Docker images for each service under ./compose/* # version is read from the top of changelog.md
# - Preflight checks: Docker daemon up, logged in to registry, valid names/tags #
# - Summary: show all images + tags built and pushed # No git operations: committing and tagging is done manually.
# - Branch visibility:
# - Shows currently checked out branch (authoritative)
# - Reads .last-branch for info (if present) when BRANCH is not set
# - Writes the current branch back to .last-branch at the end
# #
# Usage: # Usage:
# BRANCH=<branch> ./build-and-push.sh [bump] # BRANCH is optional; informative only # ./build-and-push.sh [mode]
# ./build-and-push.sh [bump] # - mode = t -> test build, push :dev only
# If [bump] is omitted, you will be prompted (default = t). # - mode = r -> release build, version taken from changelog.md
# - omitted -> prompt (default: t)
#
# Requirements:
# - docs/changelog.md (relative to repo root), with the most recent release
# at the top as:
# ## vX.Y.Z — YYYY-MM-DD
# (the version is parsed from the first such line)
# - One Dockerfile per service under ./containers/<service>/Dockerfile
# ============================================================================ # ============================================================================
DOCKER_REGISTRY="gitea.oskamp.info" DOCKER_REGISTRY="gitea.oskamp.info"
DOCKER_NAMESPACE="ivooskamp" DOCKER_NAMESPACE="ivooskamp"
VERSION_FILE="version.txt" CHANGELOG_FILE="docs/changelog.md"
START_VERSION="v0.1.0" CONTAINERS_DIR="containers"
COMPOSE_DIR="containers"
LAST_BRANCH_FILE=".last-branch" # stored in repo root
# --- Input: prompt if missing ------------------------------------------------ # --- Input: prompt if missing ------------------------------------------------
BUMP="${1:-}" MODE="${1:-}"
if [[ -z "${BUMP}" ]]; then if [[ -z "${MODE}" ]]; then
echo "Select bump type: [1] patch, [2] minor, [3] major, [t] test (default: t)" echo "Select build type: [t] test build (push :dev only), [r] release build (default: t)"
read -r BUMP read -r MODE
BUMP="${BUMP:-t}" MODE="${MODE:-t}"
fi fi
if [[ "$BUMP" != "1" && "$BUMP" != "2" && "$BUMP" != "3" && "$BUMP" != "t" ]]; then case "$MODE" in
echo "[ERROR] Unknown bump type '$BUMP' (use 1, 2, 3, or t)." t|test) MODE="t" ;;
exit 1 r|release) MODE="r" ;;
fi *)
echo "[ERROR] Unknown mode '$MODE' (use 't' for test or 'r' for release)."
exit 1
;;
esac
# --- Helpers ----------------------------------------------------------------- # --- Helpers -----------------------------------------------------------------
read_version() {
if [[ -f "$VERSION_FILE" ]]; then
tr -d ' \t\n\r' < "$VERSION_FILE"
else
echo "$START_VERSION"
fi
}
write_version() {
echo "$1" > "$VERSION_FILE"
}
bump_version() {
local cur="$1"
local kind="$2"
local core="${cur#v}"
IFS='.' read -r MA MI PA <<< "$core"
case "$kind" in
1) PA=$((PA + 1));;
2) MI=$((MI + 1)); PA=0;;
3) MA=$((MA + 1)); MI=0; PA=0;;
*) echo "[ERROR] Unknown bump kind"; exit 1;;
esac
echo "v${MA}.${MI}.${PA}"
}
check_docker_ready() { check_docker_ready() {
if ! docker info >/dev/null 2>&1; then if ! docker info >/dev/null 2>&1; then
echo "[ERROR] Docker daemon not reachable. Is Docker running and do you have permission to use it?" echo "[ERROR] Docker daemon not reachable. Is Docker running and do you have permission to use it?"
@ -113,14 +92,35 @@ validate_tag() {
fi fi
} }
# --- Preflight --------------------------------------------------------------- # Parse the first "## vX.Y.Z ..." heading from changelog.md.
if [[ ! -d ".git" ]]; then # Accepts: ## v1.0.3 — 2026-04-24
echo "[ERROR] Not a git repository (.git missing)." # ## v1.0.3 - 2026-04-24
exit 1 # ## v1.0.3
fi read_version_from_changelog() {
if [[ ! -f "$CHANGELOG_FILE" ]]; then
echo "[ERROR] $CHANGELOG_FILE not found in $(pwd)." >&2
exit 1
fi
local line
# Match lines starting with "## v<digits>.<digits>.<digits>"
line="$(grep -m1 -E '^##[[:space:]]+v[0-9]+\.[0-9]+\.[0-9]+' "$CHANGELOG_FILE" || true)"
if [[ -z "$line" ]]; then
echo "[ERROR] No release heading found in $CHANGELOG_FILE (expected e.g. '## v1.0.3 — 2026-04-24' near the top)." >&2
exit 1
fi
# Extract the vX.Y.Z token
local version
version="$(echo "$line" | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+' | head -n1)"
if [[ -z "$version" ]]; then
echo "[ERROR] Could not parse version from line: $line" >&2
exit 1
fi
echo "$version"
}
if [[ ! -d "$COMPOSE_DIR" ]]; then # --- Preflight ---------------------------------------------------------------
echo "[ERROR] '$COMPOSE_DIR' directory missing. Expected ./compose/<service>/ with a Dockerfile." if [[ ! -d "$CONTAINERS_DIR" ]]; then
echo "[ERROR] '$CONTAINERS_DIR' directory missing. Expected ./${CONTAINERS_DIR}/<service>/ with a Dockerfile."
exit 1 exit 1
fi fi
@ -128,90 +128,44 @@ check_docker_ready
ensure_registry_login ensure_registry_login
validate_repo_component "$DOCKER_NAMESPACE" validate_repo_component "$DOCKER_NAMESPACE"
# Detect currently checked out branch (authoritative for this script) # Informational: show branch and HEAD if this happens to be a git repo.
DETECTED_BRANCH="$(git branch --show-current 2>/dev/null || true)" BRANCH_INFO=""
if [[ -z "$DETECTED_BRANCH" ]]; then HEAD_INFO=""
DETECTED_BRANCH="$(git symbolic-ref --quiet --short HEAD 2>/dev/null || true)" if [[ -d ".git" ]]; then
BRANCH_INFO="$(git branch --show-current 2>/dev/null || echo unknown)"
HEAD_INFO="$(git rev-parse --short HEAD 2>/dev/null || echo unknown)"
echo "[INFO] Repo: $(pwd)"
echo "[INFO] Current branch: $BRANCH_INFO"
echo "[INFO] HEAD (sha): $HEAD_INFO"
else
echo "[INFO] Repo: $(pwd) (not a git checkout)"
fi fi
if [[ -z "$DETECTED_BRANCH" ]]; then
# Try to derive from upstream # --- Determine version (release only) ----------------------------------------
UPSTREAM_REF_DERIVED="$(git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null || true)" VERSION=""
if [[ -n "$UPSTREAM_REF_DERIVED" ]]; then if [[ "$MODE" == "r" ]]; then
DETECTED_BRANCH="${UPSTREAM_REF_DERIVED#origin/}" VERSION="$(read_version_from_changelog)"
echo "[INFO] Release version (from $CHANGELOG_FILE): $VERSION"
validate_tag "$VERSION"
validate_tag "latest"
# Ask for confirmation so you never accidentally re-push an old version or a wrong one.
read -r -p "Proceed building & pushing as ${VERSION}? [y/N] " CONFIRM
CONFIRM="${CONFIRM:-N}"
if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then
echo "[INFO] Aborted by user."
exit 0
fi fi
fi
if [[ -z "$DETECTED_BRANCH" ]]; then
DETECTED_BRANCH="main"
fi
# Optional signals: BRANCH env and .last-branch (informational only)
ENV_BRANCH="${BRANCH:-}"
LAST_BRANCH_FILE_PATH="$(pwd)/$LAST_BRANCH_FILE"
LAST_BRANCH_VALUE=""
if [[ -z "$ENV_BRANCH" && -f "$LAST_BRANCH_FILE_PATH" ]]; then
LAST_BRANCH_VALUE="$(tr -d ' \t\n\r' < "$LAST_BRANCH_FILE_PATH")"
fi
UPSTREAM_REF="$(git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null || echo "origin/$DETECTED_BRANCH")"
HEAD_SHA="$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")"
echo "[INFO] Repo: $(pwd)"
echo "[INFO] Current branch: $DETECTED_BRANCH"
echo "[INFO] Upstream: $UPSTREAM_REF"
echo "[INFO] HEAD (sha): $HEAD_SHA"
if [[ -n "$ENV_BRANCH" && "$ENV_BRANCH" != "$DETECTED_BRANCH" ]]; then
echo "[WARNING] BRANCH='$ENV_BRANCH' differs from checked out branch '$DETECTED_BRANCH'."
echo "[WARNING] This script does not switch branches; continuing on '$DETECTED_BRANCH'."
fi
if [[ -n "$LAST_BRANCH_VALUE" && "$LAST_BRANCH_VALUE" != "$DETECTED_BRANCH" && -z "$ENV_BRANCH" ]]; then
echo "[INFO] .last-branch suggests '$LAST_BRANCH_VALUE', but current checkout is '$DETECTED_BRANCH'."
echo "[INFO] If you intended to build '$LAST_BRANCH_VALUE', switch branches first (use update-and-build.sh)."
fi
# --- Versioning --------------------------------------------------------------
CURRENT_VERSION="$(read_version)"
NEW_VERSION="$CURRENT_VERSION"
DO_TAG_AND_BUMP=true
if [[ "$BUMP" == "t" ]]; then
echo "[INFO] Test build: keeping version $CURRENT_VERSION; will only update :dev."
DO_TAG_AND_BUMP=false
else else
NEW_VERSION="$(bump_version "$CURRENT_VERSION" "$BUMP")" echo "[INFO] Test build: only :dev will be pushed."
echo "[INFO] New version: $NEW_VERSION"
fi
if $DO_TAG_AND_BUMP; then
validate_tag "$NEW_VERSION"
fi
validate_tag "latest"
# --- Version update + VCS ops (release builds only) --------------------------
if $DO_TAG_AND_BUMP; then
echo "[INFO] Writing $NEW_VERSION to $VERSION_FILE"
write_version "$NEW_VERSION"
echo "[INFO] Git add + commit (branch: $DETECTED_BRANCH)"
git add "$VERSION_FILE"
git commit -m "Release $NEW_VERSION on branch $DETECTED_BRANCH (bump type $BUMP)"
echo "[INFO] Git tag $NEW_VERSION"
git tag -a "$NEW_VERSION" -m "Release $NEW_VERSION"
echo "[INFO] Git push + tags"
git push origin "$DETECTED_BRANCH"
git push --tags
else
echo "[INFO] Skipping commit/tagging (test build)."
fi fi
validate_tag "dev"
# --- Build & push per service ------------------------------------------------ # --- Build & push per service ------------------------------------------------
shopt -s nullglob shopt -s nullglob
services=( "$COMPOSE_DIR"/* ) services=( "$CONTAINERS_DIR"/* )
if [[ ${#services[@]} -eq 0 ]]; then if [[ ${#services[@]} -eq 0 ]]; then
echo "[ERROR] No services found under $COMPOSE_DIR" echo "[ERROR] No services found under $CONTAINERS_DIR"
exit 1 exit 1
fi fi
@ -231,18 +185,24 @@ for svc_path in "${services[@]}"; do
IMAGE_BASE="${DOCKER_REGISTRY}/${DOCKER_NAMESPACE}/${svc}" IMAGE_BASE="${DOCKER_REGISTRY}/${DOCKER_NAMESPACE}/${svc}"
if $DO_TAG_AND_BUMP; then if [[ "$MODE" == "r" ]]; then
echo "============================================================" echo "============================================================"
echo "[INFO] Building ${svc} -> tags: ${NEW_VERSION}, latest" echo "[INFO] Building ${svc} -> tags: ${VERSION}, dev, latest"
echo "============================================================" echo "============================================================"
docker build -t "${IMAGE_BASE}:${NEW_VERSION}" -t "${IMAGE_BASE}:latest" -t "${IMAGE_BASE}:dev" "$svc_path" docker build \
docker push "${IMAGE_BASE}:${NEW_VERSION}" -t "${IMAGE_BASE}:${VERSION}" \
docker push "${IMAGE_BASE}:latest" -t "${IMAGE_BASE}:dev" \
-t "${IMAGE_BASE}:latest" \
"$svc_path"
docker push "${IMAGE_BASE}:${VERSION}"
docker push "${IMAGE_BASE}:dev" docker push "${IMAGE_BASE}:dev"
BUILT_IMAGES+=("${IMAGE_BASE}:${NEW_VERSION}" "${IMAGE_BASE}:latest" "${IMAGE_BASE}:dev") docker push "${IMAGE_BASE}:latest"
BUILT_IMAGES+=("${IMAGE_BASE}:${VERSION}" "${IMAGE_BASE}:dev" "${IMAGE_BASE}:latest")
else else
echo "============================================================" echo "============================================================"
echo "[INFO] Test build ${svc} -> tag: latest" echo "[INFO] Test build ${svc} -> tag: dev"
echo "============================================================" echo "============================================================"
docker build -t "${IMAGE_BASE}:dev" "$svc_path" docker build -t "${IMAGE_BASE}:dev" "$svc_path"
docker push "${IMAGE_BASE}:dev" docker push "${IMAGE_BASE}:dev"
@ -250,21 +210,27 @@ for svc_path in "${services[@]}"; do
fi fi
done done
# --- Persist current branch to .last-branch ----------------------------------
# (This helps script 1 to preselect next time, and is informative if you run script 2 standalone)
echo "$DETECTED_BRANCH" > "$LAST_BRANCH_FILE_PATH"
# --- Summary ----------------------------------------------------------------- # --- Summary -----------------------------------------------------------------
echo "" echo ""
echo "============================================================" echo "============================================================"
echo "[SUMMARY] Build & push complete (branch: $DETECTED_BRANCH)" if [[ "$MODE" == "r" ]]; then
if $DO_TAG_AND_BUMP; then echo "[SUMMARY] Release build & push complete: $VERSION"
echo "[INFO] Release version: $NEW_VERSION"
else else
echo "[INFO] Test build (no version bump)" echo "[SUMMARY] Test build & push complete (:dev only)"
fi
if [[ -n "$BRANCH_INFO" ]]; then
echo "[INFO] Branch: $BRANCH_INFO HEAD: $HEAD_INFO"
fi fi
echo "[INFO] Images pushed:" echo "[INFO] Images pushed:"
for img in "${BUILT_IMAGES[@]}"; do for img in "${BUILT_IMAGES[@]}"; do
echo " - $img" echo " - $img"
done done
echo "============================================================" echo "============================================================"
# --- Reminder ----------------------------------------------------------------
# This script performs no git operations. The example commands embed
# ${VERSION}, which is only populated for release builds, so the "e.g.:"
# lead-in and the examples are printed together in release mode only.
# A test build gets the short notice without a dangling "e.g.:".
echo ""
if [[ "$MODE" == "r" ]]; then
  echo "[REMINDER] No git operations were performed. If this was a release,"
  echo " commit and tag manually, e.g.:"
  echo " git add -A && git commit -m \"Release ${VERSION}\""
  echo " git tag -a ${VERSION} -m \"Release ${VERSION}\""
  echo " git push && git push --tags"
else
  echo "[REMINDER] No git operations were performed."
fi

View File

@ -1,3 +1,4 @@
import functools
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
import zipfile import zipfile
@ -27,7 +28,9 @@ def _detect_format(path: Path) -> str:
return "zip" if path.suffix.lower() == ".cbz" else "rar" return "zip" if path.suffix.lower() == ".cbz" else "rar"
def cbr_page_list(path: Path) -> list[str]: @functools.lru_cache(maxsize=64)
def _cbr_page_list_cached(str_path: str, _mtime: float) -> tuple[str, ...]:
path = Path(str_path)
fmt = _detect_format(path) fmt = _detect_format(path)
if fmt == "zip": if fmt == "zip":
with zipfile.ZipFile(path) as zf: with zipfile.ZipFile(path) as zf:
@ -38,7 +41,11 @@ def cbr_page_list(path: Path) -> list[str]:
else: else:
with rarfile.RarFile(path) as rf: with rarfile.RarFile(path) as rf:
names = [n for n in rf.namelist() if Path(n).suffix.lower() in SUPPORTED_IMG] names = [n for n in rf.namelist() if Path(n).suffix.lower() in SUPPORTED_IMG]
return sorted(names) return tuple(sorted(names))
def cbr_page_list(path: Path) -> list[str]:
    """Return the sorted image entry names of a CBR/CBZ archive.

    Delegates to the cached helper, passing the path as a string together
    with the file's current modification time. The cached tuple is copied
    into a new list before being returned.
    """
    archive_key = str(path)
    modified_at = path.stat().st_mtime
    cached_names = _cbr_page_list_cached(archive_key, modified_at)
    return list(cached_names)
def cbr_page_count(path: Path) -> int: def cbr_page_count(path: Path) -> int:

View File

@ -3,6 +3,218 @@ Changelog data for Novela
""" """
CHANGELOG = [ CHANGELOG = [
{
"version": "v0.2.9",
"date": "2026-05-09",
"summary": "Reading position is now monotonic across devices — only advances, never rewinds; explicit Mark as read/unread still resets.",
"sections": [
{
"title": "Bug fixes",
"type": "bugfix",
"changes": [
"Reader: reading position is now monotonic across devices — when the same book is read on multiple devices, the saved position only advances and never rewinds; previously, opening the book on a device with an older stored position would overwrite the further progress made on another device. The progress endpoint compares the incoming (chapterIndex, scrollFrac) to the stored value and only writes when the new position is strictly further. Explicit Mark as read / Mark as unread still clears the row, so deliberate restarts work as before.",
],
},
],
},
{
"version": "v0.2.8",
"date": "2026-04-22",
"summary": "Newly converted books from the grabber show up in the New view again.",
"sections": [
{
"title": "Bug fixes",
"type": "bugfix",
"changes": [
"Grabber: newly converted books now appear in the New view again — both the DB-storage and file-EPUB branches in routers/grabber.py now persist needs_review=True on upsert_book (was False); the New view filters on needs_review, so previously grabbed books never showed up there",
],
},
],
},
{
"version": "v0.2.7",
"date": "2026-04-22",
"summary": "Consecutive scene-break images are now collapsed to a single break, and TECHNICAL.md is brought up to date with recent FlareSolverr, Book Info and editor changes.",
"sections": [
{
"title": "Bug fixes",
"type": "bugfix",
"changes": [
"Break detection: runs of 2+ consecutive scene-break images are now collapsed to a single break — the reader and exported EPUBs no longer show multiple identical break images directly after each other",
],
},
{
"title": "Internal",
"type": "improvement",
"changes": [
"New helper collapse_consecutive_breaks() in xhtml.py matches 2+ consecutive break-image <center> lines (with optional whitespace between) and replaces them with one; applied in normalize_wysiwyg_html() (editor save path) and in routers/grabber.py on both the preview converted_xhtml and the per-chapter content_html produced during scraping",
"docs/TECHNICAL.md updated to cover previously missing changes: POST /api/edit/intro/{filename} and the title field on file-EPUB chapter save; FlareSolverr sidecar and BaseScraper.close(); AwesomeDudeScraper uses FlareSolverr; make_epub(include_intro=…) and epub_utils.build_book_info_body_html; grabber DB flow stores Book Info as chapter 0; 'Book Info' h1-strip skip in reader; new env vars (FLARESOLVERR_URL, FLARESOLVERR_TIMEOUT_MS, NOVELA_PORT, ADMINER_PORT); collapse_consecutive_breaks() helper",
],
},
],
},
{
"version": "v0.2.6",
"date": "2026-04-22",
"summary": "FlareSolverr sidecar lets the scraper bypass Cloudflare 'Just a moment…' challenges (awesomedude.org), with per-book sessions so chapters after the first are much faster.",
"sections": [
{
"title": "New features",
"type": "feature",
"changes": [
"Scrapers: Cloudflare-protected sites (e.g. awesomedude.org, now fully behind a 'Just a moment…' JavaScript challenge) can be scraped again via a new FlareSolverr sidecar service that solves the challenge in a headless browser; the novela container uses FlareSolverr for both the book-info page and every chapter fetch",
"Per-book FlareSolverr sessions: the scraper creates one browser session at the start of a book, reuses it across all chapters (Cloudflare cookies stay warm), and destroys it on completion — only the first request pays the full challenge-solve cost and subsequent chapters are much faster",
],
},
{
"title": "Internal",
"type": "improvement",
"changes": [
"stack/stack.yml adds a flaresolverr service (image ghcr.io/flaresolverr/flaresolverr:latest, internal-only, on novela-net); novela gains FLARESOLVERR_URL=http://flaresolverr:8191/v1 and depends_on: flaresolverr",
"Host port mappings in stack/stack.yml are now driven by ${NOVELA_PORT} and ${ADMINER_PORT}, defaulted in stack/novela.env to 8099 / 8098, so production stacks can override without diverging from the repo",
"New helpers in scrapers/base.py: flaresolverr_get(url, timeout_ms=None, session=None) returns a SimpleNamespace(text, url) as a drop-in for httpx.Response attributes; flaresolverr_session_create() and flaresolverr_session_destroy(sid) manage browser sessions; configurable via FLARESOLVERR_URL and FLARESOLVERR_TIMEOUT_MS env vars",
"BaseScraper gained an async close() method (default no-op) so scrapers can release scoped resources",
"scrapers/awesomedude.py creates a FlareSolverr session in fetch_book_info, reuses it in every fetch_chapter call, and destroys it in close()",
"routers/grabber.py wraps all three scraper usages (debug_run, preview, _run_scrape) in try/finally: await scraper.close() so FlareSolverr sessions are always released, even on errors",
],
},
],
},
{
"version": "v0.2.5",
"date": "2026-04-22",
"summary": "Book Info page generator in the editor, editable chapter titles for file EPUBs, and Book Info page auto-inserted during DB-storage conversions.",
"sections": [
{
"title": "New features",
"type": "feature",
"changes": [
"Editor: Info page button in the chapter editor toolbar generates a gayauthors-style book-info page (title, author, genres, sub-genres, tags, description, source, updated) and inserts it as the first chapter; empty metadata fields are skipped; no duplicate detection — clicking it again will add another page",
"Editor: chapter titles are now editable for file-EPUB books (DB books already supported this); the chapter-title input works for both storage types, and for file EPUBs the matching NCX navPoint is updated on save so the table of contents reflects the new title",
"Grabber: DB-storage conversions now persist the Book Info page as a real stored chapter at index 0, so it is visible in the editor and reader (EPUB-storage conversions continue to produce intro.xhtml via make_epub as before)",
],
},
{
"title": "Internal",
"type": "improvement",
"changes": [
"New endpoint POST /api/edit/intro/{filename} — for DB books shifts existing chapter_index values up by one via a two-step negation and inserts 'Book Info' at index 0; for file EPUBs writes a new intro_<hex>.xhtml via make_intro_xhtml, adds a manifest item, places the itemref at the start of the spine, and inserts a navPoint at the top of the NCX with renumbered playOrder",
"POST /api/edit/chapter/{index}/{filename} for file EPUBs now accepts a title field alongside content and updates the matching NCX navPoint text when it changes",
"make_epub gained an include_intro: bool = True parameter; DB → EPUB export (reader.py) calls it with include_intro=False because the stored chapter 0 is now the single source of truth for the info page",
"reader.py leading-h-tag stripping (get_chapter_html and DB→EPUB export) is skipped when title == 'Book Info', so the <h1>{book title}</h1> in that chapter's body survives",
"New helper epub_utils.build_book_info_body_html(title, author, info) returns the inner-body HTML fragment for DB storage; skips empty fields and separates description and source/updated blocks with <hr/>",
],
},
],
},
{
"version": "v0.2.4",
"date": "2026-04-21",
"summary": "Backup: separate Scanned vs Uploaded counts, and live phase indicator on the backup page.",
"sections": [
{
"title": "Improvements",
"type": "improvement",
"changes": [
"Backup: status and history now clearly distinguish Scanned (library files inspected) from Uploaded (objects actually sent to Dropbox — library + snapshot + pg_dump); previously only the upload count was shown, which was confusing when most files were already deduplicated",
"Backup page: live phase indicator shown under the Run buttons while a backup is running (scanning library, uploading library objects, uploading snapshot, uploading pg_dump), so it is clear the process is not stuck at N/N while snapshot and pg_dump are uploaded",
],
},
{
"title": "Internal",
"type": "improvement",
"changes": [
"Migration backup_log_scanned_files adds a scanned_files column to backup_log; /api/backup/status and /api/backup/history return uploaded_files and scanned_files (the old files_count key was renamed to uploaded_files)",
],
},
],
},
{
"version": "v0.2.3",
"date": "2026-04-21",
"summary": "Backup: Dropbox upload timeout and chunk size tuned to prevent read-timeout errors.",
"sections": [
{
"title": "Bug fixes",
"type": "bugfix",
"changes": [
"Backup: Dropbox uploads no longer fail with 'HTTPSConnectionPool ... Read timed out. (read timeout=120)' — the Dropbox client timeout was raised from 120s to 300s and the upload chunk size was reduced from 100 MB to 16 MB so each chunk completes comfortably within the timeout window",
],
},
],
},
{
"version": "v0.2.2",
"date": "2026-04-16",
"summary": "Four inline formatting buttons in the chapter editor: subheading, chat, indent, and comment block.",
"sections": [
{
"title": "New features",
"type": "feature",
"changes": [
"Editor: four inline formatting buttons added to the chapter editor toolbar — S (subheading, red bold), C (chat, orange), →| (indented paragraph), [ ] (comment block with blue left border); each button wraps the selected text or inserts an empty tag at the cursor; wrap logic automatically uses a <div> when the selection contains block elements to keep the HTML valid",
],
},
],
},
{
"version": "v0.2.1",
"date": "2026-04-16",
"summary": "Migration progress now visible in Docker logs at startup.",
"sections": [
{
"title": "Improvements",
"type": "improvement",
"changes": [
"Startup: migration progress is now visible in Docker logs — each migration logs whether it was skipped or executed (with duration in ms); a summary line shows either 'all already applied' or how many were executed",
],
},
],
},
{
"version": "v0.2.0",
"date": "2026-04-15",
"summary": "Deferred chapter save in the editor, startup performance, ETag accuracy, scraper encoding fixes, and internal hardening.",
"sections": [
{
"title": "New features",
"type": "feature",
"changes": [
"Editor: chapter add and delete are now deferred — structural changes are no longer saved immediately; they are applied in the correct order when the Save button is pressed",
"Operations: GET /health endpoint — returns {\"ok\": true} when the database is reachable; suitable for container health checks and monitoring",
],
},
{
"title": "Bug fixes",
"type": "bugfix",
"changes": [
"Editor: adding a chapter to a DB-stored book no longer fails with a UniqueViolation — PostgreSQL was checking the unique constraint on (filename, chapter_index) mid-update; fixed with a two-step index shift",
"Scraper: Codey's World pages now decode correctly — pages are read as Windows-1252 (cp1252), which correctly maps the 0x80–0x9F byte range; characters like …, ', \", — no longer appear as replacement characters",
"XHTML conversion: &nbsp; followed by a regular space no longer produces a double space — non-breaking spaces are normalized to regular spaces and consecutive spaces are collapsed; applies to all scrapers",
],
},
{
"title": "Improvements",
"type": "improvement",
"changes": [
"Startup: each database migration now runs only once — a schema_migrations tracking table prevents heavy migrations from re-running on every container restart; startup connection overhead reduced from 37 separate connections to 1",
"Library API: ETag now reflects changes to tags and reading progress — tag edits and progress updates correctly invalidate the client cache",
"CBR/CBZ reader: page list is cached per file and modification time — avoids opening the archive twice per page request",
"Grabber and backup: in-memory job dicts are capped at 50 entries to prevent unbounded memory growth",
],
},
{
"title": "Internal",
"type": "improvement",
"changes": [
"Shared epub_utils.py module eliminates near-identical EPUB helper functions that existed across reader.py, editor.py, and common.py; fixes a double-escaped regex in the old OPF path lookup",
"pdf_cover_thumb no longer writes a temporary file — cover thumbnail generated fully in-memory, eliminating a race condition under concurrent requests",
"security.py: hardcoded fallback encryption key removed; raises a clear error at startup when no key is configured; Fernet instance cached per process",
"builder.py: all explicit conn.commit() calls replaced with with conn: context manager",
],
},
],
},
{ {
"version": "v0.1.12", "version": "v0.1.12",
"date": "2026-04-15", "date": "2026-04-15",

View File

@ -219,6 +219,7 @@ def make_epub(
break_img_data: bytes, break_img_data: bytes,
book_id: str, book_id: str,
book_info: dict | None = None, book_info: dict | None = None,
include_intro: bool = True,
) -> bytes: ) -> bytes:
"""Build a complete EPUB 2.0 in-memory and return the bytes.""" """Build a complete EPUB 2.0 in-memory and return the bytes."""
buf = io.BytesIO() buf = io.BytesIO()
@ -240,7 +241,7 @@ def make_epub(
</container>""", </container>""",
) )
css = open("static/epub-style.css", "r", encoding="utf-8").read() css = Path("static/epub-style.css").read_text(encoding="utf-8")
zf.writestr("OEBPS/Styles/style.css", css) zf.writestr("OEBPS/Styles/style.css", css)
zf.writestr("OEBPS/Images/break.png", break_img_data) zf.writestr("OEBPS/Images/break.png", break_img_data)
@ -257,7 +258,8 @@ def make_epub(
cover_filename, cover_media_type = detect_image_format(cover_data, "cover") cover_filename, cover_media_type = detect_image_format(cover_data, "cover")
zf.writestr(f"OEBPS/Images/{cover_filename}", cover_data) zf.writestr(f"OEBPS/Images/{cover_filename}", cover_data)
zf.writestr("OEBPS/Text/intro.xhtml", make_intro_xhtml(book_title, author, info)) if include_intro:
zf.writestr("OEBPS/Text/intro.xhtml", make_intro_xhtml(book_title, author, info))
# Chapter images # Chapter images
for ch in chapters: for ch in chapters:
@ -291,12 +293,13 @@ def make_epub(
) )
manifest_items.append('<item id="break-img" href="Images/break.png" media-type="image/png"/>') manifest_items.append('<item id="break-img" href="Images/break.png" media-type="image/png"/>')
manifest_items.append('<item id="css" href="Styles/style.css" media-type="text/css"/>') manifest_items.append('<item id="css" href="Styles/style.css" media-type="text/css"/>')
manifest_items.append('<item id="intro" href="Text/intro.xhtml" media-type="application/xhtml+xml"/>') if include_intro:
manifest_items.append('<item id="intro" href="Text/intro.xhtml" media-type="application/xhtml+xml"/>')
for i, (fname, _) in enumerate(chapter_files, 1): for i, (fname, _) in enumerate(chapter_files, 1):
manifest_items.append(f'<item id="ch{i:03d}" href="Text/{fname}" media-type="application/xhtml+xml"/>') manifest_items.append(f'<item id="ch{i:03d}" href="Text/{fname}" media-type="application/xhtml+xml"/>')
manifest_items.append('<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>') manifest_items.append('<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>')
spine_items = ['<itemref idref="intro"/>'] + [ spine_items = (['<itemref idref="intro"/>'] if include_intro else []) + [
f'<itemref idref="ch{i:03d}"/>' for i in range(1, len(chapter_files) + 1) f'<itemref idref="ch{i:03d}"/>' for i in range(1, len(chapter_files) + 1)
] ]
@ -354,15 +357,18 @@ def make_epub(
zf.writestr("OEBPS/content.opf", opf) zf.writestr("OEBPS/content.opf", opf)
# TOC NCX # TOC NCX
nav_points = [ nav_points = []
""" <navPoint id="intro" playOrder="1"> if include_intro:
nav_points.append(
""" <navPoint id="intro" playOrder="1">
<navLabel><text>Book Info</text></navLabel> <navLabel><text>Book Info</text></navLabel>
<content src="Text/intro.xhtml"/> <content src="Text/intro.xhtml"/>
</navPoint>""" </navPoint>"""
] )
ch_offset = 1 if include_intro else 0
for i, (fname, title) in enumerate(chapter_files, 1): for i, (fname, title) in enumerate(chapter_files, 1):
nav_points.append( nav_points.append(
f""" <navPoint id="ch{i:03d}" playOrder="{i + 1}"> f""" <navPoint id="ch{i:03d}" playOrder="{i + ch_offset}">
<navLabel><text>{he(title)}</text></navLabel> <navLabel><text>{he(title)}</text></navLabel>
<content src="Text/{fname}"/> <content src="Text/{fname}"/>
</navPoint>""" </navPoint>"""
@ -426,7 +432,7 @@ def write_epub_file(epub_path, internal_path: str, content: str) -> None:
if not has_break: if not has_break:
try: try:
zout.writestr(break_img_path, open("static/break.png", "rb").read()) zout.writestr(break_img_path, Path("static/break.png").read_bytes())
except Exception: except Exception:
pass pass

View File

@ -0,0 +1,224 @@
"""Shared EPUB utilities — used by routers/reader.py, routers/editor.py, routers/common.py."""
import html as _html
import posixpath
import re
import zipfile as zf
from pathlib import Path
from bs4 import BeautifulSoup
def find_opf_path(names: set[str], container_xml: str | None) -> str | None:
"""Locate the OPF file path inside an EPUB ZIP."""
opf_path = "OEBPS/content.opf"
if container_xml:
m = re.search(r"full-path\s*=\s*['\"]([^'\"]+)['\"]", container_xml)
if m:
opf_path = m.group(1)
if opf_path in names:
return opf_path
candidates = sorted(n for n in names if n.lower().endswith(".opf"))
return candidates[0] if candidates else None
def norm_href(base_dir: str, rel: str) -> str:
    """Resolve a relative EPUB href against a base directory, stripping fragments.

    Args:
        base_dir: Directory (inside the ZIP) of the document containing the
            href; ``""`` for the archive root.
        rel: The href as written in the document; may carry a ``#fragment``.

    Returns:
        A normalised archive-relative path with no leading ``/`` and no leading
        ``..`` segments, or ``""`` when ``rel`` is empty or resolves to the
        archive root.
    """
    rel = (rel or "").split("#", 1)[0].strip()
    if not rel:
        return ""
    segments = posixpath.normpath(posixpath.join(base_dir, rel)).split("/")
    # After normpath, "", "." and ".." can only appear as *leading* segments
    # (absolute path, bare ".", or an escape above the root).  Drop exactly
    # those whole segments.  The previous ``.lstrip("./")`` stripped a
    # character set instead, which also ate the leading dot(s) of real names
    # such as ".nav.xhtml", so the result no longer matched the ZIP entry.
    start = 0
    while start < len(segments) and segments[start] in ("", ".", ".."):
        start += 1
    return "/".join(segments[start:])
def epub_spine(path: Path) -> list[dict]:
    """Return an ordered list of ``{index, title, href}`` for all spine items.

    The OPF is located through META-INF/container.xml via
    :func:`find_opf_path`; an empty list is returned when no OPF is found.
    Spine ``itemref`` entries whose ``idref`` is missing from the manifest are
    skipped, and ``index`` counts only the entries that remain.

    Titles are looked up first in the EPUB2 NCX (the manifest item named by
    the spine's ``toc`` attribute, else ``toc.ncx`` / ``OEBPS/toc.ncx``) and,
    only when that produced no titles at all, in the EPUB3 navigation
    document (the manifest item whose ``properties`` include ``nav``).  A
    spine item without a title falls back to its file name minus a
    ``.xhtml``/``.html``/``.htm`` extension, and finally to ``Chapter N``.

    Errors while reading or parsing the NCX / nav document are swallowed so
    that a broken table of contents only costs the titles, not the spine.
    """
    with zf.ZipFile(path, "r") as z:
        names = set(z.namelist())
        container_xml = (
            z.read("META-INF/container.xml").decode("utf-8", errors="replace")
            if "META-INF/container.xml" in names
            else None
        )
        opf_path = find_opf_path(names, container_xml)
        if not opf_path:
            return []
        opf_xml = z.read(opf_path).decode("utf-8", errors="replace")
        opf = BeautifulSoup(opf_xml, "xml")
        opf_dir = posixpath.dirname(opf_path)
        # Manifest: item id -> archive path (hrefs are relative to the OPF's directory).
        manifest: dict[str, str] = {}
        for item in opf.find_all("item"):
            iid = item.get("id")
            href = item.get("href")
            if iid and href:
                manifest[iid] = norm_href(opf_dir, href)
        # Spine: reading order expressed as manifest ids.
        spine_idrefs: list[str] = []
        spine_tag = opf.find("spine")
        toc_id = spine_tag.get("toc") if spine_tag else None
        if spine_tag:
            for ir in spine_tag.find_all("itemref"):
                rid = ir.get("idref")
                if rid:
                    spine_idrefs.append(rid)
        hrefs = [manifest[rid] for rid in spine_idrefs if rid in manifest]
        # Archive path -> display title, filled from the NCX or the nav document.
        href_to_title: dict[str, str] = {}
        # EPUB2: NCX titles
        ncx_path = ""
        if toc_id and toc_id in manifest:
            ncx_path = manifest[toc_id]
        elif "toc.ncx" in names:
            ncx_path = "toc.ncx"
        elif "OEBPS/toc.ncx" in names:
            ncx_path = "OEBPS/toc.ncx"
        if ncx_path and ncx_path in names:
            try:
                ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace")
                ncx = BeautifulSoup(ncx_xml, "xml")
                ncx_dir = posixpath.dirname(ncx_path)
                for np in ncx.find_all("navPoint"):
                    content = np.find("content")
                    label_tag = np.find("text")
                    src = content.get("src") if content else ""
                    label = label_tag.get_text(strip=True) if label_tag else ""
                    if src and label:
                        # Fragments are stripped by norm_href, so several navPoints
                        # targeting the same file share one key; the last one wins.
                        href_to_title[norm_href(ncx_dir, src)] = _html.unescape(label)
            except Exception:
                # Best effort: an unreadable NCX just leaves the titles empty.
                pass
        # EPUB3: nav.xhtml titles (fallback)
        if not href_to_title:
            nav_item = None
            for item in opf.find_all("item"):
                props = (item.get("properties") or "").split()
                if "nav" in props:
                    nav_item = item
                    break
            if nav_item and nav_item.get("href"):
                nav_path = norm_href(opf_dir, nav_item.get("href"))
                if nav_path in names:
                    try:
                        nav_xml = z.read(nav_path).decode("utf-8", errors="replace")
                        nav = BeautifulSoup(nav_xml, "lxml")
                        nav_dir = posixpath.dirname(nav_path)
                        for a in nav.select("nav a[href]"):
                            src = a.get("href", "")
                            label = a.get_text(" ", strip=True)
                            if src and label:
                                href_to_title[norm_href(nav_dir, src)] = _html.unescape(label)
                    except Exception:
                        # Best effort: an unreadable nav document leaves the titles empty.
                        pass
        chapters = []
        for i, href in enumerate(hrefs):
            base = posixpath.basename(href)
            # Default title: file name without its (x)html extension.
            title = href_to_title.get(href, re.sub(r"\.(xhtml|html|htm)$", "", base, flags=re.I))
            chapters.append({"index": i, "title": title or f"Chapter {i + 1}", "href": href})
        return chapters
def build_book_info_body_html(title: str, author: str, info: dict) -> str:
    """Render the body fragment of a 'Book Info' chapter.

    Output order: ``<h1>`` title, author byline, Genres / Sub-genres / Tags
    lines, then the description (one ``<p>`` per blank-line-separated
    paragraph) and a Source / Updated footer, each of the last two sections
    introduced by ``<hr/>``.  All text is HTML-escaped, empty fields produce
    no markup, and the pieces are joined with newlines.

    ``info`` is read for the keys ``genres``, ``subgenres``, ``tags``
    (lists), ``description``, ``source_url`` and ``updated_date`` (strings);
    missing or falsy values count as empty.
    """

    def _clean(value) -> str:
        return (value or "").strip()

    def _labelled(label: str, text: str) -> str:
        return f"<p><strong>{label}:</strong> {_html.escape(text)}</p>"

    def _non_empty(key: str) -> list:
        return [entry for entry in (info.get(key) or []) if entry]

    lines: list[str] = []

    heading = _clean(title)
    if heading:
        lines.append(f"<h1>{_html.escape(heading)}</h1>")
    byline = _clean(author)
    if byline:
        lines.append(f'<p class="author">by {_html.escape(byline)}</p>')

    # Read every field up front, then emit the sections in display order.
    genres, subgenres, tags = (_non_empty(key) for key in ("genres", "subgenres", "tags"))
    description = _clean(info.get("description"))
    source_url = _clean(info.get("source_url"))
    updated_date = _clean(info.get("updated_date"))

    for label, values in (("Genres", genres), ("Sub-genres", subgenres), ("Tags", tags)):
        if values:
            lines.append(_labelled(label, ", ".join(values)))

    if description:
        lines.append("<hr/>")
        lines.extend(
            f"<p>{_html.escape(chunk)}</p>"
            for chunk in map(str.strip, description.split("\n\n"))
            if chunk
        )

    footer = [
        _labelled(label, value)
        for label, value in (("Source", source_url), ("Updated", updated_date))
        if value
    ]
    if footer:
        lines.append("<hr/>")
        lines.extend(footer)

    return "\n".join(lines)
def make_new_chapter_xhtml(title: str) -> str:
    """Build a minimal XHTML 1.0 Transitional chapter stub.

    ``title`` is stripped and HTML-escaped; when it is ``None``, empty or
    whitespace-only, ``New chapter`` is used.  The escaped title appears both
    in ``<title>`` and in the ``<h2 class="chapter-title">`` heading, followed
    by a single empty paragraph.  Every line ends with a newline.
    """
    heading = _html.escape((title or "New chapter").strip() or "New chapter")
    document_lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">',
        "<head>",
        ' <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>',
        f" <title>{heading}</title>",
        ' <link rel="stylesheet" type="text/css" href="../Styles/style.css"/>',
        "</head>",
        "<body>",
        f' <h2 class="chapter-title">{heading}</h2>',
        " <p></p>",
        "</body>",
        "</html>",
    ]
    return "".join(line + "\n" for line in document_lines)
def rewrite_epub_entries(
epub_path: Path,
updates: dict[str, bytes],
remove_paths: set[str] | None = None,
) -> None:
"""Rewrite entries in an EPUB ZIP.
Crash-safe: writes to a ``.tmp.epub`` file first, then atomically replaces
the original. The ``mimetype`` entry is always stored uncompressed
(ZIP_STORED) as required by the EPUB spec.
"""
remove_paths = set(remove_paths or set())
tmp = epub_path.with_suffix(".tmp.epub")
with zf.ZipFile(epub_path, "r") as zin, zf.ZipFile(tmp, "w", compression=zf.ZIP_DEFLATED) as zout:
existing: set[str] = set()
for item in zin.infolist():
name = item.filename
existing.add(name)
if name in remove_paths:
continue
data = updates.get(name, zin.read(name))
ctype = zf.ZIP_STORED if name == "mimetype" else zf.ZIP_DEFLATED
zout.writestr(item, data, compress_type=ctype)
for name, data in updates.items():
if name not in existing and name not in remove_paths:
ctype = zf.ZIP_STORED if name == "mimetype" else zf.ZIP_DEFLATED
zout.writestr(name, data, compress_type=ctype)
tmp.replace(epub_path)

View File

@ -1,10 +1,13 @@
import logging
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.responses import RedirectResponse
logging.basicConfig(level=logging.INFO)
from fastapi.responses import JSONResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from db import close_pool, init_pool from db import close_pool, get_db_conn, init_pool
from migrations import run_migrations from migrations import run_migrations
from routers.backup import start_backup_scheduler, stop_backup_scheduler from routers.backup import start_backup_scheduler, stop_backup_scheduler
from routers import ( from routers import (
@ -50,6 +53,18 @@ app.include_router(changelog_router)
app.include_router(search_router) app.include_router(search_router)
@app.get("/health")
async def health():
    """Liveness probe: report whether a trivial query succeeds on a DB connection.

    Responds with JSON ``{"ok": true}`` when ``SELECT 1`` runs without error and
    ``{"ok": false}`` when obtaining the connection or running the query
    raises.  The failure itself is not reported in the response.
    """
    healthy = False
    try:
        with get_db_conn() as conn, conn.cursor() as cur:
            cur.execute("SELECT 1")
        healthy = True
    except Exception:
        healthy = False
    return JSONResponse({"ok": healthy})
@app.get("/") @app.get("/")
async def index_redirect(): async def index_redirect():
return RedirectResponse(url="/home", status_code=302) return RedirectResponse(url="/home", status_code=302)

View File

@ -1,7 +1,11 @@
import logging
import re import re
import time
from db import direct_connect from db import direct_connect
logger = logging.getLogger(__name__)
_DEFAULT_REGEX = [ _DEFAULT_REGEX = [
r"^\s*[\*\-]{3,}\s*$", r"^\s*[\*\-]{3,}\s*$",
r"^\s*[·•◦‣⁃]\s*[·•◦‣⁃]\s*[·•◦‣⁃]\s*$", r"^\s*[·•◦‣⁃]\s*[·•◦‣⁃]\s*[·•◦‣⁃]\s*$",
@ -24,17 +28,32 @@ _DEFAULT_CSS = [
] ]
def _exec(sql: str) -> None: def _exec(sql: str, conn) -> None:
conn = direct_connect() with conn.cursor() as cur:
try: cur.execute(sql)
with conn:
with conn.cursor() as cur:
cur.execute(sql)
finally:
conn.close()
def migrate_create_library() -> None: def _run_once(conn, name: str, fn) -> bool:
"""Run fn(conn) only if name has not been recorded in schema_migrations.
Returns True if the migration was executed, False if it was skipped."""
with conn.cursor() as cur:
cur.execute("SELECT 1 FROM schema_migrations WHERE name = %s", (name,))
if cur.fetchone():
logger.info("%s — skipped (already applied)", name)
return False
t0 = time.time()
fn(conn)
with conn.cursor() as cur:
cur.execute(
"INSERT INTO schema_migrations (name) VALUES (%s) ON CONFLICT DO NOTHING",
(name,),
)
conn.commit()
logger.info("%s — executed in %dms", name, int((time.time() - t0) * 1000))
return True
def migrate_create_library(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS library ( CREATE TABLE IF NOT EXISTS library (
@ -57,11 +76,12 @@ def migrate_create_library() -> None:
created_at TIMESTAMP DEFAULT NOW(), created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW() updated_at TIMESTAMP DEFAULT NOW()
) )
""" """,
conn,
) )
def migrate_create_book_tags() -> None: def migrate_create_book_tags(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS book_tags ( CREATE TABLE IF NOT EXISTS book_tags (
@ -71,12 +91,13 @@ def migrate_create_book_tags() -> None:
tag_type VARCHAR(20) NOT NULL, tag_type VARCHAR(20) NOT NULL,
UNIQUE (filename, tag, tag_type) UNIQUE (filename, tag, tag_type)
) )
""" """,
conn,
) )
_exec("CREATE INDEX IF NOT EXISTS idx_book_tags_filename ON book_tags (filename)") _exec("CREATE INDEX IF NOT EXISTS idx_book_tags_filename ON book_tags (filename)", conn)
def migrate_create_reading_progress() -> None: def migrate_create_reading_progress(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS reading_progress ( CREATE TABLE IF NOT EXISTS reading_progress (
@ -87,11 +108,12 @@ def migrate_create_reading_progress() -> None:
progress INTEGER DEFAULT 0, progress INTEGER DEFAULT 0,
updated_at TIMESTAMP DEFAULT NOW() updated_at TIMESTAMP DEFAULT NOW()
) )
""" """,
conn,
) )
def migrate_create_reading_sessions() -> None: def migrate_create_reading_sessions(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS reading_sessions ( CREATE TABLE IF NOT EXISTS reading_sessions (
@ -99,12 +121,13 @@ def migrate_create_reading_sessions() -> None:
filename VARCHAR(600) NOT NULL REFERENCES library(filename) ON DELETE CASCADE, filename VARCHAR(600) NOT NULL REFERENCES library(filename) ON DELETE CASCADE,
read_at TIMESTAMP DEFAULT NOW() read_at TIMESTAMP DEFAULT NOW()
) )
""" """,
conn,
) )
_exec("CREATE INDEX IF NOT EXISTS idx_reading_sessions_filename ON reading_sessions (filename)") _exec("CREATE INDEX IF NOT EXISTS idx_reading_sessions_filename ON reading_sessions (filename)", conn)
def migrate_create_library_cover_cache() -> None: def migrate_create_library_cover_cache(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS library_cover_cache ( CREATE TABLE IF NOT EXISTS library_cover_cache (
@ -113,11 +136,12 @@ def migrate_create_library_cover_cache() -> None:
thumb_webp BYTEA NOT NULL, thumb_webp BYTEA NOT NULL,
updated_at TIMESTAMP DEFAULT NOW() updated_at TIMESTAMP DEFAULT NOW()
) )
""" """,
conn,
) )
def migrate_create_credentials() -> None: def migrate_create_credentials(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS credentials ( CREATE TABLE IF NOT EXISTS credentials (
@ -127,13 +151,14 @@ def migrate_create_credentials() -> None:
password TEXT NOT NULL, password TEXT NOT NULL,
updated_at TIMESTAMP DEFAULT NOW() updated_at TIMESTAMP DEFAULT NOW()
) )
""" """,
conn,
) )
_exec("ALTER TABLE credentials ALTER COLUMN username TYPE TEXT") _exec("ALTER TABLE credentials ALTER COLUMN username TYPE TEXT", conn)
_exec("ALTER TABLE credentials ALTER COLUMN password TYPE TEXT") _exec("ALTER TABLE credentials ALTER COLUMN password TYPE TEXT", conn)
def migrate_create_break_patterns() -> None: def migrate_create_break_patterns(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS break_patterns ( CREATE TABLE IF NOT EXISTS break_patterns (
@ -145,39 +170,35 @@ def migrate_create_break_patterns() -> None:
created_at TIMESTAMP DEFAULT NOW(), created_at TIMESTAMP DEFAULT NOW(),
UNIQUE (pattern_type, pattern) UNIQUE (pattern_type, pattern)
) )
""" """,
conn,
) )
def migrate_seed_break_patterns() -> None: def migrate_seed_break_patterns(conn) -> None:
conn = direct_connect() with conn.cursor() as cur:
try: for pat in _DEFAULT_REGEX:
with conn: re.compile(pat)
with conn.cursor() as cur: cur.execute(
for pat in _DEFAULT_REGEX: """
re.compile(pat) INSERT INTO break_patterns (pattern_type, pattern, is_default)
cur.execute( VALUES ('regex', %s, TRUE)
""" ON CONFLICT (pattern_type, pattern) DO NOTHING
INSERT INTO break_patterns (pattern_type, pattern, is_default) """,
VALUES ('regex', %s, TRUE) (pat,),
ON CONFLICT (pattern_type, pattern) DO NOTHING )
""", for pat in _DEFAULT_CSS:
(pat,), cur.execute(
) """
for pat in _DEFAULT_CSS: INSERT INTO break_patterns (pattern_type, pattern, is_default)
cur.execute( VALUES ('css_class', %s, TRUE)
""" ON CONFLICT (pattern_type, pattern) DO NOTHING
INSERT INTO break_patterns (pattern_type, pattern, is_default) """,
VALUES ('css_class', %s, TRUE) (pat,),
ON CONFLICT (pattern_type, pattern) DO NOTHING )
""",
(pat,),
)
finally:
conn.close()
def migrate_create_backup_log() -> None: def migrate_create_backup_log(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS backup_log ( CREATE TABLE IF NOT EXISTS backup_log (
@ -189,15 +210,20 @@ def migrate_create_backup_log() -> None:
started_at TIMESTAMP DEFAULT NOW(), started_at TIMESTAMP DEFAULT NOW(),
finished_at TIMESTAMP finished_at TIMESTAMP
) )
""" """,
conn,
) )
def migrate_add_rating() -> None: def migrate_backup_log_scanned_files(conn) -> None:
_exec("ALTER TABLE library ADD COLUMN IF NOT EXISTS rating SMALLINT NOT NULL DEFAULT 0") _exec("ALTER TABLE backup_log ADD COLUMN IF NOT EXISTS scanned_files INTEGER", conn)
def migrate_create_bookmarks() -> None: def migrate_add_rating(conn) -> None:
_exec("ALTER TABLE library ADD COLUMN IF NOT EXISTS rating SMALLINT NOT NULL DEFAULT 0", conn)
def migrate_create_bookmarks(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS bookmarks ( CREATE TABLE IF NOT EXISTS bookmarks (
@ -209,17 +235,17 @@ def migrate_create_bookmarks() -> None:
note TEXT NOT NULL DEFAULT '', note TEXT NOT NULL DEFAULT '',
created_at TIMESTAMPTZ DEFAULT NOW() created_at TIMESTAMPTZ DEFAULT NOW()
) )
""" """,
conn,
) )
_exec("CREATE INDEX IF NOT EXISTS idx_bookmarks_filename ON bookmarks (filename)") _exec("CREATE INDEX IF NOT EXISTS idx_bookmarks_filename ON bookmarks (filename)", conn)
def migrate_remove_cover_missing_tag() -> None: def migrate_remove_cover_missing_tag(conn) -> None:
_exec("DELETE FROM book_tags WHERE tag = 'Cover Missing' AND tag_type = 'tag'") _exec("DELETE FROM book_tags WHERE tag = 'Cover Missing' AND tag_type = 'tag'", conn)
def migrate_create_perf_indexes() -> None: def migrate_create_perf_indexes(conn) -> None:
# Match library list sorting and common filters.
_exec( _exec(
""" """
CREATE INDEX IF NOT EXISTS idx_library_sort_coalesce CREATE INDEX IF NOT EXISTS idx_library_sort_coalesce
@ -230,38 +256,38 @@ def migrate_create_perf_indexes() -> None:
series_index, series_index,
(COALESCE(title, '')) (COALESCE(title, ''))
) )
""" """,
conn,
) )
_exec("CREATE INDEX IF NOT EXISTS idx_library_needs_review ON library (needs_review)") _exec("CREATE INDEX IF NOT EXISTS idx_library_needs_review ON library (needs_review)", conn)
_exec("CREATE INDEX IF NOT EXISTS idx_library_archived ON library (archived)") _exec("CREATE INDEX IF NOT EXISTS idx_library_archived ON library (archived)", conn)
# Speeds grouped reads + recent-read lookups.
_exec( _exec(
""" """
CREATE INDEX IF NOT EXISTS idx_reading_sessions_filename_readat CREATE INDEX IF NOT EXISTS idx_reading_sessions_filename_readat
ON reading_sessions (filename, read_at DESC) ON reading_sessions (filename, read_at DESC)
""" """,
conn,
) )
# Helps ORDER BY filename, tag fetch for tag-map construction.
_exec( _exec(
""" """
CREATE INDEX IF NOT EXISTS idx_book_tags_filename_tag CREATE INDEX IF NOT EXISTS idx_book_tags_filename_tag
ON book_tags (filename, tag) ON book_tags (filename, tag)
""" """,
conn,
) )
def migrate_series_suffix() -> None: def migrate_series_suffix(conn) -> None:
_exec( _exec(
""" """
ALTER TABLE library ALTER TABLE library
ADD COLUMN IF NOT EXISTS series_suffix VARCHAR(10) NOT NULL DEFAULT '' ADD COLUMN IF NOT EXISTS series_suffix VARCHAR(10) NOT NULL DEFAULT ''
""" """,
conn,
) )
def migrate_create_builder_drafts() -> None: def migrate_create_builder_drafts(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS builder_drafts ( CREATE TABLE IF NOT EXISTS builder_drafts (
@ -274,11 +300,12 @@ def migrate_create_builder_drafts() -> None:
created_at TIMESTAMP DEFAULT NOW(), created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW() updated_at TIMESTAMP DEFAULT NOW()
) )
""" """,
conn,
) )
def migrate_create_authors() -> None: def migrate_create_authors(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS authors ( CREATE TABLE IF NOT EXISTS authors (
@ -288,21 +315,23 @@ def migrate_create_authors() -> None:
created_at TIMESTAMP DEFAULT NOW(), created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW() updated_at TIMESTAMP DEFAULT NOW()
) )
""" """,
conn,
) )
def migrate_rename_hiatus() -> None: def migrate_rename_hiatus(conn) -> None:
_exec("UPDATE library SET publication_status = 'Long-Term Hold' WHERE publication_status = 'Hiatus'") _exec("UPDATE library SET publication_status = 'Long-Term Hold' WHERE publication_status = 'Hiatus'", conn)
def migrate_add_storage_type() -> None: def migrate_add_storage_type(conn) -> None:
_exec( _exec(
"ALTER TABLE library ADD COLUMN IF NOT EXISTS storage_type VARCHAR(10) NOT NULL DEFAULT 'file'" "ALTER TABLE library ADD COLUMN IF NOT EXISTS storage_type VARCHAR(10) NOT NULL DEFAULT 'file'",
conn,
) )
def migrate_create_book_images() -> None: def migrate_create_book_images(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS book_images ( CREATE TABLE IF NOT EXISTS book_images (
@ -311,11 +340,12 @@ def migrate_create_book_images() -> None:
media_type VARCHAR(100) NOT NULL, media_type VARCHAR(100) NOT NULL,
size_bytes INTEGER NOT NULL DEFAULT 0 size_bytes INTEGER NOT NULL DEFAULT 0
) )
""" """,
conn,
) )
def migrate_create_book_chapters() -> None: def migrate_create_book_chapters(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS book_chapters ( CREATE TABLE IF NOT EXISTS book_chapters (
@ -327,29 +357,33 @@ def migrate_create_book_chapters() -> None:
content_tsv TSVECTOR, content_tsv TSVECTOR,
UNIQUE (filename, chapter_index) UNIQUE (filename, chapter_index)
) )
""" """,
conn,
) )
_exec( _exec(
"CREATE INDEX IF NOT EXISTS idx_book_chapters_filename ON book_chapters (filename, chapter_index)" "CREATE INDEX IF NOT EXISTS idx_book_chapters_filename ON book_chapters (filename, chapter_index)",
conn,
) )
_exec( _exec(
"CREATE INDEX IF NOT EXISTS idx_book_chapters_tsv ON book_chapters USING GIN (content_tsv)" "CREATE INDEX IF NOT EXISTS idx_book_chapters_tsv ON book_chapters USING GIN (content_tsv)",
conn,
) )
def migrate_rebuild_chapter_tsv_with_title() -> None: def migrate_rebuild_chapter_tsv_with_title(conn) -> None:
"""Rebuild content_tsv to include chapter title (safe to run repeatedly).""" """Rebuild content_tsv to include chapter title. Runs once via schema_migrations tracking."""
_exec( _exec(
""" """
UPDATE book_chapters UPDATE book_chapters
SET content_tsv = to_tsvector('simple', SET content_tsv = to_tsvector('simple',
COALESCE(title, '') || ' ' || COALESCE(title, '') || ' ' ||
regexp_replace(COALESCE(content, ''), '<[^>]*>', ' ', 'g')) regexp_replace(COALESCE(content, ''), '<[^>]*>', ' ', 'g'))
""" """,
conn,
) )
def migrate_create_app_settings() -> None: def migrate_create_app_settings(conn) -> None:
_exec( _exec(
""" """
CREATE TABLE IF NOT EXISTS app_settings ( CREATE TABLE IF NOT EXISTS app_settings (
@ -357,47 +391,75 @@ def migrate_create_app_settings() -> None:
develop_mode BOOLEAN NOT NULL DEFAULT FALSE, develop_mode BOOLEAN NOT NULL DEFAULT FALSE,
CONSTRAINT single_row CHECK (id = 1) CONSTRAINT single_row CHECK (id = 1)
) )
""" """,
conn,
) )
_exec("INSERT INTO app_settings (id, develop_mode) VALUES (1, FALSE) ON CONFLICT DO NOTHING") _exec("INSERT INTO app_settings (id, develop_mode) VALUES (1, FALSE) ON CONFLICT DO NOTHING", conn)
def migrate_app_settings_break_image() -> None: def migrate_app_settings_break_image(conn) -> None:
_exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_sha256 VARCHAR(64) DEFAULT NULL") _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_sha256 VARCHAR(64) DEFAULT NULL", conn)
_exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_ext VARCHAR(10) DEFAULT NULL") _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_ext VARCHAR(10) DEFAULT NULL", conn)
def migrate_series_volume() -> None: def migrate_series_volume(conn) -> None:
_exec( _exec(
""" """
ALTER TABLE library ALTER TABLE library
ADD COLUMN IF NOT EXISTS series_volume VARCHAR(20) NOT NULL DEFAULT '' ADD COLUMN IF NOT EXISTS series_volume VARCHAR(20) NOT NULL DEFAULT ''
""" """,
conn,
) )
def run_migrations() -> None: def run_migrations() -> None:
migrate_create_library() t_start = time.time()
migrate_create_book_tags() logger.info("Starting migrations...")
migrate_create_reading_progress() conn = direct_connect()
migrate_create_reading_sessions() try:
migrate_create_library_cover_cache() # Bootstrap: create schema_migrations table (always idempotent, no tracking needed).
migrate_create_credentials() with conn:
migrate_create_break_patterns() with conn.cursor() as cur:
migrate_create_backup_log() cur.execute(
migrate_create_perf_indexes() """
migrate_seed_break_patterns() CREATE TABLE IF NOT EXISTS schema_migrations (
migrate_add_rating() name VARCHAR(200) PRIMARY KEY,
migrate_remove_cover_missing_tag() applied_at TIMESTAMP DEFAULT NOW()
migrate_create_bookmarks() )
migrate_series_suffix() """
migrate_create_builder_drafts() )
migrate_create_authors()
migrate_rename_hiatus() executed = sum([
migrate_add_storage_type() _run_once(conn, "create_library", migrate_create_library),
migrate_create_book_images() _run_once(conn, "create_book_tags", migrate_create_book_tags),
migrate_create_book_chapters() _run_once(conn, "create_reading_progress", migrate_create_reading_progress),
migrate_rebuild_chapter_tsv_with_title() _run_once(conn, "create_reading_sessions", migrate_create_reading_sessions),
migrate_create_app_settings() _run_once(conn, "create_library_cover_cache", migrate_create_library_cover_cache),
migrate_app_settings_break_image() _run_once(conn, "create_credentials", migrate_create_credentials),
migrate_series_volume() _run_once(conn, "create_break_patterns", migrate_create_break_patterns),
_run_once(conn, "create_backup_log", migrate_create_backup_log),
_run_once(conn, "create_perf_indexes", migrate_create_perf_indexes),
_run_once(conn, "seed_break_patterns", migrate_seed_break_patterns),
_run_once(conn, "add_rating", migrate_add_rating),
_run_once(conn, "remove_cover_missing_tag", migrate_remove_cover_missing_tag),
_run_once(conn, "create_bookmarks", migrate_create_bookmarks),
_run_once(conn, "series_suffix", migrate_series_suffix),
_run_once(conn, "create_builder_drafts", migrate_create_builder_drafts),
_run_once(conn, "create_authors", migrate_create_authors),
_run_once(conn, "rename_hiatus", migrate_rename_hiatus),
_run_once(conn, "add_storage_type", migrate_add_storage_type),
_run_once(conn, "create_book_images", migrate_create_book_images),
_run_once(conn, "create_book_chapters", migrate_create_book_chapters),
_run_once(conn, "rebuild_chapter_tsv_with_title", migrate_rebuild_chapter_tsv_with_title),
_run_once(conn, "create_app_settings", migrate_create_app_settings),
_run_once(conn, "app_settings_break_image", migrate_app_settings_break_image),
_run_once(conn, "series_volume", migrate_series_volume),
_run_once(conn, "backup_log_scanned_files", migrate_backup_log_scanned_files),
])
finally:
conn.close()
elapsed = time.time() - t_start
if executed == 0:
logger.info("Migrations complete in %.1fs — all already applied", elapsed)
else:
logger.info("Migrations complete in %.1fs — %d executed", elapsed, executed)

View File

@ -35,19 +35,24 @@ def _webp_thumb_from_image(path: Path) -> bytes:
return out.getvalue() return out.getvalue()
def _webp_thumb_from_pil(im: Image.Image) -> bytes:
    """Encode an in-memory PIL image as a WEBP cover thumbnail.

    Images whose mode is neither ``RGB`` nor ``RGBA`` are converted to ``RGB``
    first.  The image is then fitted to ``COVER_W`` x ``COVER_H`` with Lanczos
    resampling and saved as WEBP (quality 82, method 6); the encoded bytes are
    returned.
    """
    from io import BytesIO

    source = im if im.mode in ("RGB", "RGBA") else im.convert("RGB")
    buffer = BytesIO()
    ImageOps.fit(source, (COVER_W, COVER_H), method=Image.Resampling.LANCZOS).save(
        buffer, format="WEBP", quality=82, method=6
    )
    return buffer.getvalue()
def pdf_cover_thumb(path: Path) -> bytes: def pdf_cover_thumb(path: Path) -> bytes:
with fitz.open(path) as doc: with fitz.open(path) as doc:
if doc.page_count == 0: if doc.page_count == 0:
raise ValueError("PDF has no pages") raise ValueError("PDF has no pages")
page = doc.load_page(0) page = doc.load_page(0)
pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5), alpha=False) pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5), alpha=False)
tmp = path.with_suffix(".cover.tmp.png") img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
try: return _webp_thumb_from_pil(img)
pix.save(tmp)
return _webp_thumb_from_image(tmp)
finally:
if tmp.exists():
tmp.unlink(missing_ok=True)
def pdf_scan_metadata(path: Path) -> dict: def pdf_scan_metadata(path: Path) -> dict:

View File

@ -408,11 +408,11 @@ def _dbx() -> dropbox.Dropbox:
oauth2_refresh_token=token, oauth2_refresh_token=token,
app_key=app_key, app_key=app_key,
app_secret=app_secret, app_secret=app_secret,
timeout=120, timeout=300,
) )
else: else:
# Fallback: legacy access token # Fallback: legacy access token
client = dropbox.Dropbox(token, timeout=120) client = dropbox.Dropbox(token, timeout=300)
client.users_get_current_account() client.users_get_current_account()
except AuthError as e: except AuthError as e:
@ -434,8 +434,8 @@ def _ensure_dropbox_dir(client: dropbox.Dropbox, path: str) -> None:
pass pass
_DROPBOX_UPLOAD_CHUNK = 100 * 1024 * 1024 # 100 MB — below the 150 MB files_upload limit _DROPBOX_UPLOAD_CHUNK = 16 * 1024 * 1024 # 16 MB — keeps each chunk well within request timeout
_DROPBOX_UPLOAD_THRESHOLD = 148 * 1024 * 1024 # use session upload above this size _DROPBOX_UPLOAD_THRESHOLD = 16 * 1024 * 1024 # use session upload above this size
def _dropbox_upload_bytes(client: dropbox.Dropbox, target_path: str, data: bytes) -> int: def _dropbox_upload_bytes(client: dropbox.Dropbox, target_path: str, data: bytes) -> int:
@ -621,7 +621,15 @@ def _insert_backup_log_running() -> int:
return int(cur.fetchone()[0]) return int(cur.fetchone()[0])
def _finish_backup_log(log_id: int, *, status: str, files_count: int | None, size_bytes: int | None, error_msg: str | None) -> None: def _finish_backup_log(
log_id: int,
*,
status: str,
files_count: int | None,
scanned_files: int | None,
size_bytes: int | None,
error_msg: str | None,
) -> None:
with get_db_conn() as conn: with get_db_conn() as conn:
with conn: with conn:
with conn.cursor() as cur: with conn.cursor() as cur:
@ -630,12 +638,13 @@ def _finish_backup_log(log_id: int, *, status: str, files_count: int | None, siz
UPDATE backup_log UPDATE backup_log
SET status = %s, SET status = %s,
files_count = %s, files_count = %s,
scanned_files = %s,
size_bytes = %s, size_bytes = %s,
error_msg = %s, error_msg = %s,
finished_at = NOW() finished_at = NOW()
WHERE id = %s WHERE id = %s
""", """,
(status, files_count, size_bytes, error_msg, log_id), (status, files_count, scanned_files, size_bytes, error_msg, log_id),
) )
@ -695,7 +704,7 @@ def _prune_orphan_objects(client: dropbox.Dropbox, objects_root: str, referenced
return _dropbox_delete_paths(client, to_delete) return _dropbox_delete_paths(client, to_delete)
def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> tuple[int, int]: def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> tuple[int, int, int]:
def _prog(done: int, total: int, phase: str) -> None: def _prog(done: int, total: int, phase: str) -> None:
if progress_key is not None: if progress_key is not None:
BACKUP_PROGRESS[progress_key] = {"done": done, "total": total, "phase": phase} BACKUP_PROGRESS[progress_key] = {"done": done, "total": total, "phase": phase}
@ -792,7 +801,7 @@ def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> t
if not dry_run: if not dry_run:
_save_manifest(new_manifest) _save_manifest(new_manifest)
return uploaded_count, uploaded_size return total_files, uploaded_count, uploaded_size
@router.get("/backup", response_class=HTMLResponse) @router.get("/backup", response_class=HTMLResponse)
@ -953,7 +962,7 @@ async def backup_status():
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute( cur.execute(
""" """
SELECT id, status, files_count, size_bytes, error_msg, started_at, finished_at SELECT id, status, files_count, scanned_files, size_bytes, error_msg, started_at, finished_at
FROM backup_log FROM backup_log
ORDER BY started_at DESC ORDER BY started_at DESC
LIMIT 1 LIMIT 1
@ -965,11 +974,12 @@ async def backup_status():
return { return {
"id": row[0], "id": row[0],
"status": row[1], "status": row[1],
"files_count": row[2], "uploaded_files": row[2],
"size_bytes": row[3], "scanned_files": row[3],
"error_msg": row[4], "size_bytes": row[4],
"started_at": row[5].isoformat() if row[5] else None, "error_msg": row[5],
"finished_at": row[6].isoformat() if row[6] else None, "started_at": row[6].isoformat() if row[6] else None,
"finished_at": row[7].isoformat() if row[7] else None,
} }
@ -979,7 +989,7 @@ async def backup_history():
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute( cur.execute(
""" """
SELECT id, status, files_count, size_bytes, error_msg, started_at, finished_at SELECT id, status, files_count, scanned_files, size_bytes, error_msg, started_at, finished_at
FROM backup_log FROM backup_log
ORDER BY started_at DESC ORDER BY started_at DESC
LIMIT 20 LIMIT 20
@ -990,11 +1000,12 @@ async def backup_history():
{ {
"id": r[0], "id": r[0],
"status": r[1], "status": r[1],
"files_count": r[2], "uploaded_files": r[2],
"size_bytes": r[3], "scanned_files": r[3],
"error_msg": r[4], "size_bytes": r[4],
"started_at": r[5].isoformat() if r[5] else None, "error_msg": r[5],
"finished_at": r[6].isoformat() if r[6] else None, "started_at": r[6].isoformat() if r[6] else None,
"finished_at": r[7].isoformat() if r[7] else None,
} }
for r in rows for r in rows
] ]
@ -1002,6 +1013,10 @@ async def backup_history():
def _start_backup_task(*, dry_run: bool) -> int: def _start_backup_task(*, dry_run: bool) -> int:
log_id = _insert_backup_log_running() log_id = _insert_backup_log_running()
while len(BACKUP_TASKS) >= 50:
oldest = next(iter(BACKUP_TASKS))
BACKUP_TASKS.pop(oldest, None)
BACKUP_PROGRESS.pop(oldest, None)
task = asyncio.create_task(_run_backup_job(log_id, dry_run)) task = asyncio.create_task(_run_backup_job(log_id, dry_run))
BACKUP_TASKS[log_id] = task BACKUP_TASKS[log_id] = task
return log_id return log_id
@ -1062,13 +1077,14 @@ async def stop_backup_scheduler() -> None:
async def _run_backup_job(log_id: int, dry_run: bool) -> None: async def _run_backup_job(log_id: int, dry_run: bool) -> None:
BACKUP_PROGRESS[log_id] = {"done": 0, "total": 0, "phase": "starting"} BACKUP_PROGRESS[log_id] = {"done": 0, "total": 0, "phase": "starting"}
try: try:
files_count, size_bytes = await asyncio.to_thread( scanned_files, files_count, size_bytes = await asyncio.to_thread(
_run_backup_internal, dry_run=dry_run, progress_key=log_id _run_backup_internal, dry_run=dry_run, progress_key=log_id
) )
_finish_backup_log( _finish_backup_log(
log_id, log_id,
status="success", status="success",
files_count=files_count, files_count=files_count,
scanned_files=scanned_files,
size_bytes=size_bytes, size_bytes=size_bytes,
error_msg=None, error_msg=None,
) )
@ -1077,6 +1093,7 @@ async def _run_backup_job(log_id: int, dry_run: bool) -> None:
log_id, log_id,
status="error", status="error",
files_count=None, files_count=None,
scanned_files=None,
size_bytes=None, size_bytes=None,
error_msg=str(e), error_msg=str(e),
) )

View File

@ -82,14 +82,14 @@ async def create_draft(request: Request):
return HTMLResponse("Titel en auteur zijn verplicht", status_code=400) return HTMLResponse("Titel en auteur zijn verplicht", status_code=400)
with get_db_conn() as conn: with get_db_conn() as conn:
with conn.cursor() as cur: with conn:
cur.execute( with conn.cursor() as cur:
"INSERT INTO builder_drafts (title, author, publisher, source_url, chapters) " cur.execute(
"VALUES (%s, %s, %s, %s, '[]'::jsonb) RETURNING id", "INSERT INTO builder_drafts (title, author, publisher, source_url, chapters) "
(title, author, publisher, source_url), "VALUES (%s, %s, %s, %s, '[]'::jsonb) RETURNING id",
) (title, author, publisher, source_url),
draft_id = str(cur.fetchone()[0]) )
conn.commit() draft_id = str(cur.fetchone()[0])
return RedirectResponse(f"/builder/{draft_id}", status_code=303) return RedirectResponse(f"/builder/{draft_id}", status_code=303)
@ -97,9 +97,9 @@ async def create_draft(request: Request):
@router.delete("/api/builder/{draft_id}") @router.delete("/api/builder/{draft_id}")
async def delete_draft(draft_id: str): async def delete_draft(draft_id: str):
with get_db_conn() as conn: with get_db_conn() as conn:
with conn.cursor() as cur: with conn:
cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,)) with conn.cursor() as cur:
conn.commit() cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,))
return JSONResponse({"ok": True}) return JSONResponse({"ok": True})
@ -130,13 +130,13 @@ async def add_chapter(draft_id: str, request: Request):
insert_at = after_index + 1 if 0 <= after_index < len(chapters) else len(chapters) insert_at = after_index + 1 if 0 <= after_index < len(chapters) else len(chapters)
chapters.insert(insert_at, new_chapter) chapters.insert(insert_at, new_chapter)
with conn.cursor() as cur: with conn:
cur.execute( with conn.cursor() as cur:
"UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " cur.execute(
"WHERE id = %s", "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() "
(json.dumps(chapters), draft_id), "WHERE id = %s",
) (json.dumps(chapters), draft_id),
conn.commit() )
return JSONResponse({"ok": True, "index": insert_at, "count": len(chapters)}) return JSONResponse({"ok": True, "index": insert_at, "count": len(chapters)})
@ -159,13 +159,13 @@ async def save_chapter(draft_id: str, idx: int, request: Request):
if "content" in body: if "content" in body:
chapters[idx]["content"] = body["content"] chapters[idx]["content"] = body["content"]
with conn.cursor() as cur: with conn:
cur.execute( with conn.cursor() as cur:
"UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " cur.execute(
"WHERE id = %s", "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() "
(json.dumps(chapters), draft_id), "WHERE id = %s",
) (json.dumps(chapters), draft_id),
conn.commit() )
return JSONResponse({"ok": True}) return JSONResponse({"ok": True})
@ -187,13 +187,13 @@ async def delete_chapter(draft_id: str, idx: int):
chapters.pop(idx) chapters.pop(idx)
with conn.cursor() as cur: with conn:
cur.execute( with conn.cursor() as cur:
"UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() " cur.execute(
"WHERE id = %s", "UPDATE builder_drafts SET chapters = %s::jsonb, updated_at = NOW() "
(json.dumps(chapters), draft_id), "WHERE id = %s",
) (json.dumps(chapters), draft_id),
conn.commit() )
return JSONResponse({"ok": True, "index": min(idx, len(chapters) - 1), "count": len(chapters)}) return JSONResponse({"ok": True, "index": min(idx, len(chapters) - 1), "count": len(chapters)})
@ -260,10 +260,9 @@ async def publish_draft(draft_id: str):
"needs_review": True, "needs_review": True,
"has_cover": False, "has_cover": False,
} }
upsert_book(conn, filename, meta) with conn:
upsert_book(conn, filename, meta)
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,)) cur.execute("DELETE FROM builder_drafts WHERE id = %s", (draft_id,))
conn.commit()
return JSONResponse({"ok": True, "filename": filename}) return JSONResponse({"ok": True, "filename": filename})

View File

@ -14,6 +14,7 @@ from PIL import Image, ImageOps, UnidentifiedImageError
from cbr import cbr_cover_thumb, cbr_page_count from cbr import cbr_cover_thumb, cbr_page_count
from db import get_db_conn from db import get_db_conn
from epub_utils import find_opf_path
from pdf import pdf_cover_thumb, pdf_page_count, pdf_scan_metadata from pdf import pdf_cover_thumb, pdf_page_count, pdf_scan_metadata
LIBRARY_DIR = Path("library") LIBRARY_DIR = Path("library")
@ -221,18 +222,6 @@ def prune_empty_dirs(start_dir: Path) -> None:
cur = cur.parent cur = cur.parent
def _find_opf_path(names: set[str], container_xml: str | None) -> str | None:
opf_path = "OEBPS/content.opf"
if container_xml:
m = re.search(r"full-path\s*=\s*['\"]([^'\"]+)['\"]", container_xml)
if m:
opf_path = m.group(1)
if opf_path in names:
return opf_path
candidates = sorted(n for n in names if n.lower().endswith(".opf"))
return candidates[0] if candidates else None
def scan_epub(path: Path) -> dict: def scan_epub(path: Path) -> dict:
out = { out = {
"has_cover": False, "has_cover": False,
@ -254,7 +243,7 @@ def scan_epub(path: Path) -> dict:
names = set(z.namelist()) names = set(z.namelist())
out["has_cover"] = extract_cover_from_epub(path) is not None out["has_cover"] = extract_cover_from_epub(path) is not None
container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
opf_path = _find_opf_path(names, container_xml) opf_path = find_opf_path(names, container_xml)
if not opf_path or opf_path not in names: if not opf_path or opf_path not in names:
return out return out
opf = z.read(opf_path).decode("utf-8", errors="replace") opf = z.read(opf_path).decode("utf-8", errors="replace")

View File

@ -1,9 +1,6 @@
import html as _html
import posixpath import posixpath
import re
import uuid import uuid
import zipfile as zf import zipfile as zf
from pathlib import Path
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from fastapi import APIRouter, Request from fastapi import APIRouter, Request
@ -11,150 +8,13 @@ from fastapi.responses import HTMLResponse, JSONResponse, Response
from shared_templates import templates from shared_templates import templates
from db import get_db_conn from db import get_db_conn
from epub import read_epub_file, write_epub_file from epub import make_intro_xhtml, read_epub_file, write_epub_file
from epub_utils import build_book_info_body_html, epub_spine, find_opf_path, make_new_chapter_xhtml, norm_href, rewrite_epub_entries
from routers.common import LIBRARY_DIR, is_db_filename, resolve_library_path, upsert_chapter from routers.common import LIBRARY_DIR, is_db_filename, resolve_library_path, upsert_chapter
router = APIRouter() router = APIRouter()
def _norm(base_dir: str, rel: str) -> str:
rel = (rel or "").split("#", 1)[0].strip()
if not rel:
return ""
joined = posixpath.normpath(posixpath.join(base_dir, rel))
return joined.lstrip("./")
def _epub_spine(path: Path) -> list[dict]:
    """Return the EPUB's reading order as a list of chapter descriptors.

    Args:
        path: Location of the EPUB (zip) file on disk.

    Returns:
        One ``{"index", "title", "href"}`` dict per spine item that resolves to
        a manifest entry, in spine order. ``href`` is the archive member path.
        ``title`` comes from the NCX navPoint pointing at that href, else the
        file's base name without an (x)html extension, else ``"Chapter N"``.
        An empty list is returned when the archive contains no OPF file.
    """
    with zf.ZipFile(path, "r") as z:
        names = set(z.namelist())
        opf_path = "OEBPS/content.opf"
        try:
            container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace")
            # Single backslashes are required in this raw string: the former
            # ``\\s`` matched a literal backslash, so the declared full-path
            # was never honoured and the default/fallback was always used.
            m = re.search(r"full-path\s*=\s*['\"]([^'\"]+)['\"]", container_xml)
            if m:
                opf_path = m.group(1)
        except Exception:
            # Best effort: a missing or unreadable container.xml falls through
            # to the default path / *.opf search below.
            pass

        if opf_path not in names:
            candidates = [n for n in names if n.lower().endswith(".opf")]
            if not candidates:
                return []
            opf_path = sorted(candidates)[0]

        opf_xml = z.read(opf_path).decode("utf-8", errors="replace")
        opf = BeautifulSoup(opf_xml, "xml")
        opf_dir = posixpath.dirname(opf_path)

        # Manifest: item id -> archive path (hrefs are relative to the OPF).
        manifest: dict[str, str] = {}
        for item in opf.find_all("item"):
            iid = item.get("id")
            href = item.get("href")
            if iid and href:
                manifest[iid] = _norm(opf_dir, href)

        spine_idrefs: list[str] = []
        spine_tag = opf.find("spine")
        toc_id = spine_tag.get("toc") if spine_tag else None
        if spine_tag:
            for ir in spine_tag.find_all("itemref"):
                rid = ir.get("idref")
                if rid:
                    spine_idrefs.append(rid)
        # Spine entries without a manifest item are dropped.
        hrefs = [manifest[rid] for rid in spine_idrefs if rid in manifest]

        # Locate the NCX: spine "toc" attribute first, then two common paths.
        href_to_title: dict[str, str] = {}
        ncx_path = ""
        if toc_id and toc_id in manifest:
            ncx_path = manifest[toc_id]
        elif "toc.ncx" in names:
            ncx_path = "toc.ncx"
        elif "OEBPS/toc.ncx" in names:
            ncx_path = "OEBPS/toc.ncx"

        if ncx_path and ncx_path in names:
            try:
                ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace")
                ncx = BeautifulSoup(ncx_xml, "xml")
                ncx_dir = posixpath.dirname(ncx_path)
                for np in ncx.find_all("navPoint"):
                    content = np.find("content")
                    label_tag = np.find("text")
                    src = content.get("src") if content else ""
                    label = label_tag.get_text(strip=True) if label_tag else ""
                    if src and label:
                        href_to_title[_norm(ncx_dir, src)] = _html.unescape(label)
            except Exception:
                # Titles are optional; a broken NCX only costs the nice labels.
                pass

    chapters = []
    for i, href in enumerate(hrefs):
        base = posixpath.basename(href)
        title = href_to_title.get(href, re.sub(r"\.(xhtml|html|htm)$", "", base, flags=re.I))
        chapters.append({"index": i, "title": title or f"Chapter {i+1}", "href": href})
    return chapters
def _norm_href(base_dir: str, rel: str) -> str:
rel = (rel or "").split("#", 1)[0].strip()
if not rel:
return ""
return posixpath.normpath(posixpath.join(base_dir, rel)).lstrip("./")
def _find_opf_path(names: set[str], container_xml: str | None) -> str | None:
opf_path = "OEBPS/content.opf"
if container_xml:
m = re.search(r"full-path\s*=\s*['\"]([^'\"]+)['\"]", container_xml)
if m:
opf_path = m.group(1)
if opf_path in names:
return opf_path
candidates = sorted(n for n in names if n.lower().endswith(".opf"))
return candidates[0] if candidates else None
def _make_new_chapter_xhtml(title: str) -> str:
safe_title = _html.escape((title or "New chapter").strip() or "New chapter")
return (
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n"
"<head>\n"
" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n"
f" <title>{safe_title}</title>\n"
" <link rel=\"stylesheet\" type=\"text/css\" href=\"../Styles/style.css\"/>\n"
"</head>\n"
"<body>\n"
f" <h2 class=\"chapter-title\">{safe_title}</h2>\n"
" <p></p>\n"
"</body>\n"
"</html>\n"
)
def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_paths: set[str] | None = None) -> None:
remove_paths = set(remove_paths or set())
tmp = epub_path.with_suffix(".tmp.epub")
with zf.ZipFile(epub_path, "r") as zin, zf.ZipFile(tmp, "w", compression=zf.ZIP_DEFLATED) as zout:
names = zin.namelist()
for name in names:
if name in remove_paths:
continue
if name in updates:
zout.writestr(name, updates[name])
else:
zout.writestr(name, zin.read(name))
for name, data in updates.items():
if name not in names:
zout.writestr(name, data)
tmp.replace(epub_path)
@router.get("/library/editor/{filename:path}", response_class=HTMLResponse) @router.get("/library/editor/{filename:path}", response_class=HTMLResponse)
async def editor_page(filename: str, request: Request): async def editor_page(filename: str, request: Request):
if not is_db_filename(filename): if not is_db_filename(filename):
@ -194,7 +54,7 @@ async def get_edit_chapter(filename: str, index: int):
path = resolve_library_path(filename) path = resolve_library_path(filename)
if path is None or not path.exists(): if path is None or not path.exists():
return Response(status_code=404) return Response(status_code=404)
spine = _epub_spine(path) spine = epub_spine(path)
if index < 0 or index >= len(spine): if index < 0 or index >= len(spine):
return Response(status_code=404) return Response(status_code=404)
ch = spine[index] ch = spine[index]
@ -229,17 +89,77 @@ async def save_edit_chapter(filename: str, index: int, request: Request):
return JSONResponse({"error": "File not found"}, status_code=404) return JSONResponse({"error": "File not found"}, status_code=404)
if not content: if not content:
return JSONResponse({"error": "No content"}, status_code=400) return JSONResponse({"error": "No content"}, status_code=400)
spine = _epub_spine(path) spine = epub_spine(path)
if index < 0 or index >= len(spine): if index < 0 or index >= len(spine):
return JSONResponse({"error": "Chapter not found"}, status_code=404) return JSONResponse({"error": "Chapter not found"}, status_code=404)
href = spine[index]["href"] ch = spine[index]
href = ch["href"]
try: try:
write_epub_file(path, href, content) write_epub_file(path, href, content)
except Exception as e: except Exception as e:
return JSONResponse({"error": str(e)}, status_code=500) return JSONResponse({"error": str(e)}, status_code=500)
new_title = (body.get("title") or "").strip()
if new_title and new_title != (ch["title"] or ""):
try:
_update_epub_navpoint_title(path, href, new_title)
except Exception as e:
return JSONResponse({"error": f"Title update failed: {e}"}, status_code=500)
return JSONResponse({"ok": True}) return JSONResponse({"ok": True})
def _update_epub_navpoint_title(path, target_href: str, new_title: str) -> None:
"""Update the NCX navPoint label whose content src resolves to ``target_href``.

``path`` is the EPUB file, ``target_href`` the archive member path of the
chapter, ``new_title`` the replacement label text. Nothing is written when the
OPF, its spine, the NCX, or a matching navPoint with a ``text`` element cannot
be found.
"""
with zf.ZipFile(path, "r") as z:
# Archive member names, used for the membership checks below.
names = set(z.namelist())
# None is passed on to find_opf_path when the archive has no container.xml.
container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
opf_path = find_opf_path(names, container_xml)
if not opf_path:
return
opf_xml = z.read(opf_path).decode("utf-8", errors="replace")
opf = BeautifulSoup(opf_xml, "xml")
opf_dir = posixpath.dirname(opf_path)
# Manifest item id -> href resolved against the OPF directory via norm_href.
manifest: dict[str, str] = {}
for item in opf.find_all("item"):
iid = item.get("id")
href = item.get("href")
if iid and href:
manifest[iid] = norm_href(opf_dir, href)
spine_tag = opf.find("spine")
if not spine_tag:
return
# Locate the NCX: the spine's "toc" idref first, otherwise the first manifest
# item declared with the NCX media type.
toc_id = spine_tag.get("toc")
ncx_path = manifest.get(toc_id, "") if toc_id else ""
if not ncx_path:
for item in opf.find_all("item"):
mt = (item.get("media-type") or "").lower()
if mt == "application/x-dtbncx+xml" and item.get("href"):
ncx_path = norm_href(opf_dir, item.get("href"))
break
if not ncx_path or ncx_path not in names:
return
ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace")
ncx = BeautifulSoup(ncx_xml, "xml")
ncx_dir = posixpath.dirname(ncx_path)
# Search for the navPoint whose content src, resolved against the NCX
# directory, equals target_href, and replace its label text.
# NOTE(review): indentation is not visible in this view, so whether the
# `break` fires on the first href match or only after a label was replaced
# cannot be confirmed here.
changed = False
for np in ncx.find_all("navPoint"):
content_tag = np.find("content")
src = content_tag.get("src") if content_tag else ""
if src and norm_href(ncx_dir, src) == target_href:
text = np.find("text")
if text is not None:
text.string = new_title
changed = True
break
# The archive is only rewritten when a label was actually replaced.
if changed:
rewrite_epub_entries(path, {ncx_path: str(ncx).encode("utf-8")})
@router.post("/api/edit/chapter/add/{filename:path}") @router.post("/api/edit/chapter/add/{filename:path}")
async def add_edit_chapter(filename: str, request: Request): async def add_edit_chapter(filename: str, request: Request):
body = await request.json() body = await request.json()
@ -261,10 +181,18 @@ async def add_edit_chapter(filename: str, request: Request):
insert_idx = total if after_index < 0 or after_index >= total else after_index + 1 insert_idx = total if after_index < 0 or after_index >= total else after_index + 1
with conn: with conn:
with conn.cursor() as cur: with conn.cursor() as cur:
# Two-step increment to avoid unique constraint violations.
# PostgreSQL checks the constraint per-row, so incrementing
# consecutive indices in a single UPDATE (1→2 while 2 exists)
# raises a UniqueViolation. Using negatives as a safe intermediate.
cur.execute( cur.execute(
"UPDATE book_chapters SET chapter_index = chapter_index + 1 WHERE filename = %s AND chapter_index >= %s", "UPDATE book_chapters SET chapter_index = -(chapter_index + 1) WHERE filename = %s AND chapter_index >= %s",
(filename, insert_idx), (filename, insert_idx),
) )
cur.execute(
"UPDATE book_chapters SET chapter_index = -chapter_index WHERE filename = %s AND chapter_index < 0",
(filename,),
)
upsert_chapter(conn, filename, insert_idx, title, "") upsert_chapter(conn, filename, insert_idx, title, "")
return JSONResponse({"ok": True, "index": insert_idx, "count": total + 1}) return JSONResponse({"ok": True, "index": insert_idx, "count": total + 1})
@ -274,10 +202,6 @@ async def add_edit_chapter(filename: str, request: Request):
if not path.exists(): if not path.exists():
return JSONResponse({"error": "File not found"}, status_code=404) return JSONResponse({"error": "File not found"}, status_code=404)
try:
after_index = int(after_index)
except Exception:
after_index = -1
try: try:
after_index = int(after_index) after_index = int(after_index)
except Exception: except Exception:
@ -286,7 +210,7 @@ async def add_edit_chapter(filename: str, request: Request):
with zf.ZipFile(path, "r") as z: with zf.ZipFile(path, "r") as z:
names = set(z.namelist()) names = set(z.namelist())
container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
opf_path = _find_opf_path(names, container_xml) opf_path = find_opf_path(names, container_xml)
if not opf_path: if not opf_path:
return JSONResponse({"error": "OPF not found"}, status_code=400) return JSONResponse({"error": "OPF not found"}, status_code=400)
@ -299,7 +223,7 @@ async def add_edit_chapter(filename: str, request: Request):
iid = item.get("id") iid = item.get("id")
href = item.get("href") href = item.get("href")
if iid and href: if iid and href:
manifest[iid] = _norm_href(opf_dir, href) manifest[iid] = norm_href(opf_dir, href)
spine_tag = opf.find("spine") spine_tag = opf.find("spine")
if not spine_tag: if not spine_tag:
@ -323,7 +247,7 @@ async def add_edit_chapter(filename: str, request: Request):
while True: while True:
stem = f"chapter_added_{uuid.uuid4().hex[:8]}" stem = f"chapter_added_{uuid.uuid4().hex[:8]}"
rel = posixpath.join(ref_dir_rel, f"{stem}.xhtml") if ref_dir_rel else f"{stem}.xhtml" rel = posixpath.join(ref_dir_rel, f"{stem}.xhtml") if ref_dir_rel else f"{stem}.xhtml"
abs_path = _norm_href(opf_dir, rel) abs_path = norm_href(opf_dir, rel)
if abs_path not in names: if abs_path not in names:
break break
@ -357,7 +281,7 @@ async def add_edit_chapter(filename: str, request: Request):
for item in opf.find_all("item"): for item in opf.find_all("item"):
mt = (item.get("media-type") or "").lower() mt = (item.get("media-type") or "").lower()
if mt == "application/x-dtbncx+xml" and item.get("href"): if mt == "application/x-dtbncx+xml" and item.get("href"):
ncx_path = _norm_href(opf_dir, item.get("href")) ncx_path = norm_href(opf_dir, item.get("href"))
break break
updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")} updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")}
@ -390,14 +314,191 @@ async def add_edit_chapter(filename: str, request: Request):
updates[ncx_path] = str(ncx).encode("utf-8") updates[ncx_path] = str(ncx).encode("utf-8")
updates[abs_path] = _make_new_chapter_xhtml(title).encode("utf-8") updates[abs_path] = make_new_chapter_xhtml(title).encode("utf-8")
_rewrite_epub_entries(path, updates) rewrite_epub_entries(path, updates)
new_spine = _epub_spine(path) new_spine = epub_spine(path)
new_index = min(max(after_index + 1, 0), max(len(new_spine) - 1, 0)) new_index = min(max(after_index + 1, 0), max(len(new_spine) - 1, 0))
return JSONResponse({"ok": True, "index": new_index, "count": len(new_spine)}) return JSONResponse({"ok": True, "index": new_index, "count": len(new_spine)})
def _load_book_info_from_db(filename: str) -> tuple[dict, str, str] | None:
"""Return (book_info, title, author) for the book, or None if unknown.

``book_info`` is a dict with the keys ``genres``, ``subgenres``, ``tags``,
``description``, ``source_url``, ``publisher``, ``series``, ``series_index``,
``publication_status`` and ``updated_date``, built from the ``library`` row
and the ``book_tags`` rows for ``filename``.
"""
with get_db_conn() as conn:
with conn.cursor() as cur:
# Core metadata; no row for this filename means the book is unknown.
cur.execute(
"""SELECT title, author, publisher, series, series_index, publication_status,
source_url, description, publish_date
FROM library WHERE filename = %s""",
(filename,),
)
meta_row = cur.fetchone()
if not meta_row:
return None
# All tags of the book, ordered by tag type and then by tag.
cur.execute(
"SELECT tag, tag_type FROM book_tags WHERE filename = %s ORDER BY tag_type, tag",
(filename,),
)
tag_rows = cur.fetchall()
# Column order here must match the SELECT list above.
(title, author, publisher, series, series_index,
pub_status, source_url, description, pub_date) = meta_row
# Split tag rows by tag_type; "subject" rows are merged into the plain tags.
genres = [t for t, tp in tag_rows if tp == "genre"]
subgenres = [t for t, tp in tag_rows if tp == "subgenre"]
tags = [t for t, tp in tag_rows if tp in ("tag", "subject")]
# Falsy column values are replaced by "" (series_index by 1, so a stored 0
# also becomes 1).
info = {
"genres": genres,
"subgenres": subgenres,
"tags": tags,
"description": description or "",
"source_url": source_url or "",
"publisher": publisher or "",
"series": series or "",
"series_index": series_index or 1,
"publication_status": pub_status or "",
# publish_date is exposed under "updated_date"; assumes the column yields a
# date/datetime object (it must provide .isoformat()) — TODO confirm.
"updated_date": pub_date.isoformat() if pub_date else "",
}
return info, title or "", author or ""
@router.post("/api/edit/intro/{filename:path}")
async def add_intro_chapter(filename: str):
"""Insert a generated 'Book Info' page as the first chapter/spine item.

Responds 404 when the book has no ``library`` row or the EPUB file is missing,
400 when the OPF, its spine or its manifest cannot be found, and otherwise
``{"ok": True, "index": 0}``.
"""
loaded = _load_book_info_from_db(filename)
if loaded is None:
return JSONResponse({"error": "not found"}, status_code=404)
info, title, author = loaded
# Books for which is_db_filename() is true: chapters live in book_chapters,
# so only the table is touched.
# NOTE(review): nothing here checks for an existing "Book Info" chapter, so
# repeated calls appear to insert another one each time — confirm intended.
if is_db_filename(filename):
body_html = build_book_info_body_html(title, author, info)
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
# Shift existing chapter indices up by 1 via two-step
# negation to avoid unique-constraint violations.
cur.execute(
"UPDATE book_chapters SET chapter_index = -(chapter_index + 1) WHERE filename = %s",
(filename,),
)
cur.execute(
"UPDATE book_chapters SET chapter_index = -chapter_index WHERE filename = %s AND chapter_index < 0",
(filename,),
)
upsert_chapter(conn, filename, 0, "Book Info", body_html)
return JSONResponse({"ok": True, "index": 0})
# Otherwise: edit the EPUB archive on disk.
path = resolve_library_path(filename)
if path is None or not path.exists():
return JSONResponse({"error": "File not found"}, status_code=404)
intro_xhtml = make_intro_xhtml(title, author, info)
with zf.ZipFile(path, "r") as z:
names = set(z.namelist())
container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
opf_path = find_opf_path(names, container_xml)
if not opf_path:
return JSONResponse({"error": "OPF not found"}, status_code=400)
opf_xml = z.read(opf_path).decode("utf-8", errors="replace")
opf = BeautifulSoup(opf_xml, "xml")
opf_dir = posixpath.dirname(opf_path)
# Manifest item id -> href resolved against the OPF directory via norm_href.
manifest: dict[str, str] = {}
for item in opf.find_all("item"):
iid = item.get("id")
href = item.get("href")
if iid and href:
manifest[iid] = norm_href(opf_dir, href)
spine_tag = opf.find("spine")
if not spine_tag:
return JSONResponse({"error": "Invalid OPF spine"}, status_code=400)
itemrefs = spine_tag.find_all("itemref")
# Place the new file next to the current first spine document; "Text" is the
# default directory when the spine is empty or its first item is unresolved.
ref_dir_rel = "Text"
if itemrefs:
ref_idref = itemrefs[0].get("idref", "")
ref_abs = manifest.get(ref_idref, "")
if ref_abs:
ref_rel = posixpath.relpath(ref_abs, opf_dir)
ref_dir_rel = posixpath.dirname(ref_rel) or ""
# Draw random file names until one is not already a member of the archive.
while True:
stem = f"intro_{uuid.uuid4().hex[:8]}"
rel = posixpath.join(ref_dir_rel, f"{stem}.xhtml") if ref_dir_rel else f"{stem}.xhtml"
abs_path = norm_href(opf_dir, rel)
if abs_path not in names:
break
# First unused manifest id of the form intro_add_NNN.
existing_ids = {item.get("id") for item in opf.find_all("item") if item.get("id")}
i = 1
new_id = f"intro_add_{i:03d}"
while new_id in existing_ids:
i += 1
new_id = f"intro_add_{i:03d}"
manifest_tag = opf.find("manifest")
if not manifest_tag:
return JSONResponse({"error": "Invalid OPF manifest"}, status_code=400)
# Register the new document in the manifest (href relative to the OPF).
new_item = opf.new_tag("item")
new_item["id"] = new_id
new_item["href"] = rel
new_item["media-type"] = "application/xhtml+xml"
manifest_tag.append(new_item)
# Put it in front of the current first spine entry (or make it the only one).
new_itemref = opf.new_tag("itemref")
new_itemref["idref"] = new_id
if itemrefs:
itemrefs[0].insert_before(new_itemref)
else:
spine_tag.append(new_itemref)
# Locate the NCX: the spine's "toc" idref first, otherwise the first manifest
# item declared with the NCX media type.
toc_id = spine_tag.get("toc")
ncx_path = manifest.get(toc_id, "") if toc_id else ""
if not ncx_path:
for item in opf.find_all("item"):
mt = (item.get("media-type") or "").lower()
if mt == "application/x-dtbncx+xml" and item.get("href"):
ncx_path = norm_href(opf_dir, item.get("href"))
break
# The modified OPF is always written; NCX and the new page are added below.
updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")}
if ncx_path and ncx_path in names:
ncx_xml = z.read(ncx_path).decode("utf-8", errors="replace")
ncx = BeautifulSoup(ncx_xml, "xml")
nav_map = ncx.find("navMap")
if nav_map:
# New "Book Info" navPoint; its src is written relative to the NCX directory.
np = ncx.new_tag("navPoint")
np["id"] = f"{new_id}_nav"
label = ncx.new_tag("navLabel")
text = ncx.new_tag("text")
text.string = "Book Info"
label.append(text)
content = ncx.new_tag("content")
ncx_dir = posixpath.dirname(ncx_path)
content["src"] = posixpath.relpath(abs_path, ncx_dir)
np.append(label)
np.append(content)
first_nav = nav_map.find("navPoint")
if first_nav:
first_nav.insert_before(np)
else:
nav_map.append(np)
# Renumber playOrder 1..N over all navPoints after the insertion.
for idx, node in enumerate(nav_map.find_all("navPoint"), 1):
node["playOrder"] = str(idx)
updates[ncx_path] = str(ncx).encode("utf-8")
updates[abs_path] = intro_xhtml.encode("utf-8")
rewrite_epub_entries(path, updates)
return JSONResponse({"ok": True, "index": 0})
@router.delete("/api/edit/chapter/{index:int}/{filename:path}") @router.delete("/api/edit/chapter/{index:int}/{filename:path}")
async def delete_edit_chapter(filename: str, index: int): async def delete_edit_chapter(filename: str, index: int):
if is_db_filename(filename): if is_db_filename(filename):
@ -429,7 +530,7 @@ async def delete_edit_chapter(filename: str, index: int):
with zf.ZipFile(path, "r") as z: with zf.ZipFile(path, "r") as z:
names = set(z.namelist()) names = set(z.namelist())
container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
opf_path = _find_opf_path(names, container_xml) opf_path = find_opf_path(names, container_xml)
if not opf_path: if not opf_path:
return JSONResponse({"error": "OPF not found"}, status_code=400) return JSONResponse({"error": "OPF not found"}, status_code=400)
@ -442,7 +543,7 @@ async def delete_edit_chapter(filename: str, index: int):
iid = item.get("id") iid = item.get("id")
href = item.get("href") href = item.get("href")
if iid and href: if iid and href:
manifest[iid] = _norm_href(opf_dir, href) manifest[iid] = norm_href(opf_dir, href)
spine_tag = opf.find("spine") spine_tag = opf.find("spine")
if not spine_tag: if not spine_tag:
@ -474,7 +575,7 @@ async def delete_edit_chapter(filename: str, index: int):
for item in opf.find_all("item"): for item in opf.find_all("item"):
mt = (item.get("media-type") or "").lower() mt = (item.get("media-type") or "").lower()
if mt == "application/x-dtbncx+xml" and item.get("href"): if mt == "application/x-dtbncx+xml" and item.get("href"):
ncx_path = _norm_href(opf_dir, item.get("href")) ncx_path = norm_href(opf_dir, item.get("href"))
break break
updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")} updates: dict[str, bytes] = {opf_path: str(opf).encode("utf-8")}
@ -489,13 +590,13 @@ async def delete_edit_chapter(filename: str, index: int):
for np in nav_map.find_all("navPoint"): for np in nav_map.find_all("navPoint"):
content = np.find("content") content = np.find("content")
src = content.get("src") if content else "" src = content.get("src") if content else ""
if src and _norm_href(ncx_dir, src) == target_href: if src and norm_href(ncx_dir, src) == target_href:
np.decompose() np.decompose()
for idx, node in enumerate(nav_map.find_all("navPoint"), 1): for idx, node in enumerate(nav_map.find_all("navPoint"), 1):
node["playOrder"] = str(idx) node["playOrder"] = str(idx)
updates[ncx_path] = str(ncx).encode("utf-8") updates[ncx_path] = str(ncx).encode("utf-8")
_rewrite_epub_entries(path, updates, remove_paths) rewrite_epub_entries(path, updates, remove_paths)
new_spine = _epub_spine(path) new_spine = epub_spine(path)
new_index = min(index, max(len(new_spine) - 1, 0)) new_index = min(index, max(len(new_spine) - 1, 0))
return JSONResponse({"ok": True, "index": new_index, "count": len(new_spine)}) return JSONResponse({"ok": True, "index": new_index, "count": len(new_spine)})

View File

@ -15,6 +15,7 @@ from shared_templates import templates
from db import get_db_conn from db import get_db_conn
from epub import detect_image_format, make_chapter_xhtml, make_epub from epub import detect_image_format, make_chapter_xhtml, make_epub
from epub_utils import build_book_info_body_html
from routers.common import ( from routers.common import (
LIBRARY_DIR, LIBRARY_DIR,
ensure_unique_db_filename, ensure_unique_db_filename,
@ -31,7 +32,7 @@ from routers.common import (
from scrapers import get_scraper from scrapers import get_scraper
from scrapers.base import HEADERS from scrapers.base import HEADERS
from security import decrypt_value, encrypt_value, is_encrypted_value from security import decrypt_value, encrypt_value, is_encrypted_value
from xhtml import configure_break_patterns, element_to_xhtml, is_break_element from xhtml import collapse_consecutive_breaks, configure_break_patterns, element_to_xhtml, is_break_element
router = APIRouter() router = APIRouter()
@ -215,13 +216,15 @@ async def debug_run(request: Request):
"selector_id": ch_data.get("selector_id"), "selector_id": ch_data.get("selector_id"),
"selector_class": ch_data.get("selector_class"), "selector_class": ch_data.get("selector_class"),
"raw_html": raw_html[:8000], "raw_html": raw_html[:8000],
"converted_xhtml": "\n".join(xhtml_parts)[:8000], "converted_xhtml": collapse_consecutive_breaks("\n".join(xhtml_parts))[:8000],
} }
except Exception as e: except Exception as e:
result["first_chapter"] = {"title": ch["title"], "url": ch["url"], "error": str(e)} result["first_chapter"] = {"title": ch["title"], "url": ch["url"], "error": str(e)}
except Exception: except Exception:
result["error"] = traceback.format_exc() result["error"] = traceback.format_exc()
finally:
await scraper.close()
return result return result
@ -279,14 +282,15 @@ async def preload(request: Request):
except ValueError as e: except ValueError as e:
return {"error": str(e)} return {"error": str(e)}
async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client: try:
if username: async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client:
await scraper.login(client, username, password) if username:
book = await scraper.fetch_book_info(client, url) await scraper.login(client, username, password)
series = book.get("series", "") book = await scraper.fetch_book_info(client, url)
hint = int(book.get("series_index_hint", 0) or 0) series = book.get("series", "")
title = book.get("title", "") hint = int(book.get("series_index_hint", 0) or 0)
author = book.get("author", "") title = book.get("title", "")
author = book.get("author", "")
existing_books = [] existing_books = []
if title or author: if title or author:
@ -328,6 +332,8 @@ async def preload(request: Request):
"already_exists": bool(existing_books), "already_exists": bool(existing_books),
"existing_books": existing_books, "existing_books": existing_books,
} }
finally:
await scraper.close()
async def scrape_book(job_id: str, url: str, username: str, password: str) -> None: async def scrape_book(job_id: str, url: str, username: str, password: str) -> None:
@ -349,242 +355,247 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send)
scraper = get_scraper(url) scraper = get_scraper(url)
async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client: try:
if username: async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True, timeout=30) as client:
send("status", {"message": "Logging in..."}) if username:
await scraper.login(client, username, password) send("status", {"message": "Logging in..."})
await scraper.login(client, username, password)
book = await scraper.fetch_book_info(client, url) book = await scraper.fetch_book_info(client, url)
book_title = book["title"] book_title = book["title"]
author = book["author"] author = book["author"]
send("meta", {"title": book_title, "author": author}) send("meta", {"title": book_title, "author": author})
if not book.get("chapters"): if not book.get("chapters"):
send("error", {"message": "No chapters found. Check the URL or credentials."}) send("error", {"message": "No chapters found. Check the URL or credentials."})
job["done"] = True job["done"] = True
return return
send("chapters", {"chapters": [c["title"] for c in book["chapters"]]}) send("chapters", {"chapters": [c["title"] for c in book["chapters"]]})
send("status", {"message": f"Found {len(book['chapters'])} chapters. Downloading..."}) send("status", {"message": f"Found {len(book['chapters'])} chapters. Downloading..."})
cover_data: bytes | None = job.pop("cover_upload", None) cover_data: bytes | None = job.pop("cover_upload", None)
tags = list(book.get("tags", [])) tags = list(book.get("tags", []))
if len(book["chapters"]) < 4 and "Shorts" not in tags: if len(book["chapters"]) < 4 and "Shorts" not in tags:
tags.append("Shorts") tags.append("Shorts")
status_map = {"Temporary-Hold": "Temporary Hold"} status_map = {"Temporary-Hold": "Temporary Hold"}
pub_status = status_map.get(book.get("publication_status", ""), book.get("publication_status", "")) pub_status = status_map.get(book.get("publication_status", ""), book.get("publication_status", ""))
series = book.get("series", "") series = book.get("series", "")
series_index = int(job.get("series_index", 1) or 1) series_index = int(job.get("series_index", 1) or 1)
updated_date_override = (job.pop("updated_date_override", "") or "").strip() updated_date_override = (job.pop("updated_date_override", "") or "").strip()
final_updated_date = ( final_updated_date = (
updated_date_override updated_date_override
or book.get("updated_date", "") or book.get("updated_date", "")
or datetime.now(timezone.utc).strftime("%Y-%m-%d") or datetime.now(timezone.utc).strftime("%Y-%m-%d")
)
book_info = {
"genres": book.get("genres", []),
"subgenres": book.get("subgenres", []),
"tags": tags,
"description": book.get("description", ""),
"updated_date": final_updated_date,
"source_url": book.get("source_url", ""),
"publisher": book.get("publisher", ""),
"series": series,
"series_index": series_index,
"publication_status": pub_status,
}
_load_break_patterns()
storage_mode = job.get("storage_mode", "db")
# Break image path depends on storage mode:
# - EPUB: relative path inside the EPUB ZIP (break.png is embedded)
# - DB: absolute URL served by the static files handler
if storage_mode == "epub":
break_img_path = "../Images/break.png"
else:
break_img_path = "/static/break.png"
# Collect chapters as {title, content_html, images: [(sha256, ext, media_type, size, data)]}
chapters = []
for i, ch in enumerate(book["chapters"], 1):
send("progress", {"current": i, "total": len(book["chapters"]), "title": ch["title"]})
try:
ch_data = await scraper.fetch_chapter(client, ch)
content_el = ch_data["content_el"]
# Download images and store to disk (no DB yet); rewrite src to absolute URL
if content_el:
for img_tag in content_el.find_all("img"):
if is_break_element(img_tag):
# Replace the parent with <hr> if it contains only
# this image, so element_to_xhtml can detect the break.
parent = img_tag.parent
meaningful = [
c for c in parent.children
if not (isinstance(c, NavigableString) and not c.strip())
]
if len(meaningful) == 1 and parent is not content_el:
parent.replace_with(BeautifulSoup("<hr/>", "html.parser").hr)
else:
img_tag.decompose()
continue
src = img_tag.get("src", "")
if not src or src.startswith("data:"):
img_tag.decompose()
continue
try:
img_resp = await client.get(urljoin(ch["url"], src))
if img_resp.status_code == 200:
_, img_mime = detect_image_format(
img_resp.content, f"ch{i:03d}_img"
)
sha, ext_i, url = write_image_file(img_resp.content, img_mime)
img_tag["src"] = url
img_tag["alt"] = img_tag.get("alt", "")
img_tag.attrs = {
k: v for k, v in img_tag.attrs.items()
if k in ("src", "alt", "width", "height")
}
else:
img_tag.decompose()
except Exception:
img_tag.decompose()
xhtml_parts = []
if content_el:
all_p = content_el.find_all("p")
empty_p = sum(
1
for p in all_p
if not [c for c in p.children if isinstance(c, Tag)]
and not p.get_text().replace("\xa0", "").strip()
)
filled_p = len(all_p) - empty_p
empty_p_is_spacer = filled_p > 0 and empty_p >= filled_p * 0.5
for child in content_el.children:
part = element_to_xhtml(child, break_img_path=break_img_path, empty_p_is_spacer=empty_p_is_spacer)
if part.strip():
xhtml_parts.append(part)
content_html = "\n".join(xhtml_parts)
chapters.append({"title": ch_data["title"], "content_html": content_html})
await asyncio.sleep(0.2)
except Exception as e:
send("warning", {"message": f"Chapter {i} skipped: {e}"})
if not chapters:
send("error", {"message": "No chapters could be processed."})
job["done"] = True
return
send("status", {"message": "Saving to library..."})
book_tags = (
[(g, "genre") for g in book_info.get("genres", [])]
+ [(g, "subgenre") for g in book_info.get("subgenres", [])]
+ [(g, "tag") for g in book_info.get("tags", [])]
)
if storage_mode == "epub":
# ── EPUB file on disk ──────────────────────────────────────────
epub_chapters = [
{"title": ch["title"], "xhtml": make_chapter_xhtml(ch["title"], ch["content_html"], i + 1), "images": []}
for i, ch in enumerate(chapters)
]
try:
break_img_data = open("static/break.png", "rb").read()
except Exception:
break_img_data = b""
epub_bytes = make_epub(
book_title, author, epub_chapters, cover_data, break_img_data,
str(uuid.uuid4()), book_info,
) )
rel_path = make_rel_path(
media_type="epub",
publisher=book_info.get("publisher", ""),
author=author,
title=book_title,
series=series,
series_index=series_index,
)
rel_path = ensure_unique_rel_path(rel_path)
out_path = LIBRARY_DIR / rel_path
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_bytes(epub_bytes)
rel_filename = rel_path.as_posix()
book_meta = { book_info = {
"media_type": "epub", "genres": book.get("genres", []),
"storage_type": "file", "subgenres": book.get("subgenres", []),
"has_cover": cover_data is not None, "tags": tags,
"description": book.get("description", ""),
"updated_date": final_updated_date,
"source_url": book.get("source_url", ""),
"publisher": book.get("publisher", ""),
"series": series, "series": series,
"series_index": series_index if series else 0, "series_index": series_index,
"title": book_title, "publication_status": pub_status,
"publication_status": book_info.get("publication_status", ""),
"author": author,
"publisher": book_info.get("publisher", ""),
"source_url": book_info.get("source_url", ""),
"description": book_info.get("description", ""),
"publish_date": final_updated_date,
"needs_review": False,
} }
with get_db_conn() as conn:
with conn:
upsert_book(conn, rel_filename, book_meta, book_tags)
if cover_data:
try:
thumb = make_cover_thumb_webp(cover_data)
upsert_cover_cache(conn, rel_filename, "image/webp", thumb)
except Exception:
pass
else: _load_break_patterns()
# ── DB storage (default) ───────────────────────────────────────
base_filename = make_rel_path(
media_type="db",
publisher=book_info.get("publisher", ""),
author=author,
title=book_title,
series=series,
series_index=series_index,
).as_posix()
book_meta = { storage_mode = job.get("storage_mode", "db")
"media_type": "epub", # Break image path depends on storage mode:
"storage_type": "db", # - EPUB: relative path inside the EPUB ZIP (break.png is embedded)
"has_cover": cover_data is not None, # - DB: absolute URL served by the static files handler
"series": book_info.get("series", ""), if storage_mode == "epub":
"series_index": series_index if book_info.get("series") else 0, break_img_path = "../Images/break.png"
"title": book_title, else:
"publication_status": book_info.get("publication_status", ""), break_img_path = "/static/break.png"
"author": author,
"publisher": book_info.get("publisher", ""),
"source_url": book_info.get("source_url", ""),
"description": book_info.get("description", ""),
"publish_date": final_updated_date,
"needs_review": False,
}
with get_db_conn() as conn:
with conn:
rel_filename = ensure_unique_db_filename(conn, base_filename)
upsert_book(conn, rel_filename, book_meta, book_tags)
for idx, ch in enumerate(chapters):
upsert_chapter(conn, rel_filename, idx, ch["title"], ch["content_html"])
if cover_data:
try:
thumb = make_cover_thumb_webp(cover_data)
upsert_cover_cache(conn, rel_filename, "image/webp", thumb)
except Exception:
pass
job["filename"] = rel_filename # Collect chapters as {title, content_html, images: [(sha256, ext, media_type, size, data)]}
send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters), "storage_type": storage_mode}) chapters = []
job["done"] = True for i, ch in enumerate(book["chapters"], 1):
send("progress", {"current": i, "total": len(book["chapters"]), "title": ch["title"]})
try:
ch_data = await scraper.fetch_chapter(client, ch)
content_el = ch_data["content_el"]
# Download images and store to disk (no DB yet); rewrite src to absolute URL
if content_el:
for img_tag in content_el.find_all("img"):
if is_break_element(img_tag):
# Replace the parent with <hr> if it contains only
# this image, so element_to_xhtml can detect the break.
parent = img_tag.parent
meaningful = [
c for c in parent.children
if not (isinstance(c, NavigableString) and not c.strip())
]
if len(meaningful) == 1 and parent is not content_el:
parent.replace_with(BeautifulSoup("<hr/>", "html.parser").hr)
else:
img_tag.decompose()
continue
src = img_tag.get("src", "")
if not src or src.startswith("data:"):
img_tag.decompose()
continue
try:
img_resp = await client.get(urljoin(ch["url"], src))
if img_resp.status_code == 200:
_, img_mime = detect_image_format(
img_resp.content, f"ch{i:03d}_img"
)
sha, ext_i, url = write_image_file(img_resp.content, img_mime)
img_tag["src"] = url
img_tag["alt"] = img_tag.get("alt", "")
img_tag.attrs = {
k: v for k, v in img_tag.attrs.items()
if k in ("src", "alt", "width", "height")
}
else:
img_tag.decompose()
except Exception:
img_tag.decompose()
xhtml_parts = []
if content_el:
all_p = content_el.find_all("p")
empty_p = sum(
1
for p in all_p
if not [c for c in p.children if isinstance(c, Tag)]
and not p.get_text().replace("\xa0", "").strip()
)
filled_p = len(all_p) - empty_p
empty_p_is_spacer = filled_p > 0 and empty_p >= filled_p * 0.5
for child in content_el.children:
part = element_to_xhtml(child, break_img_path=break_img_path, empty_p_is_spacer=empty_p_is_spacer)
if part.strip():
xhtml_parts.append(part)
content_html = collapse_consecutive_breaks("\n".join(xhtml_parts))
chapters.append({"title": ch_data["title"], "content_html": content_html})
await asyncio.sleep(0.2)
except Exception as e:
send("warning", {"message": f"Chapter {i} skipped: {e}"})
if not chapters:
send("error", {"message": "No chapters could be processed."})
job["done"] = True
return
send("status", {"message": "Saving to library..."})
book_tags = (
[(g, "genre") for g in book_info.get("genres", [])]
+ [(g, "subgenre") for g in book_info.get("subgenres", [])]
+ [(g, "tag") for g in book_info.get("tags", [])]
)
if storage_mode == "epub":
# ── EPUB file on disk ──────────────────────────────────────────
epub_chapters = [
{"title": ch["title"], "xhtml": make_chapter_xhtml(ch["title"], ch["content_html"], i + 1), "images": []}
for i, ch in enumerate(chapters)
]
try:
break_img_data = open("static/break.png", "rb").read()
except Exception:
break_img_data = b""
epub_bytes = make_epub(
book_title, author, epub_chapters, cover_data, break_img_data,
str(uuid.uuid4()), book_info,
)
rel_path = make_rel_path(
media_type="epub",
publisher=book_info.get("publisher", ""),
author=author,
title=book_title,
series=series,
series_index=series_index,
)
rel_path = ensure_unique_rel_path(rel_path)
out_path = LIBRARY_DIR / rel_path
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_bytes(epub_bytes)
rel_filename = rel_path.as_posix()
book_meta = {
"media_type": "epub",
"storage_type": "file",
"has_cover": cover_data is not None,
"series": series,
"series_index": series_index if series else 0,
"title": book_title,
"publication_status": book_info.get("publication_status", ""),
"author": author,
"publisher": book_info.get("publisher", ""),
"source_url": book_info.get("source_url", ""),
"description": book_info.get("description", ""),
"publish_date": final_updated_date,
"needs_review": True,
}
with get_db_conn() as conn:
with conn:
upsert_book(conn, rel_filename, book_meta, book_tags)
if cover_data:
try:
thumb = make_cover_thumb_webp(cover_data)
upsert_cover_cache(conn, rel_filename, "image/webp", thumb)
except Exception:
pass
else:
# ── DB storage (default) ───────────────────────────────────────
base_filename = make_rel_path(
media_type="db",
publisher=book_info.get("publisher", ""),
author=author,
title=book_title,
series=series,
series_index=series_index,
).as_posix()
book_meta = {
"media_type": "epub",
"storage_type": "db",
"has_cover": cover_data is not None,
"series": book_info.get("series", ""),
"series_index": series_index if book_info.get("series") else 0,
"title": book_title,
"publication_status": book_info.get("publication_status", ""),
"author": author,
"publisher": book_info.get("publisher", ""),
"source_url": book_info.get("source_url", ""),
"description": book_info.get("description", ""),
"publish_date": final_updated_date,
"needs_review": True,
}
info_body = build_book_info_body_html(book_title, author, book_info)
with get_db_conn() as conn:
with conn:
rel_filename = ensure_unique_db_filename(conn, base_filename)
upsert_book(conn, rel_filename, book_meta, book_tags)
upsert_chapter(conn, rel_filename, 0, "Book Info", info_body)
for idx, ch in enumerate(chapters):
upsert_chapter(conn, rel_filename, idx + 1, ch["title"], ch["content_html"])
if cover_data:
try:
thumb = make_cover_thumb_webp(cover_data)
upsert_cover_cache(conn, rel_filename, "image/webp", thumb)
except Exception:
pass
job["filename"] = rel_filename
send("done", {"filename": rel_filename, "title": book_title, "chapters": len(chapters), "storage_type": storage_mode})
job["done"] = True
finally:
await scraper.close()
@router.post("/convert") @router.post("/convert")
@ -612,6 +623,8 @@ async def convert(request: Request):
job["updated_date_override"] = (body.get("updated_date") or "").strip() job["updated_date_override"] = (body.get("updated_date") or "").strip()
job["storage_mode"] = "epub" if body.get("storage_mode") == "epub" else "db" job["storage_mode"] = "epub" if body.get("storage_mode") == "epub" else "db"
while len(JOBS) >= 50:
JOBS.pop(next(iter(JOBS)))
JOBS[job_id] = job JOBS[job_id] = job
asyncio.create_task(scrape_book(job_id, url, username, password)) asyncio.create_task(scrape_book(job_id, url, username, password))
return {"job_id": job_id, "using_credentials": bool(username)} return {"job_id": job_id, "using_credentials": bool(username)}

View File

@ -80,12 +80,25 @@ async def api_library(
if rescan: if rescan:
_sync_disk_to_db() _sync_disk_to_db()
# ETag based on row count + latest updated_at — cheap query before full load. # ETag based on row count + latest updated_at across library, reading_progress, and book_tags.
with get_db_conn() as conn: with get_db_conn() as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("SELECT COUNT(*), MAX(updated_at) FROM library") cur.execute(
_count, _max_ts = cur.fetchone() """
etag = f'"{_count}-{int(_max_ts.timestamp()) if _max_ts else 0}"' SELECT
(SELECT COUNT(*) FROM library),
(SELECT MAX(updated_at) FROM library),
(SELECT MAX(updated_at) FROM reading_progress),
(SELECT MAX(id) FROM book_tags)
"""
)
_count, _max_ts, _rp_ts, _bt_id = cur.fetchone()
etag = (
f'"{_count}'
f'-{int(_max_ts.timestamp()) if _max_ts else 0}'
f'-{int(_rp_ts.timestamp()) if _rp_ts else 0}'
f'-{_bt_id or 0}"'
)
if request and request.headers.get("if-none-match") == etag: if request and request.headers.get("if-none-match") == etag:
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"}) return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
@ -216,6 +229,9 @@ async def library_bulk_delete(request: Request):
for filename in filenames: for filename in filenames:
if not isinstance(filename, str): if not isinstance(filename, str):
continue continue
if is_db_filename(filename):
deleted.append(filename)
continue
full = resolve_library_path(filename) full = resolve_library_path(filename)
if full is None: if full is None:
skipped.append(filename) skipped.append(filename)
@ -403,8 +419,7 @@ async def library_add_cover(filename: str, request: Request):
@router.post("/library/want-to-read/{filename:path}") @router.post("/library/want-to-read/{filename:path}")
async def library_want_to_read(filename: str): async def library_want_to_read(filename: str):
full = resolve_library_path(filename) if not is_db_filename(filename) and resolve_library_path(filename) is None:
if full is None:
return {"error": "Invalid filename"} return {"error": "Invalid filename"}
with get_db_conn() as conn: with get_db_conn() as conn:
with conn: with conn:
@ -423,8 +438,7 @@ async def library_want_to_read(filename: str):
@router.post("/library/archive/{filename:path}") @router.post("/library/archive/{filename:path}")
async def library_archive(filename: str): async def library_archive(filename: str):
full = resolve_library_path(filename) if not is_db_filename(filename) and resolve_library_path(filename) is None:
if full is None:
return {"error": "Invalid filename"} return {"error": "Invalid filename"}
with get_db_conn() as conn: with get_db_conn() as conn:
with conn: with conn:
@ -474,8 +488,7 @@ async def library_mark_new_reviewed(request: Request):
name = raw.strip() name = raw.strip()
if not name or name in seen: if not name or name in seen:
continue continue
full = resolve_library_path(name) if not is_db_filename(name) and resolve_library_path(name) is None:
if full is None:
continue continue
cleaned.append(name) cleaned.append(name)
seen.add(name) seen.add(name)

View File

@ -2,7 +2,6 @@
import html as _html import html as _html
import io import io
import posixpath
import re import re
import uuid import uuid
import zipfile as zf import zipfile as zf
@ -17,10 +16,13 @@ from shared_templates import templates
from cbr import cbr_get_page, cbr_page_count from cbr import cbr_get_page, cbr_page_count
from db import get_db_conn from db import get_db_conn
from epub import make_chapter_xhtml, make_epub, read_epub_file, write_epub_file from epub import make_chapter_xhtml, make_epub, read_epub_file, write_epub_file
from epub_utils import epub_spine, find_opf_path, rewrite_epub_entries
from pdf import pdf_page_count, pdf_render_page from pdf import pdf_page_count, pdf_render_page
from routers.common import ( from routers.common import (
IMAGES_DIR, IMAGES_DIR,
LIBRARY_DIR, LIBRARY_DIR,
clean_segment,
coerce_series_index,
ensure_unique_db_filename, ensure_unique_db_filename,
is_db_filename, is_db_filename,
make_cover_thumb_webp, make_cover_thumb_webp,
@ -35,159 +37,6 @@ from routers.common import (
router = APIRouter() router = APIRouter()
# ---------------------------------------------------------------------------
# EPUB helpers
# ---------------------------------------------------------------------------
def _epub_spine(path: Path) -> list[dict]:
    """Return ordered list of {index, title, href} for all spine items.

    Supports both EPUB2 (toc.ncx) and EPUB3 (nav.xhtml), and respects
    the OPF location declared in META-INF/container.xml.

    Args:
        path: Location of the EPUB (ZIP) file to inspect.

    Returns:
        One dict per spine ``itemref`` that resolves to a manifest item, in
        spine order. ``href`` is the archive-relative path of the document;
        ``title`` comes from the NCX (or, when the NCX yields nothing, the
        EPUB3 nav document), falling back to the file name without its
        extension and finally to ``Chapter N``. Returns ``[]`` when no OPF
        file can be located in the archive.
    """

    def _norm(base_dir: str, rel: str) -> str:
        # Resolve ``rel`` against ``base_dir`` into an archive member name.
        rel = (rel or '').split('#', 1)[0].strip()
        if not rel:
            return ''
        joined = posixpath.normpath(posixpath.join(base_dir, rel))
        # Drop only leading "/" and "../" prefixes. str.lstrip('./') would
        # strip a *character set* and mangle names such as ".hidden/x.xhtml".
        while joined.startswith(('../', '/')):
            joined = joined[3:] if joined.startswith('../') else joined[1:]
        return '' if joined in ('.', '..') else joined

    with zf.ZipFile(path, 'r') as z:
        names = set(z.namelist())

        opf_path = 'OEBPS/content.opf'
        try:
            container_xml = z.read('META-INF/container.xml').decode('utf-8', errors='replace')
            # Single backslashes: in a raw string "\\s" would demand a literal
            # backslash in the XML and the declared OPF path would never match.
            m = re.search(r'full-path\s*=\s*[\'"]([^\'"]+)[\'"]', container_xml)
            if m:
                opf_path = m.group(1)
        except Exception:
            # Missing/unreadable container.xml: keep the conventional default.
            pass

        if opf_path not in names:
            # fallback for malformed books
            candidates = [n for n in names if n.lower().endswith('.opf')]
            if not candidates:
                return []
            opf_path = sorted(candidates)[0]

        opf_xml = z.read(opf_path).decode('utf-8', errors='replace')
        opf = BeautifulSoup(opf_xml, 'xml')
        opf_dir = posixpath.dirname(opf_path)

        # manifest id -> archive-relative href
        manifest: dict[str, str] = {}
        for item in opf.find_all('item'):
            iid = item.get('id')
            href = item.get('href')
            if iid and href:
                manifest[iid] = _norm(opf_dir, href)

        spine_idrefs: list[str] = []
        spine_tag = opf.find('spine')
        toc_id = spine_tag.get('toc') if spine_tag else None
        if spine_tag:
            for ir in spine_tag.find_all('itemref'):
                rid = ir.get('idref')
                if rid:
                    spine_idrefs.append(rid)

        hrefs = [manifest[rid] for rid in spine_idrefs if rid in manifest]
        href_to_title: dict[str, str] = {}

        # EPUB2: NCX titles
        ncx_path = ''
        if toc_id and toc_id in manifest:
            ncx_path = manifest[toc_id]
        elif 'toc.ncx' in names:
            ncx_path = 'toc.ncx'
        elif 'OEBPS/toc.ncx' in names:
            ncx_path = 'OEBPS/toc.ncx'

        if ncx_path and ncx_path in names:
            try:
                ncx_xml = z.read(ncx_path).decode('utf-8', errors='replace')
                ncx = BeautifulSoup(ncx_xml, 'xml')
                ncx_dir = posixpath.dirname(ncx_path)
                for np in ncx.find_all('navPoint'):
                    content = np.find('content')
                    label_tag = np.find('text')
                    src = content.get('src') if content else ''
                    label = label_tag.get_text(strip=True) if label_tag else ''
                    if src and label:
                        href_to_title[_norm(ncx_dir, src)] = _html.unescape(label)
            except Exception:
                # Titles are best-effort; a broken NCX must not hide the spine.
                pass

        # EPUB3: nav.xhtml titles (fallback)
        if not href_to_title:
            nav_item = None
            for item in opf.find_all('item'):
                props = (item.get('properties') or '').split()
                if 'nav' in props:
                    nav_item = item
                    break
            if nav_item and nav_item.get('href'):
                nav_path = _norm(opf_dir, nav_item.get('href'))
                if nav_path in names:
                    try:
                        nav_xml = z.read(nav_path).decode('utf-8', errors='replace')
                        nav = BeautifulSoup(nav_xml, 'lxml')
                        nav_dir = posixpath.dirname(nav_path)
                        for a in nav.select('nav a[href]'):
                            src = a.get('href', '')
                            label = a.get_text(' ', strip=True)
                            if src and label:
                                href_to_title[_norm(nav_dir, src)] = _html.unescape(label)
                    except Exception:
                        # Same best-effort policy as for the NCX.
                        pass

    chapters = []
    for i, href in enumerate(hrefs):
        base = posixpath.basename(href)
        # Fall back to the file name (minus extension) when no TOC title exists.
        title = href_to_title.get(href, re.sub(r'\.(xhtml|html|htm)$', '', base, flags=re.I))
        chapters.append({'index': i, 'title': title or f'Chapter {i+1}', 'href': href})
    return chapters
def _norm_href(base_dir: str, rel: str) -> str:
rel = (rel or '').split('#', 1)[0].strip()
if not rel:
return ''
return posixpath.normpath(posixpath.join(base_dir, rel)).lstrip('./')
def _find_opf_path(names: set[str], container_xml: str | None) -> str | None:
opf_path = 'OEBPS/content.opf'
if container_xml:
m = re.search(r'full-path\s*=\s*[\'"]([^\'"]+)[\'"]', container_xml)
if m:
opf_path = m.group(1)
if opf_path in names:
return opf_path
candidates = sorted(n for n in names if n.lower().endswith('.opf'))
return candidates[0] if candidates else None
def _make_new_chapter_xhtml(title: str) -> str:
safe_title = _html.escape((title or 'New chapter').strip() or 'New chapter')
return (
'<?xml version="1.0" encoding="UTF-8"?>\n'
'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n'
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'
'<head>\n'
' <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n'
f' <title>{safe_title}</title>\n'
' <link rel="stylesheet" type="text/css" href="../Styles/style.css"/>\n'
'</head>\n'
'<body>\n'
f' <h2 class="chapter-title">{safe_title}</h2>\n'
' <p></p>\n'
'</body>\n'
'</html>\n'
)
def _tag_local(name: str | None) -> str: def _tag_local(name: str | None) -> str:
if not name: if not name:
@ -200,7 +49,7 @@ def _write_epub_rating(epub_path: Path, rating: int) -> None:
with zf.ZipFile(epub_path, "r") as z: with zf.ZipFile(epub_path, "r") as z:
names = set(z.namelist()) names = set(z.namelist())
container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None container_xml = z.read("META-INF/container.xml").decode("utf-8", errors="replace") if "META-INF/container.xml" in names else None
opf_path = _find_opf_path(names, container_xml) opf_path = find_opf_path(names, container_xml)
if not opf_path or opf_path not in names: if not opf_path or opf_path not in names:
return return
opf_xml = z.read(opf_path).decode("utf-8", errors="replace") opf_xml = z.read(opf_path).decode("utf-8", errors="replace")
@ -220,7 +69,7 @@ def _write_epub_rating(epub_path: Path, rating: int) -> None:
nt["content"] = str(rating) nt["content"] = str(rating)
metadata.append(nt) metadata.append(nt)
_rewrite_epub_entries(epub_path, {opf_path: str(opf).encode("utf-8")}) rewrite_epub_entries(epub_path, {opf_path: str(opf).encode("utf-8")})
def _write_cbz_rating(cbz_path: Path, rating: int) -> None: def _write_cbz_rating(cbz_path: Path, rating: int) -> None:
@ -287,7 +136,7 @@ def _sync_epub_metadata(
with zf.ZipFile(epub_path, 'r') as z: with zf.ZipFile(epub_path, 'r') as z:
names = set(z.namelist()) names = set(z.namelist())
container_xml = z.read('META-INF/container.xml').decode('utf-8', errors='replace') if 'META-INF/container.xml' in names else None container_xml = z.read('META-INF/container.xml').decode('utf-8', errors='replace') if 'META-INF/container.xml' in names else None
opf_path = _find_opf_path(names, container_xml) opf_path = find_opf_path(names, container_xml)
if not opf_path or opf_path not in names: if not opf_path or opf_path not in names:
return return
opf_xml = z.read(opf_path).decode('utf-8', errors='replace') opf_xml = z.read(opf_path).decode('utf-8', errors='replace')
@ -357,7 +206,7 @@ def _sync_epub_metadata(
series_val = (series or '').strip() series_val = (series or '').strip()
set_named_meta('calibre:series', series_val) set_named_meta('calibre:series', series_val)
if series_val: if series_val:
set_named_meta('calibre:series_index', str(_coerce_series_index(series_index))) set_named_meta('calibre:series_index', str(coerce_series_index(series_index)))
sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5]
set_named_meta('novela:series_suffix', sfx) set_named_meta('novela:series_suffix', sfx)
set_named_meta('novela:series_volume', (series_volume or '').strip()[:20]) set_named_meta('novela:series_volume', (series_volume or '').strip()[:20])
@ -366,53 +215,7 @@ def _sync_epub_metadata(
set_named_meta('novela:series_suffix', '') set_named_meta('novela:series_suffix', '')
set_named_meta('novela:series_volume', '') set_named_meta('novela:series_volume', '')
_rewrite_epub_entries(epub_path, {opf_path: str(opf).encode('utf-8')}) rewrite_epub_entries(epub_path, {opf_path: str(opf).encode('utf-8')})
def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_paths: set[str] | None = None) -> None:
remove_paths = remove_paths or set()
with open(epub_path, 'rb') as f:
original = f.read()
out = io.BytesIO()
with zf.ZipFile(io.BytesIO(original), 'r') as zin, zf.ZipFile(out, 'w', zf.ZIP_DEFLATED) as zout:
existing = set()
for item in zin.infolist():
name = item.filename
existing.add(name)
if name in remove_paths:
continue
data = updates.get(name)
if data is None:
data = zin.read(name)
ctype = zf.ZIP_STORED if name == 'mimetype' else zf.ZIP_DEFLATED
zout.writestr(name, data, compress_type=ctype)
for name, data in updates.items():
if name in existing or name in remove_paths:
continue
ctype = zf.ZIP_STORED if name == 'mimetype' else zf.ZIP_DEFLATED
zout.writestr(name, data, compress_type=ctype)
with open(epub_path, 'wb') as f:
f.write(out.getvalue())
def _clean_segment(value: str, fallback: str, max_len: int = 100) -> str:
txt = re.sub(r"\s+", "_", (value or "").strip())
txt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", txt)
txt = re.sub(r"\.+$", "", txt).strip("_")
if not txt:
txt = fallback
return txt[:max_len]
def _coerce_series_index(value: int | str | None) -> int:
try:
return max(0, min(999, int(value or 0)))
except (TypeError, ValueError):
return 0
def _make_rel_path( def _make_rel_path(
@ -425,27 +228,27 @@ def _make_rel_path(
series_suffix: str = "", series_suffix: str = "",
ext: str = ".epub", ext: str = ".epub",
) -> Path: ) -> Path:
auth = _clean_segment(author, "Unknown Author", 80) auth = clean_segment(author, "Unknown Author", 80)
ttl = _clean_segment(title, "Untitled", 140) ttl = clean_segment(title, "Untitled", 140)
if ext == ".epub": if ext == ".epub":
pub = _clean_segment(publisher, "Unknown Publisher", 80) pub = clean_segment(publisher, "Unknown Publisher", 80)
series_name = _clean_segment(series, "", 120) series_name = clean_segment(series, "", 120)
if series_name: if series_name:
idx = _coerce_series_index(series_index) idx = coerce_series_index(series_index)
sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5]
return Path("epub") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}.epub" return Path("epub") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}.epub"
return Path("epub") / pub / auth / "Stories" / f"{ttl}.epub" return Path("epub") / pub / auth / "Stories" / f"{ttl}.epub"
if ext == ".pdf": if ext == ".pdf":
pub = _clean_segment(publisher, "Unknown Publisher", 80) pub = clean_segment(publisher, "Unknown Publisher", 80)
return Path("pdf") / pub / auth / f"{ttl}.pdf" return Path("pdf") / pub / auth / f"{ttl}.pdf"
# .cbr / .cbz # .cbr / .cbz
pub = _clean_segment(publisher, "Unknown Publisher", 80) pub = clean_segment(publisher, "Unknown Publisher", 80)
series_name = _clean_segment(series, "", 80) series_name = clean_segment(series, "", 80)
if series_name: if series_name:
idx = _coerce_series_index(series_index) idx = coerce_series_index(series_index)
sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5]
return Path("comics") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}{ext}" return Path("comics") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}{ext}"
return Path("comics") / pub / auth / f"{ttl}{ext}" return Path("comics") / pub / auth / f"{ttl}{ext}"
@ -519,7 +322,7 @@ async def get_chapter_list(filename: str):
return Response(status_code=404) return Response(status_code=404)
if not path.exists(): if not path.exists():
return Response(status_code=404) return Response(status_code=404)
return _epub_spine(path) return epub_spine(path)
@router.get("/library/chapter/{index}/{filename:path}") @router.get("/library/chapter/{index}/{filename:path}")
@ -543,8 +346,9 @@ async def get_chapter_html(filename: str, index: int):
# Handles two layouts: # Handles two layouts:
# 1. <h1>…</h1> at the very start of content # 1. <h1>…</h1> at the very start of content
# 2. <section …>\n<h1>…</h1> (pandoc-style wrapping) # 2. <section …>\n<h1>…</h1> (pandoc-style wrapping)
content = re.sub(r'(?si)^(\s*<h[1-4](?:\s[^>]*)?>.*?</h[1-4]>)+\s*', '', content) if (title or "").strip() != "Book Info":
content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)<h[1-4][^>]*>.*?</h[1-4]>\s*', r'\1', content, count=1) content = re.sub(r'(?si)^(\s*<h[1-4](?:\s[^>]*)?>.*?</h[1-4]>)+\s*', '', content)
content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)<h[1-4][^>]*>.*?</h[1-4]>\s*', r'\1', content, count=1)
return Response( return Response(
f'<body><h2 class="chapter-title">{safe_title}</h2>\n{content}\n</body>', f'<body><h2 class="chapter-title">{safe_title}</h2>\n{content}\n</body>',
media_type="text/html", media_type="text/html",
@ -555,7 +359,7 @@ async def get_chapter_html(filename: str, index: int):
return Response(status_code=404) return Response(status_code=404)
if not path.exists(): if not path.exists():
return Response(status_code=404) return Response(status_code=404)
spine = _epub_spine(path) spine = epub_spine(path)
if index < 0 or index >= len(spine): if index < 0 or index >= len(spine):
return Response(status_code=404) return Response(status_code=404)
href = spine[index]["href"] href = spine[index]["href"]
@ -612,7 +416,7 @@ async def get_chapter_image(path: str, filename: str):
@router.get("/library/progress/{filename:path}") @router.get("/library/progress/{filename:path}")
async def get_progress(filename: str): async def get_progress(filename: str):
if resolve_library_path(filename) is None: if not is_db_filename(filename) and resolve_library_path(filename) is None:
return {"error": "Invalid filename"} return {"error": "Invalid filename"}
with get_db_conn() as conn: with get_db_conn() as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
@ -630,7 +434,7 @@ async def clear_progress(filename: str):
Reading sessions (mark-as-read history) are intentionally left intact. Reading sessions (mark-as-read history) are intentionally left intact.
""" """
if resolve_library_path(filename) is None: if not is_db_filename(filename) and resolve_library_path(filename) is None:
return {"error": "Invalid filename"} return {"error": "Invalid filename"}
with get_db_conn() as conn: with get_db_conn() as conn:
with conn: with conn:
@ -641,7 +445,7 @@ async def clear_progress(filename: str):
@router.post("/library/progress/{filename:path}") @router.post("/library/progress/{filename:path}")
async def save_progress(filename: str, request: Request): async def save_progress(filename: str, request: Request):
if resolve_library_path(filename) is None: if not is_db_filename(filename) and resolve_library_path(filename) is None:
return {"error": "Invalid filename"} return {"error": "Invalid filename"}
body = await request.json() body = await request.json()
cfi = body.get("cfi", "") cfi = body.get("cfi", "")
@ -652,9 +456,41 @@ async def save_progress(filename: str, request: Request):
except Exception: except Exception:
page = None page = None
progress = max(0, min(100, int(body.get("progress", 0)))) progress = max(0, min(100, int(body.get("progress", 0))))
def _parse_cfi(value):
# cfi format: "chapterIndex:scrollFrac" (scrollFrac optional)
if not value:
return (-1, 0.0)
parts = str(value).split(":", 1)
try:
idx = int(parts[0])
except Exception:
return (-1, 0.0)
frac = 0.0
if len(parts) > 1:
try:
frac = float(parts[1])
except Exception:
frac = 0.0
return (idx, frac)
new_pos = _parse_cfi(cfi)
with get_db_conn() as conn: with get_db_conn() as conn:
with conn: with conn:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(
"SELECT cfi, page, progress FROM reading_progress WHERE filename = %s",
(filename,),
)
row = cur.fetchone()
if row is not None:
cur_pos = _parse_cfi(row[0])
# Monotonic guard: only advance. cfi encodes (chapterIndex,
# scrollFrac) for all formats (epub/pdf/cbr), so a single
# tuple comparison is authoritative. Reset happens only via
# the explicit Read/Unread actions (which clear the row).
if new_pos <= cur_pos:
return {"ok": True, "skipped": True}
cur.execute( cur.execute(
""" """
INSERT INTO reading_progress (filename, cfi, page, progress, updated_at) INSERT INTO reading_progress (filename, cfi, page, progress, updated_at)
@ -1275,7 +1111,7 @@ async def convert_to_db(filename: str):
# Extract chapters from EPUB # Extract chapters from EPUB
try: try:
spine = _epub_spine(old_path) spine = epub_spine(old_path)
chapters = [] chapters = []
with zf.ZipFile(old_path, "r") as z: with zf.ZipFile(old_path, "r") as z:
for entry in spine: for entry in spine:
@ -1446,8 +1282,11 @@ async def export_epub(filename: str):
for ch_idx, ch_title, ch_content in ch_rows: for ch_idx, ch_title, ch_content in ch_rows:
# Strip leading h-tags from stored content (same logic as chapter endpoint) # Strip leading h-tags from stored content (same logic as chapter endpoint)
# to prevent double titles when make_chapter_xhtml prepends its own heading. # to prevent double titles when make_chapter_xhtml prepends its own heading.
ch_content = re.sub(r'(?si)^(\s*<h[1-4](?:\s[^>]*)?>.*?</h[1-4]>)+\s*', '', ch_content) # The "Book Info" chapter intentionally carries an <h1> book title that
ch_content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)<h[1-4][^>]*>.*?</h[1-4]>\s*', r'\1', ch_content, count=1) # must survive — so skip stripping for that one.
if (ch_title or "").strip() != "Book Info":
ch_content = re.sub(r'(?si)^(\s*<h[1-4](?:\s[^>]*)?>.*?</h[1-4]>)+\s*', '', ch_content)
ch_content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)<h[1-4][^>]*>.*?</h[1-4]>\s*', r'\1', ch_content, count=1)
modified_html, new_imgs = _rewrite_db_images_for_epub(ch_content, seen_images) modified_html, new_imgs = _rewrite_db_images_for_epub(ch_content, seen_images)
chapter_xhtml = make_chapter_xhtml(ch_title or f"Chapter {ch_idx + 1}", modified_html, ch_idx + 1) chapter_xhtml = make_chapter_xhtml(ch_title or f"Chapter {ch_idx + 1}", modified_html, ch_idx + 1)
chapters.append({"title": ch_title or f"Chapter {ch_idx + 1}", "xhtml": chapter_xhtml, "images": new_imgs}) chapters.append({"title": ch_title or f"Chapter {ch_idx + 1}", "xhtml": chapter_xhtml, "images": new_imgs})
@ -1461,6 +1300,7 @@ async def export_epub(filename: str):
epub_bytes = make_epub( epub_bytes = make_epub(
title or "Untitled", author or "Unknown", chapters, title or "Untitled", author or "Unknown", chapters,
cover_data, break_img_data, book_id, book_info, cover_data, break_img_data, book_id, book_info,
include_intro=False,
) )
safe_title = re.sub(r'[^\w\-. ]', '', (title or "book")).strip() or "book" safe_title = re.sub(r'[^\w\-. ]', '', (title or "book")).strip() or "book"
@ -1569,7 +1409,7 @@ async def library_cbr_page(filename: str, page: int = 0):
@router.get("/library/bookmarks/{filename:path}") @router.get("/library/bookmarks/{filename:path}")
async def get_bookmarks(filename: str): async def get_bookmarks(filename: str):
if resolve_library_path(filename) is None: if not is_db_filename(filename) and resolve_library_path(filename) is None:
return JSONResponse({"error": "Invalid filename"}, status_code=400) return JSONResponse({"error": "Invalid filename"}, status_code=400)
with get_db_conn() as conn: with get_db_conn() as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
@ -1597,7 +1437,7 @@ async def get_bookmarks(filename: str):
@router.post("/library/bookmarks/{filename:path}") @router.post("/library/bookmarks/{filename:path}")
async def add_bookmark(filename: str, request: Request): async def add_bookmark(filename: str, request: Request):
if resolve_library_path(filename) is None: if not is_db_filename(filename) and resolve_library_path(filename) is None:
return JSONResponse({"error": "Invalid filename"}, status_code=400) return JSONResponse({"error": "Invalid filename"}, status_code=400)
body = await request.json() body = await request.json()
chapter_index = int(body.get("chapter_index", 0)) chapter_index = int(body.get("chapter_index", 0))

View File

@ -4,7 +4,12 @@ from urllib.parse import urljoin, urlparse
import httpx import httpx
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from .base import BaseScraper from .base import (
BaseScraper,
flaresolverr_get,
flaresolverr_session_create,
flaresolverr_session_destroy,
)
LAYOUT_RE = re.compile( LAYOUT_RE = re.compile(
r"nav|menu|sidebar|header|footer|breadcrumb|pagination|" r"nav|menu|sidebar|header|footer|breadcrumb|pagination|"
@ -16,6 +21,9 @@ GENERIC_PAGE_TITLES = {"awesomedude home"}
class AwesomeDudeScraper(BaseScraper): class AwesomeDudeScraper(BaseScraper):
def __init__(self) -> None:
    """Start with no FlareSolverr session; one is created on first use."""
    # Session id returned by flaresolverr_session_create(), or None while
    # no session has been opened (or after close() released it).
    self._fs_session: str | None = None
@classmethod @classmethod
def matches(cls, url: str) -> bool: def matches(cls, url: str) -> bool:
return "awesomedude.org" in url return "awesomedude.org" in url
@ -23,8 +31,19 @@ class AwesomeDudeScraper(BaseScraper):
async def login(self, client: httpx.AsyncClient, username: str, password: str) -> bool: async def login(self, client: httpx.AsyncClient, username: str, password: str) -> bool:
return True # no login required return True # no login required
async def _ensure_session(self) -> str:
if self._fs_session is None:
self._fs_session = await flaresolverr_session_create()
return self._fs_session
async def close(self) -> None:
    """Destroy the cached FlareSolverr session, if any, and forget its id.

    Does nothing when no session id is stored (``None`` or an empty value).
    """
    sid = self._fs_session
    if not sid:
        return
    await flaresolverr_session_destroy(sid)
    self._fs_session = None
async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict: async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict:
r = await client.get(url) sid = await self._ensure_session()
r = await flaresolverr_get(url, session=sid)
soup = BeautifulSoup(r.text, "html.parser") soup = BeautifulSoup(r.text, "html.parser")
actual_url = str(r.url) actual_url = str(r.url)
@ -147,7 +166,8 @@ class AwesomeDudeScraper(BaseScraper):
} }
async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict:
cr = await client.get(ch["url"]) sid = await self._ensure_session()
cr = await flaresolverr_get(ch["url"], session=sid)
csoup = BeautifulSoup(cr.text, "html.parser") csoup = BeautifulSoup(cr.text, "html.parser")
title = ch["title"] title = ch["title"]
book_title_lc = ch.get("book_title", "").lower() book_title_lc = ch.get("book_title", "").lower()

View File

@ -1,4 +1,6 @@
import os
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from types import SimpleNamespace
import httpx import httpx
@ -6,6 +8,56 @@ HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
} }
FLARESOLVERR_URL = os.environ.get("FLARESOLVERR_URL", "http://flaresolverr:8191/v1")
FLARESOLVERR_TIMEOUT_MS = int(os.environ.get("FLARESOLVERR_TIMEOUT_MS", "60000"))
async def _flaresolverr_call(payload: dict, http_timeout: float) -> dict:
    """POST one command payload to ``FLARESOLVERR_URL`` and return the decoded JSON reply.

    Args:
        payload: The JSON body to send.
        http_timeout: Timeout handed to the ``httpx.AsyncClient`` used for the call.

    Raises:
        Whatever ``raise_for_status()`` raises for a non-success HTTP status.
    """
    async with httpx.AsyncClient(timeout=http_timeout) as client:
        reply = await client.post(FLARESOLVERR_URL, json=payload)
        reply.raise_for_status()
        decoded = reply.json()
    return decoded
async def flaresolverr_get(
    url: str,
    timeout_ms: int | None = None,
    session: str | None = None,
) -> SimpleNamespace:
    """Fetch ``url`` by sending a ``request.get`` command to FlareSolverr.

    Args:
        url: Page to fetch.
        timeout_ms: Value sent as ``maxTimeout``; when ``None`` the module
            default ``FLARESOLVERR_TIMEOUT_MS`` is used.
        session: Optional FlareSolverr session id. When truthy it is included
            in the request so the call runs inside that session.

    Returns:
        An object with ``.text`` (the solution's ``response``, ``""`` if
        absent) and ``.url`` (the solution's ``url``, falling back to the
        requested ``url``).

    Raises:
        RuntimeError: If the reply's ``status`` is not ``"ok"``.
    """
    if timeout_ms is None:
        budget_ms = FLARESOLVERR_TIMEOUT_MS
    else:
        budget_ms = timeout_ms

    request = {"cmd": "request.get", "url": url, "maxTimeout": budget_ms}
    if session:
        request["session"] = session

    # The HTTP client waits for the FlareSolverr budget (in seconds) plus a
    # 10-second margin.
    http_timeout = (budget_ms / 1000) + 10
    reply = await _flaresolverr_call(request, http_timeout)

    if reply.get("status") != "ok":
        raise RuntimeError(
            f"FlareSolverr could not fetch {url}: {reply.get('message') or reply}"
        )

    solution = reply.get("solution") or {}
    return SimpleNamespace(
        text=solution.get("response", ""),
        url=solution.get("url", url),
    )
async def flaresolverr_session_create() -> str:
    """Send ``sessions.create`` to FlareSolverr and return the new session id.

    Raises:
        RuntimeError: If the reply's ``status`` is not ``"ok"``.
    """
    reply = await _flaresolverr_call({"cmd": "sessions.create"}, 30)
    if reply.get("status") == "ok":
        return reply["session"]
    raise RuntimeError(f"FlareSolverr session create failed: {reply.get('message') or reply}")
async def flaresolverr_session_destroy(session: str) -> None:
    """Send ``sessions.destroy`` for ``session``; any failure is ignored.

    This is best-effort cleanup: an exception from the call is swallowed so
    that teardown never fails because of it.
    """
    request = {"cmd": "sessions.destroy", "session": session}
    try:
        await _flaresolverr_call(request, 30)
    except Exception:
        # Deliberately ignored: cleanup must not raise.
        return None
class BaseScraper(ABC): class BaseScraper(ABC):
"""Abstract base class for all site scrapers. """Abstract base class for all site scrapers.
@ -44,6 +96,10 @@ class BaseScraper(ABC):
Note: cover is not scraped. It is supplied by the user at convert time. Note: cover is not scraped. It is supplied by the user at convert time.
""" """
async def close(self) -> None:
    """Release any scraper-scoped resources (e.g. FlareSolverr sessions). Default: no-op.

    Subclasses that hold such resources override this; the base
    implementation does nothing and returns ``None``.
    """
    return None
@abstractmethod @abstractmethod
async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict:
"""Fetch a chapter page and extract its content element. """Fetch a chapter page and extract its content element.

View File

@ -26,7 +26,9 @@ class CodeysWorldScraper(BaseScraper):
async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict: async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict:
r = await client.get(url) r = await client.get(url)
soup = BeautifulSoup(r.text, "html.parser") # Codey's World is a Windows-hosted legacy site; cp1252 correctly maps
# the 0x80–0x9F range (…, ’, ”, —, etc.) that iso-8859-1 leaves undefined.
soup = BeautifulSoup(r.content.decode("cp1252", errors="replace"), "html.parser")
actual_url = str(r.url) actual_url = str(r.url)
# Title: <h1> # Title: <h1>
@ -116,7 +118,7 @@ class CodeysWorldScraper(BaseScraper):
async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict:
cr = await client.get(ch["url"]) cr = await client.get(ch["url"])
csoup = BeautifulSoup(cr.text, "html.parser") csoup = BeautifulSoup(cr.content.decode("cp1252", errors="replace"), "html.parser")
title = ch["title"] title = ch["title"]
book_title_lc = ch.get("book_title", "").lower() book_title_lc = ch.get("book_title", "").lower()
author_lc = ch.get("author", "").lower() author_lc = ch.get("author", "").lower()

View File

@ -1,4 +1,5 @@
import base64 import base64
import functools
import hashlib import hashlib
import os import os
@ -8,13 +9,16 @@ _PREFIX = "enc$"
def _master_secret() -> str: def _master_secret() -> str:
return ( secret = os.environ.get("NOVELA_MASTER_KEY") or os.environ.get("POSTGRES_PASSWORD")
os.environ.get("NOVELA_MASTER_KEY") if not secret:
or os.environ.get("POSTGRES_PASSWORD") raise RuntimeError(
or "novela-default-key-change-me" "No encryption key configured. Set NOVELA_MASTER_KEY "
) "(or POSTGRES_PASSWORD as fallback)."
)
return secret
@functools.lru_cache(maxsize=1)
def _fernet() -> Fernet: def _fernet() -> Fernet:
digest = hashlib.sha256(_master_secret().encode("utf-8")).digest() digest = hashlib.sha256(_master_secret().encode("utf-8")).digest()
key = base64.urlsafe_b64encode(digest) key = base64.urlsafe_b64encode(digest)

View File

@ -77,7 +77,8 @@ html, body { height: 100%; background: var(--bg); color: var(--text); font-famil
.btn-break:disabled { opacity: 0.3; cursor: not-allowed; } .btn-break:disabled { opacity: 0.3; cursor: not-allowed; }
.btn-break:not(:disabled):hover { color: var(--text); border-color: var(--text-faint); } .btn-break:not(:disabled):hover { color: var(--text); border-color: var(--text-faint); }
.btn-replace { .btn-replace,
.btn-info-page {
display: flex; align-items: center; gap: 0.35rem; display: flex; align-items: center; gap: 0.35rem;
padding: 0.3rem 0.7rem; padding: 0.3rem 0.7rem;
background: none; border: 1px solid var(--border); border-radius: var(--radius); background: none; border: 1px solid var(--border); border-radius: var(--radius);
@ -85,7 +86,48 @@ html, body { height: 100%; background: var(--bg); color: var(--text); font-famil
color: var(--text-dim); cursor: pointer; color: var(--text-dim); cursor: pointer;
transition: color 0.12s, border-color 0.12s; transition: color 0.12s, border-color 0.12s;
} }
.btn-replace:hover { color: var(--text); border-color: var(--text-faint); } .btn-replace:hover,
.btn-info-page:hover { color: var(--text); border-color: var(--text-faint); }
/* Inline formatting toolbar buttons. Each has a base rule, a dimmed
   :disabled state, and a hover state that applies only while enabled. */

/* Subheading button: red accent, bold label. */
.btn-subheading {
  padding: 0.3rem 0.6rem;
  background: none; border: 1px solid rgba(224,62,45,0.4); border-radius: var(--radius);
  font-family: var(--mono); font-size: 0.68rem; font-weight: bold;
  color: rgb(224,62,45); cursor: pointer;
  transition: background 0.12s, border-color 0.12s;
}
.btn-subheading:disabled { opacity: 0.3; cursor: not-allowed; }
.btn-subheading:not(:disabled):hover { background: rgba(224,62,45,0.1); border-color: rgb(224,62,45); }
/* Chat button: orange accent. */
.btn-chat {
  padding: 0.3rem 0.6rem;
  background: none; border: 1px solid rgba(230,126,35,0.4); border-radius: var(--radius);
  font-family: var(--mono); font-size: 0.68rem;
  color: rgb(230,126,35); cursor: pointer;
  transition: background 0.12s, border-color 0.12s;
}
.btn-chat:disabled { opacity: 0.3; cursor: not-allowed; }
.btn-chat:not(:disabled):hover { background: rgba(230,126,35,0.1); border-color: rgb(230,126,35); }
/* Indent button: neutral theme colours (border / text-dim variables). */
.btn-indent {
  padding: 0.3rem 0.6rem;
  background: none; border: 1px solid var(--border); border-radius: var(--radius);
  font-family: var(--mono); font-size: 0.68rem;
  color: var(--text-dim); cursor: pointer;
  transition: color 0.12s, border-color 0.12s;
}
.btn-indent:disabled { opacity: 0.3; cursor: not-allowed; }
.btn-indent:not(:disabled):hover { color: var(--text); border-color: var(--text-faint); }
/* Comment button: blue accent. */
.btn-comment {
  padding: 0.3rem 0.6rem;
  background: none; border: 1px solid rgba(107,157,214,0.4); border-radius: var(--radius);
  font-family: var(--mono); font-size: 0.68rem;
  color: #6b9dd6; cursor: pointer;
  transition: background 0.12s, border-color 0.12s;
}
.btn-comment:disabled { opacity: 0.3; cursor: not-allowed; }
.btn-comment:not(:disabled):hover { background: rgba(107,157,214,0.1); border-color: #6b9dd6; }
.btn-add-page, .btn-add-page,

View File

@ -2,22 +2,27 @@ require.config({ paths: { vs: 'https://cdn.jsdelivr.net/npm/monaco-editor@0.45.0
const { filename, is_db } = EDITOR; const { filename, is_db } = EDITOR;
let editor = null; // ── State ─────────────────────────────────────────────────────────────────────
let chapters = [];
let currentIndex = -1; let editor = null;
let dirty = new Set(); // indices with unsaved changes let chapters = []; // [{title, href, _id, _new, _serverIndex}, ...]
let pendingContent = new Map(); // index -> modified content not yet saved let nextLocalId = 0;
let pendingTitles = new Map(); // index -> modified title not yet saved (DB only) let pendingDeletes = []; // [{_serverIndex, title}, ...] to be deleted on save
let loadingChapter = false; // suppress dirty events during setValue let currentIndex = -1; // index into chapters[]
let saving = false; let dirty = new Set(); // chapter _ids with unsaved content/title changes
let pendingContent = new Map(); // _id -> content string
let pendingTitles = new Map(); // _id -> title string (DB only)
let structureDirty = false; // pending adds or deletes not yet on server
let loadingChapter = false;
let saving = false;
// Returns the chapter object at `currentIndex`, or null when no chapter is
// selected (i.e. `currentIndex >= 0` does not hold).
function currentCh() {
  if (!(currentIndex >= 0)) return null;
  return chapters[currentIndex];
}
// ── Init Monaco ─────────────────────────────────────────────────────────────── // ── Init Monaco ───────────────────────────────────────────────────────────────
require(['vs/editor/editor.main'], function () { require(['vs/editor/editor.main'], function () {
if (is_db) { document.getElementById('header-chapter').style.display = 'none';
document.getElementById('header-chapter').style.display = 'none'; document.getElementById('chapter-title-input').style.display = '';
document.getElementById('chapter-title-input').style.display = '';
}
editor = monaco.editor.create(document.getElementById('editor-pane'), { editor = monaco.editor.create(document.getElementById('editor-pane'), {
language: is_db ? 'html' : 'xml', language: is_db ? 'html' : 'xml',
@ -33,8 +38,9 @@ require(['vs/editor/editor.main'], function () {
editor.onDidChangeModelContent(() => { editor.onDidChangeModelContent(() => {
if (loadingChapter) return; if (loadingChapter) return;
if (currentIndex >= 0) { const ch = currentCh();
dirty.add(currentIndex); if (ch) {
dirty.add(ch._id);
renderChapterList(); renderChapterList();
setStatus('dirty', 'Unsaved changes'); setStatus('dirty', 'Unsaved changes');
document.getElementById('btn-save').disabled = false; document.getElementById('btn-save').disabled = false;
@ -45,18 +51,17 @@ require(['vs/editor/editor.main'], function () {
// Ctrl+S / Cmd+S // Ctrl+S / Cmd+S
editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, saveChapter); editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, saveChapter);
if (is_db) { document.getElementById('chapter-title-input').addEventListener('input', () => {
document.getElementById('chapter-title-input').addEventListener('input', () => { const ch = currentCh();
if (currentIndex >= 0) { if (ch) {
pendingTitles.set(currentIndex, document.getElementById('chapter-title-input').value); pendingTitles.set(ch._id, document.getElementById('chapter-title-input').value);
dirty.add(currentIndex); dirty.add(ch._id);
renderChapterList(); renderChapterList();
setStatus('dirty', 'Unsaved changes'); setStatus('dirty', 'Unsaved changes');
document.getElementById('btn-save').disabled = false; document.getElementById('btn-save').disabled = false;
updateSaveAll(); updateSaveAll();
} }
}); });
}
loadChapterList(); loadChapterList();
}); });
@ -69,18 +74,26 @@ async function loadChapterList(targetIndex = 0) {
setStatus('error', 'Failed to load chapters'); setStatus('error', 'Failed to load chapters');
return; return;
} }
chapters = await resp.json(); const raw = await resp.json();
if (!Array.isArray(chapters)) chapters = []; chapters = Array.isArray(raw)
? raw.map((ch, i) => ({ ...ch, _id: nextLocalId++, _new: false, _serverIndex: i }))
: [];
pendingDeletes = [];
dirty.clear();
pendingContent.clear();
pendingTitles.clear();
structureDirty = false;
if (chapters.length === 0) { if (chapters.length === 0) {
currentIndex = -1; currentIndex = -1;
dirty.clear();
pendingContent.clear();
pendingTitles.clear();
renderChapterList(); renderChapterList();
document.getElementById('header-chapter').textContent = 'No chapters'; document.getElementById('header-chapter').textContent = 'No chapters';
document.getElementById('btn-save').disabled = true; document.getElementById('btn-save').disabled = true;
document.getElementById('btn-break').disabled = true; document.getElementById('btn-break').disabled = true;
document.getElementById('btn-subheading').disabled = true;
document.getElementById('btn-chat').disabled = true;
document.getElementById('btn-indent').disabled = true;
document.getElementById('btn-comment').disabled = true;
document.getElementById('btn-del-page').disabled = true; document.getElementById('btn-del-page').disabled = true;
if (editor) { loadingChapter = true; editor.setValue(''); loadingChapter = false; } if (editor) { loadingChapter = true; editor.setValue(''); loadingChapter = false; }
updateSaveAll(); updateSaveAll();
@ -98,9 +111,8 @@ function renderChapterList() {
chapters.forEach((ch, i) => { chapters.forEach((ch, i) => {
const item = document.createElement('div'); const item = document.createElement('div');
item.className = 'chapter-item' + (i === currentIndex ? ' active' : ''); item.className = 'chapter-item' + (i === currentIndex ? ' active' : '');
item.innerHTML = const dot = dirty.has(ch._id) ? '<span class="dirty-dot"></span>' : '';
(dirty.has(i) ? '<span class="dirty-dot"></span>' : '') + item.innerHTML = dot + `<span class="chapter-item-title">${esc(ch.title)}</span>`;
`<span class="chapter-item-title">${esc(ch.title)}</span>`;
item.onclick = () => switchChapter(i); item.onclick = () => switchChapter(i);
el.appendChild(item); el.appendChild(item);
}); });
@ -110,36 +122,43 @@ function renderChapterList() {
async function switchChapter(index) { async function switchChapter(index) {
if (index === currentIndex) return; if (index === currentIndex) return;
// Preserve current editor content in pending cache before switching (never lose changes) // Flush current content/title to pending cache before switching
if (dirty.has(currentIndex) && editor) { const ch = currentCh();
pendingContent.set(currentIndex, editor.getValue()); if (ch) {
} if (dirty.has(ch._id) && editor) pendingContent.set(ch._id, editor.getValue());
// Preserve title input for DB books
if (is_db && currentIndex >= 0) {
const inp = document.getElementById('chapter-title-input'); const inp = document.getElementById('chapter-title-input');
if (inp) pendingTitles.set(currentIndex, inp.value); if (inp) pendingTitles.set(ch._id, inp.value);
} }
loadChapter(index); await loadChapter(index);
} }
async function loadChapter(index) { async function loadChapter(index) {
setStatus('', ''); setStatus('', '');
document.getElementById('btn-save').disabled = true; document.getElementById('btn-save').disabled = true;
document.getElementById('btn-break').disabled = true; document.getElementById('btn-break').disabled = true;
document.getElementById('btn-subheading').disabled = true;
document.getElementById('btn-chat').disabled = true;
document.getElementById('btn-del-page').disabled = true; document.getElementById('btn-del-page').disabled = true;
if (!is_db) document.getElementById('header-chapter').textContent = 'Loading…'; if (!is_db) document.getElementById('header-chapter').textContent = 'Loading…';
const ch = chapters[index];
if (!ch) return;
let content, title; let content, title;
if (pendingContent.has(index)) { if (pendingContent.has(ch._id)) {
content = pendingContent.get(index); content = pendingContent.get(ch._id);
title = pendingTitles.has(index) ? pendingTitles.get(index) : (chapters[index]?.title ?? ''); title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : ch.title;
} else if (ch._new) {
// New chapter not yet on server — starts empty
content = '';
title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : ch.title;
} else { } else {
const resp = await fetch(`/api/edit/chapter/${index}/${encodeURIComponent(filename)}`); const resp = await fetch(`/api/edit/chapter/${ch._serverIndex}/${encodeURIComponent(filename)}`);
if (!resp.ok) { setStatus('error', 'Load failed'); return; } if (!resp.ok) { setStatus('error', 'Load failed'); return; }
const data = await resp.json(); const data = await resp.json();
content = data.content; content = data.content;
title = pendingTitles.has(index) ? pendingTitles.get(index) : data.title; title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : data.title;
} }
currentIndex = index; currentIndex = index;
@ -150,22 +169,17 @@ async function loadChapter(index) {
loadingChapter = false; loadingChapter = false;
editor.focus(); editor.focus();
// Restore dirty state based on whether we loaded from pending cache const hasChanges = dirty.has(ch._id) || structureDirty;
if (dirty.has(index)) { document.getElementById('btn-save').disabled = !hasChanges;
document.getElementById('btn-save').disabled = false; if (hasChanges) setStatus('dirty', 'Unsaved changes');
setStatus('dirty', 'Unsaved changes');
} else {
document.getElementById('btn-save').disabled = true;
setStatus('', '');
}
renderChapterList(); renderChapterList();
if (is_db) { document.getElementById('chapter-title-input').value = title;
document.getElementById('chapter-title-input').value = title;
} else {
document.getElementById('header-chapter').textContent = title;
}
document.getElementById('btn-break').disabled = false; document.getElementById('btn-break').disabled = false;
document.getElementById('btn-subheading').disabled = false;
document.getElementById('btn-chat').disabled = false;
document.getElementById('btn-indent').disabled = false;
document.getElementById('btn-comment').disabled = false;
document.getElementById('btn-del-page').disabled = chapters.length <= 1; document.getElementById('btn-del-page').disabled = chapters.length <= 1;
updateSaveAll(); updateSaveAll();
} }
@ -173,34 +187,49 @@ async function loadChapter(index) {
// ── Save (current chapter) ──────────────────────────────────────────────────── // ── Save (current chapter) ────────────────────────────────────────────────────
async function saveChapter() { async function saveChapter() {
if (currentIndex < 0 || saving) return; if (saving) return;
saving = true; saving = true;
document.getElementById('btn-save').disabled = true; document.getElementById('btn-save').disabled = true;
setStatus('saving', 'Saving…');
// Apply structural changes (add/delete) before saving content
if (structureDirty) {
setStatus('saving', 'Applying changes…');
try {
await applyStructuralChanges();
} catch (e) {
setStatus('error', e.message || 'Failed to apply changes');
document.getElementById('btn-save').disabled = false;
saving = false;
return;
}
renderChapterList();
}
const ch = currentCh();
if (!ch || !dirty.has(ch._id)) {
// Structural changes saved, no content changes for this chapter
setStatus('saved', 'Saved');
setTimeout(() => setStatus('', ''), 2000);
saving = false;
updateSaveAll();
return;
}
setStatus('saving', 'Saving…');
try { try {
const saveBody = { content: editor.getValue() }; const saveBody = { content: editor.getValue() };
if (is_db) { const inp = document.getElementById('chapter-title-input');
const inp = document.getElementById('chapter-title-input'); saveBody.title = inp ? inp.value.trim() : (pendingTitles.get(ch._id) || '');
saveBody.title = inp ? inp.value.trim() : (pendingTitles.get(currentIndex) || '');
}
const resp = await fetch( const resp = await fetch(
`/api/edit/chapter/${currentIndex}/${encodeURIComponent(filename)}`, `/api/edit/chapter/${ch._serverIndex}/${encodeURIComponent(filename)}`,
{ { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(saveBody) }
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(saveBody),
}
); );
const data = await resp.json(); const data = await resp.json();
if (data.ok) { if (data.ok) {
dirty.delete(currentIndex); dirty.delete(ch._id);
pendingContent.delete(currentIndex); pendingContent.delete(ch._id);
if (is_db && chapters[currentIndex]) { ch.title = pendingTitles.get(ch._id) || ch.title;
const saved = pendingTitles.get(currentIndex) || chapters[currentIndex].title; pendingTitles.delete(ch._id);
chapters[currentIndex].title = saved || chapters[currentIndex].title;
pendingTitles.delete(currentIndex);
}
renderChapterList(); renderChapterList();
setStatus('saved', 'Saved'); setStatus('saved', 'Saved');
setTimeout(() => setStatus('', ''), 2000); setTimeout(() => setStatus('', ''), 2000);
@ -227,58 +256,64 @@ async function saveAllChapters() {
setStatus('saving', 'Saving all…'); setStatus('saving', 'Saving all…');
// Flush current editor content and title into pending caches first // Flush current editor content and title into pending caches first
if (currentIndex >= 0 && dirty.has(currentIndex)) { const ch = currentCh();
pendingContent.set(currentIndex, editor.getValue()); if (ch && dirty.has(ch._id)) {
if (is_db) { pendingContent.set(ch._id, editor.getValue());
const inp = document.getElementById('chapter-title-input'); const inp = document.getElementById('chapter-title-input');
if (inp) pendingTitles.set(currentIndex, inp.value); if (inp) pendingTitles.set(ch._id, inp.value);
}
} }
const indices = [...dirty]; // Apply structural changes first
for (const i of indices) { if (structureDirty) {
const content = pendingContent.has(i) setStatus('saving', 'Applying changes…');
? pendingContent.get(i) try {
: (i === currentIndex ? editor.getValue() : null); await applyStructuralChanges();
// For DB books, a title-only change has no pendingContent — still need to save } catch (e) {
const hasTitleChange = is_db && pendingTitles.has(i); setStatus('error', e.message || 'Failed to apply changes');
saving = false;
updateSaveAll();
return;
}
renderChapterList();
}
const ids = [...dirty];
for (const id of ids) {
const chapter = chapters.find(c => c._id === id);
if (!chapter) { dirty.delete(id); continue; }
const content = pendingContent.has(id)
? pendingContent.get(id)
: (chapter._id === currentCh()?._id ? editor.getValue() : null);
const hasTitleChange = pendingTitles.has(id);
if (!content && !hasTitleChange) continue; if (!content && !hasTitleChange) continue;
try { try {
const saveBody = { content: content || '' }; const saveBody = { content: content || '' };
if (is_db) saveBody.title = pendingTitles.has(i) ? pendingTitles.get(i) : (chapters[i]?.title || ''); saveBody.title = pendingTitles.has(id) ? pendingTitles.get(id) : (chapter.title || '');
const resp = await fetch( const resp = await fetch(
`/api/edit/chapter/${i}/${encodeURIComponent(filename)}`, `/api/edit/chapter/${chapter._serverIndex}/${encodeURIComponent(filename)}`,
{ { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(saveBody) }
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(saveBody),
}
); );
const data = await resp.json(); const data = await resp.json();
if (data.ok) { if (data.ok) {
dirty.delete(i); dirty.delete(id);
pendingContent.delete(i); pendingContent.delete(id);
if (is_db && chapters[i]) { chapter.title = pendingTitles.get(id) || chapter.title;
chapters[i].title = pendingTitles.get(i) || chapters[i].title; pendingTitles.delete(id);
pendingTitles.delete(i);
}
} }
} catch { } catch {
setStatus('error', `Save failed on chapter ${i + 1}`); setStatus('error', 'Save failed');
saving = false; saving = false;
updateSaveAll(); updateSaveAll();
return; return;
} }
} }
// Reload current chapter display to reflect saved state loadingChapter = true;
if (currentIndex >= 0) { editor.setValue(editor.getValue()); // refresh display
loadingChapter = true; loadingChapter = false;
editor.setValue(editor.getValue()); // no-op, just clears dirty for display document.getElementById('btn-save').disabled = true;
loadingChapter = false;
document.getElementById('btn-save').disabled = true;
}
renderChapterList(); renderChapterList();
setStatus('saved', 'All saved'); setStatus('saved', 'All saved');
@ -294,11 +329,65 @@ function updateSaveAll() {
if (count > 1) { if (count > 1) {
btn.style.display = 'flex'; btn.style.display = 'flex';
btn.textContent = `Save all (${count})`; btn.textContent = `Save all (${count})`;
btn.disabled = false;
} else { } else {
btn.style.display = 'none'; btn.style.display = 'none';
} }
} }
// ── Apply structural changes (adds/deletes) ───────────────────────────────────
/**
 * Push the queued structural edits to the server: first every pending chapter
 * delete, then every locally created chapter, while keeping each local
 * `_serverIndex` in step with the server's numbering.
 *
 * Reads and writes the editor globals `pendingDeletes`, `chapters`,
 * `pendingTitles`, `structureDirty` and `filename`.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the server rejects a delete or an add. Changes that were
 *   already applied are removed from the queue before the error propagates, so
 *   a retry continues with the remaining ones instead of repeating them.
 */
async function applyStructuralChanges() {
  // Step 1: apply deletes in descending server-index order so the indices of
  // the deletes still waiting in the queue stay valid.
  const sorted = [...pendingDeletes].sort((a, b) => b._serverIndex - a._serverIndex);
  for (const del of sorted) {
    const resp = await fetch(
      `/api/edit/chapter/${del._serverIndex}/${encodeURIComponent(filename)}`,
      { method: 'DELETE' }
    );
    // Tolerate an empty or non-JSON body; only an explicit `ok: false` or a
    // non-2xx status counts as a failure.
    const data = await resp.json().catch(() => ({}));
    if (!resp.ok || data.ok === false) {
      throw new Error(data.error || 'Delete failed');
    }
    // Dequeue immediately: if a later request fails, a retry must not re-send
    // this delete, because its index now belongs to a different chapter.
    pendingDeletes = pendingDeletes.filter(d => d !== del);
    // Shift server indices for remaining chapters
    chapters.forEach(c => {
      if (c._serverIndex !== null && c._serverIndex > del._serverIndex) c._serverIndex--;
    });
  }
  pendingDeletes = [];

  // Step 2: apply adds in order of appearance in chapters[]
  for (const ch of chapters.filter(c => c._new)) {
    const localIdx = chapters.indexOf(ch);
    // Find nearest preceding chapter that already has a server index;
    // -1 means "insert before everything".
    let afterServerIndex = -1;
    for (let j = localIdx - 1; j >= 0; j--) {
      if (chapters[j]._serverIndex !== null) {
        afterServerIndex = chapters[j]._serverIndex;
        break;
      }
    }
    const title = pendingTitles.has(ch._id) ? pendingTitles.get(ch._id) : ch.title;
    const resp = await fetch(
      `/api/edit/chapter/add/${encodeURIComponent(filename)}`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ title, after_index: afterServerIndex }),
      }
    );
    // A non-JSON error body must surface as the add failure, not a SyntaxError.
    const data = await resp.json().catch(() => ({}));
    if (!resp.ok || !data.ok) throw new Error(data.error || 'Add chapter failed');
    // NOTE(review): the fallback assumes the server inserts directly after
    // `after_index` when it does not echo the new index; confirm against the API.
    const addedIdx = Number.isInteger(data.index) ? data.index : afterServerIndex + 1;
    // Shift server indices for chapters inserted after this position
    chapters.forEach(c => {
      if (c._id !== ch._id && c._serverIndex !== null && c._serverIndex >= addedIdx) c._serverIndex++;
    });
    ch._serverIndex = addedIdx;
    ch._new = false;
  }

  structureDirty = false;
}
// ── Insert break ────────────────────────────────────────────────────────────── // ── Insert break ──────────────────────────────────────────────────────────────
function insertBreak() { function insertBreak() {
@ -313,66 +402,174 @@ function insertBreak() {
editor.focus(); editor.focus();
} }
// ── Wrap selection in tag ─────────────────────────────────────────────────────
/**
 * Surround the current editor selection with `<tag …>` / `</tag>`.
 *
 * With an empty selection the pair is inserted at the cursor and the cursor is
 * moved back so it sits between the opening and the closing tag.
 *
 * @param {string} tag    Element name, e.g. 'div'.
 * @param {string} [attrs] Raw attribute text placed inside the opening tag.
 */
function wrapTag(tag, attrs) {
  if (!editor || currentIndex < 0) return;

  const range = editor.getSelection();
  const closing = `</${tag}>`;
  let opening = `<${tag}>`;
  if (attrs) opening = `<${tag} ${attrs}>`;

  const inner = editor.getModel().getValueInRange(range);
  const nothingSelected =
    range.startLineNumber === range.endLineNumber &&
    range.startColumn === range.endColumn;

  const edit = {
    range: range,
    text: opening + inner + closing,
    forceMoveMarkers: true,
  };
  editor.executeEdits('wrap-tag', [edit]);

  if (nothingSelected) {
    // Step back over the closing tag so typing continues inside the element.
    const cursor = editor.getPosition();
    editor.setPosition({
      lineNumber: cursor.lineNumber,
      column: cursor.column - closing.length,
    });
  }
  editor.focus();
}
// ── Add / delete chapter ───────────────────────────────────────────────────── // Wraps selected text in a span (inline). If the selection contains block
// elements (<p>, <div>, <h*>) the span is replaced by a <div> so the result
// stays valid HTML.
/**
 * Wrap the selection in an element carrying CSS class `cls`: a `<span>` for
 * inline text, a `<div>` when the selected markup contains block-level tags.
 *
 * @param {string} cls Class name written into the `class` attribute.
 */
function wrapSpan(cls) {
  if (!editor || currentIndex < 0) return;

  const range = editor.getSelection();
  const picked = editor.getModel().getValueInRange(range);
  // An opening block-level tag anywhere in the selection forces a <div>.
  const containsBlock = /<(p|div|h[1-6]|blockquote|ul|ol|li)[\s>]/i.test(picked);

  if (containsBlock) {
    wrapTag('div', `class="${cls}"`);
  } else {
    wrapTag('span', `class="${cls}"`);
  }
}
/**
 * Wrap the selection in an element with a 40px left padding: a `<p>` for
 * inline text, a `<div>` when the selected markup contains block-level tags.
 */
function insertIndent() {
  if (!editor || currentIndex < 0) return;

  const range = editor.getSelection();
  const picked = editor.getModel().getValueInRange(range);
  // Block-level content cannot live inside a <p>, so fall back to a <div>.
  const containsBlock = /<(p|div|h[1-6]|blockquote|ul|ol|li)[\s>]/i.test(picked);

  let wrapper = 'p';
  if (containsBlock) wrapper = 'div';
  wrapTag(wrapper, 'style="padding-left: 40px;"');
}
/** Wrap the selection in a `<div class="novela-comment">` block. */
function insertComment() {
  const commentAttrs = 'class="novela-comment"';
  wrapTag('div', commentAttrs);
}
// ── Add / delete chapter ──────────────────────────────────────────────────────
async function addChapter() { async function addChapter() {
if (saving) return; if (saving) return;
if (dirty.size > 0) {
alert('Save pending changes before adding a page.');
return;
}
const title = prompt('Title for new page:', `New chapter ${Math.max(chapters.length + 1, 1)}`); const title = prompt('Title for new page:', `New chapter ${Math.max(chapters.length + 1, 1)}`);
if (title === null) return; if (title === null) return;
const resp = await fetch(`/api/edit/chapter/add/${encodeURIComponent(filename)}`, { const insertIdx = currentIndex < 0 ? chapters.length : currentIndex + 1;
method: 'POST', const newCh = {
headers: { 'Content-Type': 'application/json' }, title: title.trim() || 'New chapter',
body: JSON.stringify({ title, after_index: currentIndex }), href: null,
}); _id: nextLocalId++,
const data = await resp.json(); _new: true,
if (!resp.ok || !data.ok) { _serverIndex: null,
setStatus('error', data.error || 'Add page failed'); };
return; chapters.splice(insertIdx, 0, newCh);
} structureDirty = true;
currentIndex = insertIdx;
dirty.clear(); renderChapterList();
pendingContent.clear();
pendingTitles.clear(); loadingChapter = true;
await loadChapterList(data.index ?? Math.max(currentIndex + 1, 0)); editor.setValue('');
setStatus('saved', 'Page added'); editor.setScrollTop(0);
setTimeout(() => setStatus('', ''), 1500); loadingChapter = false;
editor.focus();
document.getElementById('btn-save').disabled = false;
document.getElementById('btn-break').disabled = false;
document.getElementById('btn-subheading').disabled = false;
document.getElementById('btn-chat').disabled = false;
document.getElementById('btn-indent').disabled = false;
document.getElementById('btn-comment').disabled = false;
document.getElementById('btn-del-page').disabled = chapters.length <= 1;
setStatus('dirty', 'Unsaved changes');
document.getElementById('chapter-title-input').value = newCh.title;
updateSaveAll();
} }
async function deleteChapter() { async function deleteChapter() {
if (saving || currentIndex < 0) return; if (saving || currentIndex < 0) return;
saving = true;
if (chapters.length <= 1) { if (chapters.length <= 1) {
alert('Cannot delete the last page.'); alert('Cannot delete the last page.');
saving = false;
return; return;
} }
if (dirty.size > 0) { const ch = chapters[currentIndex];
alert('Save pending changes before deleting a page.'); const chTitle = ch.title || `chapter ${currentIndex + 1}`;
return; if (!confirm(`Delete page "${chTitle}"?`)) { saving = false; return; }
}
const chTitle = chapters[currentIndex]?.title || `chapter ${currentIndex + 1}`;
if (!confirm(`Delete page "${chTitle}"?`)) return;
const resp = await fetch(`/api/edit/chapter/${currentIndex}/${encodeURIComponent(filename)}`, { // Clean up pending state for this chapter
method: 'DELETE', dirty.delete(ch._id);
}); pendingContent.delete(ch._id);
const data = await resp.json(); pendingTitles.delete(ch._id);
if (!resp.ok || !data.ok) {
setStatus('error', data.error || 'Delete page failed'); const removedIndex = currentIndex;
if (ch._new) {
// Never reached the server — just remove locally
chapters.splice(removedIndex, 1);
structureDirty = pendingDeletes.length > 0 || chapters.some(c => c._new);
} else {
pendingDeletes.push({ _serverIndex: ch._serverIndex, title: chTitle });
chapters.splice(removedIndex, 1);
structureDirty = true;
}
const newCount = chapters.length;
if (newCount === 0) {
currentIndex = -1;
renderChapterList();
if (!is_db) document.getElementById('header-chapter').textContent = 'No chapters';
document.getElementById('btn-save').disabled = !structureDirty;
if (structureDirty) setStatus('dirty', 'Unsaved changes');
document.getElementById('btn-break').disabled = true;
document.getElementById('btn-subheading').disabled = true;
document.getElementById('btn-chat').disabled = true;
document.getElementById('btn-indent').disabled = true;
document.getElementById('btn-comment').disabled = true;
document.getElementById('btn-del-page').disabled = true;
if (editor) { loadingChapter = true; editor.setValue(''); loadingChapter = false; }
saving = false;
updateSaveAll();
return; return;
} }
dirty.clear(); const newIdx = Math.min(removedIndex, newCount - 1);
pendingContent.clear(); renderChapterList();
pendingTitles.clear(); await loadChapter(newIdx);
await loadChapterList(data.index ?? Math.max(currentIndex - 1, 0)); setStatus('dirty', 'Unsaved changes');
setStatus('saved', 'Page deleted'); saving = false;
setTimeout(() => setStatus('', ''), 1500); }
// ── Generate Book Info page ───────────────────────────────────────────────────
async function generateIntroPage() {
if (saving) return;
if (structureDirty || dirty.size > 0) {
alert('Please save pending changes before generating the info page.');
return;
}
saving = true;
setStatus('saving', 'Generating info page…');
try {
const resp = await fetch(
`/api/edit/intro/${encodeURIComponent(filename)}`,
{ method: 'POST' }
);
const data = await resp.json().catch(() => ({}));
if (!resp.ok || !data.ok) {
setStatus('error', data.error || 'Failed to generate info page');
saving = false;
return;
}
setStatus('saved', 'Info page added');
setTimeout(() => setStatus('', ''), 2000);
saving = false;
await loadChapterList(0);
} catch {
setStatus('error', 'Failed to generate info page');
saving = false;
}
} }
// ── Find & Replace all chapters ─────────────────────────────────────────────── // ── Find & Replace all chapters ───────────────────────────────────────────────
@ -410,7 +607,7 @@ async function replaceInAllChapters() {
? new RegExp(searchVal, caseSens ? 'g' : 'gi') ? new RegExp(searchVal, caseSens ? 'g' : 'gi')
: new RegExp(searchVal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), caseSens ? 'g' : 'gi'); : new RegExp(searchVal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), caseSens ? 'g' : 'gi');
} catch (e) { } catch (e) {
prog.className = 'modal-progress error'; prog.className = 'modal-progress error';
prog.textContent = 'Invalid regex: ' + e.message; prog.textContent = 'Invalid regex: ' + e.message;
runBtn.disabled = false; runBtn.disabled = false;
return; return;
@ -420,20 +617,22 @@ async function replaceInAllChapters() {
let chaptersChanged = 0; let chaptersChanged = 0;
// Flush current editor content into pending before we start // Flush current editor content into pending before we start
if (currentIndex >= 0) { const curCh = currentCh();
pendingContent.set(currentIndex, editor.getValue()); if (curCh) pendingContent.set(curCh._id, editor.getValue());
}
for (let i = 0; i < chapters.length; i++) { for (let i = 0; i < chapters.length; i++) {
prog.className = 'modal-progress'; const ch = chapters[i];
prog.className = 'modal-progress';
prog.textContent = `Checking chapter ${i + 1} / ${chapters.length}`; prog.textContent = `Checking chapter ${i + 1} / ${chapters.length}`;
let original; let original;
if (pendingContent.has(i)) { if (pendingContent.has(ch._id)) {
original = pendingContent.get(i); original = pendingContent.get(ch._id);
} else if (ch._new) {
original = '';
} else { } else {
try { try {
const resp = await fetch(`/api/edit/chapter/${i}/${encodeURIComponent(filename)}`); const resp = await fetch(`/api/edit/chapter/${ch._serverIndex}/${encodeURIComponent(filename)}`);
if (!resp.ok) continue; if (!resp.ok) continue;
const data = await resp.json(); const data = await resp.json();
original = data.content; original = data.content;
@ -442,21 +641,21 @@ async function replaceInAllChapters() {
} }
} }
// Count occurrences
let count = 0; let count = 0;
const updated = original.replace(pattern, m => { count++; return replaceVal; }); const updated = original.replace(pattern, m => { count++; return replaceVal; });
if (count === 0) continue; if (count === 0) continue;
totalOccurrences += count; totalOccurrences += count;
chaptersChanged++; chaptersChanged++;
pendingContent.set(i, updated); pendingContent.set(ch._id, updated);
dirty.add(i); dirty.add(ch._id);
} }
// Reload current chapter from pending cache if it was changed // Reload current chapter from pending cache if it was changed
if (dirty.has(currentIndex) && pendingContent.has(currentIndex)) { const cur = currentCh();
if (cur && dirty.has(cur._id) && pendingContent.has(cur._id)) {
loadingChapter = true; loadingChapter = true;
editor.setValue(pendingContent.get(currentIndex)); editor.setValue(pendingContent.get(cur._id));
loadingChapter = false; loadingChapter = false;
document.getElementById('btn-save').disabled = false; document.getElementById('btn-save').disabled = false;
setStatus('dirty', 'Unsaved changes'); setStatus('dirty', 'Unsaved changes');
@ -465,7 +664,7 @@ async function replaceInAllChapters() {
renderChapterList(); renderChapterList();
updateSaveAll(); updateSaveAll();
prog.className = totalOccurrences > 0 ? 'modal-progress ok' : 'modal-progress'; prog.className = totalOccurrences > 0 ? 'modal-progress ok' : 'modal-progress';
prog.textContent = totalOccurrences > 0 prog.textContent = totalOccurrences > 0
? `${totalOccurrences} replacement${totalOccurrences !== 1 ? 's' : ''} in ${chaptersChanged} chapter${chaptersChanged !== 1 ? 's' : ''} — not saved yet.` ? `${totalOccurrences} replacement${totalOccurrences !== 1 ? 's' : ''} in ${chaptersChanged} chapter${chaptersChanged !== 1 ? 's' : ''} — not saved yet.`
: 'No matches found.'; : 'No matches found.';
@ -476,6 +675,6 @@ async function replaceInAllChapters() {
function setStatus(cls, text) { function setStatus(cls, text) {
const el = document.getElementById('save-status'); const el = document.getElementById('save-status');
el.className = 'save-status' + (cls ? ' ' + cls : ''); el.className = 'save-status' + (cls ? ' ' + cls : '');
el.textContent = text; el.textContent = text;
} }

View File

@ -205,6 +205,7 @@
<button class="btn" onclick="refreshAll()">Refresh</button> <button class="btn" onclick="refreshAll()">Refresh</button>
</div> </div>
<div class="status-line" id="run-result"></div> <div class="status-line" id="run-result"></div>
<div class="status-line" id="run-progress" style="display:none;"></div>
</section> </section>
<section class="card"> <section class="card">
@ -225,7 +226,8 @@
<tr> <tr>
<th>ID</th> <th>ID</th>
<th>Status</th> <th>Status</th>
<th>Files</th> <th title="Files scanned in the library">Scanned</th>
<th title="Objects actually uploaded to Dropbox (library + snapshot + pg_dump)">Uploaded</th>
<th>Bytes</th> <th>Bytes</th>
<th>Started</th> <th>Started</th>
<th>Finished</th> <th>Finished</th>
@ -317,7 +319,8 @@
el.innerHTML = [ el.innerHTML = [
rowHtml('ID', d.id), rowHtml('ID', d.id),
rowHtml('Status', d.status), rowHtml('Status', d.status),
rowHtml('Files', d.files_count ?? '-'), rowHtml('Scanned', d.scanned_files ?? '-'),
rowHtml('Uploaded', d.uploaded_files ?? '-'),
rowHtml('Bytes', d.size_bytes ?? '-'), rowHtml('Bytes', d.size_bytes ?? '-'),
rowHtml('Started', d.started_at ?? '-'), rowHtml('Started', d.started_at ?? '-'),
rowHtml('Finished', d.finished_at ?? '-'), rowHtml('Finished', d.finished_at ?? '-'),
@ -327,18 +330,19 @@
async function loadHistory() { async function loadHistory() {
const body = document.getElementById('history-body'); const body = document.getElementById('history-body');
body.innerHTML = '<tr><td colspan="7">Loading...</td></tr>'; body.innerHTML = '<tr><td colspan="8">Loading...</td></tr>';
const r = await fetch('/api/backup/history'); const r = await fetch('/api/backup/history');
const rows = await r.json(); const rows = await r.json();
if (!rows.length) { if (!rows.length) {
body.innerHTML = '<tr><td colspan="7">No backup history yet.</td></tr>'; body.innerHTML = '<tr><td colspan="8">No backup history yet.</td></tr>';
return; return;
} }
body.innerHTML = rows.map((x) => ` body.innerHTML = rows.map((x) => `
<tr> <tr>
<td>${esc(x.id)}</td> <td>${esc(x.id)}</td>
<td>${esc(x.status)}</td> <td>${esc(x.status)}</td>
<td>${esc(x.files_count ?? '-')}</td> <td>${esc(x.scanned_files ?? '-')}</td>
<td>${esc(x.uploaded_files ?? '-')}</td>
<td>${esc(x.size_bytes ?? '-')}</td> <td>${esc(x.size_bytes ?? '-')}</td>
<td>${esc(x.started_at ?? '-')}</td> <td>${esc(x.started_at ?? '-')}</td>
<td>${esc(x.finished_at ?? '-')}</td> <td>${esc(x.finished_at ?? '-')}</td>
@ -520,8 +524,9 @@
out.className = 'status-line ok'; out.className = 'status-line ok';
if (d.status === 'running') { if (d.status === 'running') {
out.textContent = `Backup started in background. id=${d.backup_id}, dry_run=${d.dry_run}`; out.textContent = `Backup started in background. id=${d.backup_id}, dry_run=${d.dry_run}`;
// Immediately kick off sidebar progress polling // Immediately kick off sidebar progress polling and page progress polling
if (typeof loadBackupProgress === 'function') loadBackupProgress(); if (typeof loadBackupProgress === 'function') loadBackupProgress();
pollRunProgress();
} else { } else {
out.textContent = `Backup ${d.status}. id=${d.backup_id}, files=${d.files_count}, bytes=${d.size_bytes}, dry_run=${d.dry_run}`; out.textContent = `Backup ${d.status}. id=${d.backup_id}, files=${d.files_count}, bytes=${d.size_bytes}, dry_run=${d.dry_run}`;
} }
@ -541,6 +546,34 @@
async function refreshAll() { async function refreshAll() {
await Promise.all([loadDropboxSettings(), loadHealth(), loadStatus(), loadHistory(), loadSnapshots()]); await Promise.all([loadDropboxSettings(), loadHealth(), loadStatus(), loadHistory(), loadSnapshots()]);
pollRunProgress();
}
let _runProgressTimer = null;
async function pollRunProgress() {
if (_runProgressTimer) { clearTimeout(_runProgressTimer); _runProgressTimer = null; }
const el = document.getElementById('run-progress');
if (!el) return;
try {
const r = await fetch('/api/backup/progress');
const d = await r.json();
if (!d.running) {
el.style.display = 'none';
el.textContent = '';
return;
}
const phase = d.phase || '';
const phaseLbl = phase === 'scanning' ? 'scanning library'
: phase === 'uploading' ? 'uploading library objects'
: phase === 'snapshot' ? 'uploading snapshot'
: phase === 'pg_dump' ? 'uploading pg_dump (may take minutes)'
: phase || 'running';
const counter = d.total > 0 ? `${d.done} / ${d.total}` : '';
el.className = 'status-line warn';
el.style.display = '';
el.textContent = counter ? `Phase: ${phaseLbl} · ${counter} files scanned` : `Phase: ${phaseLbl}`;
} catch (_) { /* ignore */ }
_runProgressTimer = setTimeout(pollRunProgress, 3000);
} }
// ── Restore ───────────────────────────────────────────────────────────── // ── Restore ─────────────────────────────────────────────────────────────

View File

@ -46,6 +46,18 @@
</svg> </svg>
Break Break
</button> </button>
<button class="btn-subheading" id="btn-subheading" onclick="wrapSpan('subheading')" title="Wrap selection as subheading" disabled>S</button>
<button class="btn-chat" id="btn-chat" onclick="wrapSpan('chat')" title="Wrap selection as chat" disabled>C</button>
<button class="btn-indent" id="btn-indent" onclick="insertIndent()" title="Wrap selection as indented paragraph" disabled>→|</button>
<button class="btn-comment" id="btn-comment" onclick="insertComment()" title="Wrap selection as author comment block" disabled>[ ]</button>
<button class="btn-info-page" onclick="generateIntroPage()" title="Generate a Book Info page as the first chapter">
<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="12" r="9"/>
<line x1="12" y1="8" x2="12" y2="8.01"/>
<polyline points="11 12 12 12 12 16 13 16"/>
</svg>
Info page
</button>
<button class="btn-replace" onclick="openReplaceModal()" title="Find &amp; replace across all chapters"> <button class="btn-replace" onclick="openReplaceModal()" title="Find &amp; replace across all chapters">
<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> <svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/> <circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/>

View File

@ -219,6 +219,19 @@
margin-bottom: 2.5rem; padding-bottom: 1rem; margin-bottom: 2.5rem; padding-bottom: 1rem;
border-bottom: 1px solid var(--border); border-bottom: 1px solid var(--border);
} }
#chapter-content span.subheading { color: rgb(224, 62, 45); font-weight: bold; }
#chapter-content span.chat { color: rgb(230, 126, 35); }
#chapter-content p[style*="padding-left"] { padding-left: 40px; }
#chapter-content .novela-comment {
border-left: 3px solid #6b9dd6;
background: rgba(107,157,214,0.12);
padding: 0.6em 1em;
margin: 1em 0;
border-radius: 0 var(--radius) var(--radius) 0;
color: var(--text-dim);
font-size: 0.92em;
display: block;
}
/* Chapter nav */ /* Chapter nav */
.chapter-nav { .chapter-nav {

View File

@ -95,6 +95,12 @@ def is_break_element(el, empty_p_is_spacer: bool = False) -> bool:
return False return False
def _clean_text(text: str) -> str:
"""Normalize non-breaking spaces to regular spaces and collapse runs of spaces."""
text = text.replace("\xa0", " ")
return re.sub(r" {2,}", " ", text)
def element_to_xhtml(el, break_img_path: str = "../Images/break.png", empty_p_is_spacer: bool = False) -> str: def element_to_xhtml(el, break_img_path: str = "../Images/break.png", empty_p_is_spacer: bool = False) -> str:
"""Convert a BeautifulSoup element to an XHTML fragment.""" """Convert a BeautifulSoup element to an XHTML fragment."""
if is_break_element(el, empty_p_is_spacer): if is_break_element(el, empty_p_is_spacer):
@ -118,7 +124,7 @@ def element_to_xhtml(el, break_img_path: str = "../Images/break.png", empty_p_is
return f"<!-- {body} -->" return f"<!-- {body} -->"
if isinstance(el, NavigableString): if isinstance(el, NavigableString):
text = str(el) text = _clean_text(str(el))
if text.strip(): if text.strip():
return he(text) return he(text)
return "" return ""
@ -175,6 +181,27 @@ def element_to_xhtml(el, break_img_path: str = "../Images/break.png", empty_p_is
return "".join(parts) return "".join(parts)
# One rendered scene break: a <center> element wrapping a self-closing <img>
# whose src ends in "break.png".
_BREAK_LINE_RE = re.compile(
    r'<center>\s*<img\s+src="[^"]*break\.png"[^>]*/>\s*</center>',
    re.IGNORECASE,
)

# A run of two or more breaks separated only by whitespace. Group 1 captures
# the first break of the run. Compiled once instead of on every call, and
# written so that whitespace *after* the last break is not part of the match.
_BREAK_RUN_RE = re.compile(
    rf'({_BREAK_LINE_RE.pattern})(?:\s*{_BREAK_LINE_RE.pattern})+',
    re.IGNORECASE,
)


def collapse_consecutive_breaks(xhtml: str) -> str:
    """Collapse runs of 2+ consecutive break images into a single break.

    Breaks count as consecutive when nothing but whitespace separates them.
    The first break of each run is kept verbatim; the whitespace that follows
    the run is preserved, so the surviving break stays separated from the next
    element exactly as the last break of the run was.

    Args:
        xhtml: XHTML fragment that may contain break-image lines.

    Returns:
        The fragment with every run of breaks reduced to its first break.
    """
    return _BREAK_RUN_RE.sub(lambda match: match.group(1), xhtml)
def normalize_wysiwyg_html(raw_html: str, break_img_path: str = "../Images/break.png") -> str: def normalize_wysiwyg_html(raw_html: str, break_img_path: str = "../Images/break.png") -> str:
"""Normaliseer HTML uit de WYSIWYG-editor naar EPUB-compatibele XHTML. """Normaliseer HTML uit de WYSIWYG-editor naar EPUB-compatibele XHTML.
@ -190,7 +217,7 @@ def normalize_wysiwyg_html(raw_html: str, break_img_path: str = "../Images/break
def process_inline(el) -> str: def process_inline(el) -> str:
if isinstance(el, NavigableString): if isinstance(el, NavigableString):
text = str(el) text = _clean_text(str(el))
return he(text) if text else "" return he(text) if text else ""
if el.name in ("strong", "b"): if el.name in ("strong", "b"):
inner = "".join(process_inline(c) for c in el.children) inner = "".join(process_inline(c) for c in el.children)
@ -256,4 +283,4 @@ def normalize_wysiwyg_html(raw_html: str, break_img_path: str = "../Images/break
if result: if result:
output_parts.append(result) output_parts.append(result)
return "\n".join(output_parts) return collapse_consecutive_breaks("\n".join(output_parts))

View File

@ -12,6 +12,50 @@ It is the primary technical reference for the current implementation.
2. `run_migrations()` 2. `run_migrations()`
3. `start_backup_scheduler()` 3. `start_backup_scheduler()`
4. mount routers 4. mount routers
- Logging: `logging.basicConfig(level=logging.INFO)` is set in `main.py` so application loggers emit to stdout alongside uvicorn output.
- `GET /health` — runs `SELECT 1` against the database; returns `{"ok": true|false}`.
## Migrations (`migrations.py`)
All schema changes are tracked in a `schema_migrations` table (name `VARCHAR(200) PRIMARY KEY`, `applied_at TIMESTAMP`). The table itself is created idempotently on every startup before any migration runs.
`_run_once(conn, name, fn)` checks whether `name` is present in `schema_migrations`:
- If present: logs `{name} — skipped (already applied)` and returns.
- If absent: calls `fn(conn)`, inserts the name, commits, and logs `{name} — executed in {N}ms`.
`run_migrations()` opens a single `direct_connect()` connection, runs all migrations in order, closes the connection, and logs a summary:
- `Migrations complete in {X}s — all already applied` — when no migration ran.
- `Migrations complete in {X}s — {N} executed` — when one or more ran.
### Migration list (in order)
| Name | What it does |
|---|---|
| `create_library` | Main `library` table |
| `create_book_tags` | `book_tags` table |
| `create_reading_progress` | `reading_progress` table |
| `create_reading_sessions` | `reading_sessions` table |
| `create_library_cover_cache` | `library_cover_cache` table |
| `create_credentials` | `credentials` table |
| `create_break_patterns` | `break_patterns` table |
| `create_backup_log` | `backup_log` table |
| `create_perf_indexes` | Performance indexes on library, reading_sessions, book_tags |
| `seed_break_patterns` | Insert default break patterns |
| `add_rating` | `rating` column on `library` |
| `remove_cover_missing_tag` | Remove legacy `Cover Missing` tag from `book_tags` |
| `create_bookmarks` | `bookmarks` table |
| `series_suffix` | `series_suffix VARCHAR(10)` on `library` |
| `create_builder_drafts` | `builder_drafts` table |
| `create_authors` | `authors` table |
| `rename_hiatus` | Rename `Hiatus` → `Long-Term Hold` in `library.publication_status` |
| `add_storage_type` | `storage_type VARCHAR(10)` on `library` |
| `create_book_images` | `book_images` table |
| `create_book_chapters` | `book_chapters` table + GIN index on `content_tsv` |
| `rebuild_chapter_tsv_with_title` | Rebuild `content_tsv` to include chapter title in FTS vector |
| `create_app_settings` | `app_settings` table (single row) |
| `app_settings_break_image` | `break_image_sha256` + `break_image_ext` columns on `app_settings` |
| `series_volume` | `series_volume VARCHAR(20)` on `library` |
| `backup_log_scanned_files` | `scanned_files INTEGER` on `backup_log` (distinguishes scanned vs uploaded) |
- Shutdown lifecycle: - Shutdown lifecycle:
1. `stop_backup_scheduler()` 1. `stop_backup_scheduler()`
2. `close_pool()` 2. `close_pool()`
@ -86,7 +130,7 @@ All files are stored under `library/` (relative to the app working directory, ma
`GET /api/library` runs in fast-path mode by default (DB-only, no full disk rescan). `GET /api/library` runs in fast-path mode by default (DB-only, no full disk rescan).
For a forced sync: `GET /api/library?rescan=true` or `POST /library/rescan`. For a forced sync: `GET /api/library?rescan=true` or `POST /library/rescan`.
`include_file_info=true` is optional for file size/mtime enrichment. `include_file_info=true` is optional for file size/mtime enrichment.
ETag caching: response includes `ETag: "{count}-{max_updated_at_unix}"` and `Cache-Control: no-cache`. Client sends `If-None-Match`; server returns `304 Not Modified` when nothing changed. ETag caching: response includes an ETag combining row count, `MAX(updated_at)` from `library`, `MAX(updated_at)` from `reading_progress`, and `MAX(id)` from `book_tags`. Client sends `If-None-Match`; server returns `304 Not Modified` when nothing changed.
`/api/home` returns: `/api/home` returns:
- `continue_reading` - `continue_reading`
@ -144,9 +188,10 @@ Filename parsing is done client-side in `bulk_import.html`. The page uses a free
### `routers/editor.py` ### `routers/editor.py`
- `GET /library/editor/{filename}` — chapter editor page; supports both EPUB files and DB-stored books (`db/…` filenames); passes `is_db` flag to template; DB branch queries `library` table directly (no file check) - `GET /library/editor/{filename}` — chapter editor page; supports both EPUB files and DB-stored books (`db/…` filenames); passes `is_db` flag to template; DB branch queries `library` table directly (no file check)
- `GET /api/edit/chapter/{index}/{filename}` — get chapter content; DB branch reads from `book_chapters` and returns `{index, href, title, content}` - `GET /api/edit/chapter/{index}/{filename}` — get chapter content; DB branch reads from `book_chapters` and returns `{index, href, title, content}`
- `POST /api/edit/chapter/{index}/{filename}` — save chapter; DB branch accepts `{content, title}`, calls `upsert_chapter` (updates `content_tsv` too) - `POST /api/edit/chapter/{index}/{filename}` — save chapter; DB branch accepts `{content, title}`, calls `upsert_chapter` (updates `content_tsv` too); file-EPUB branch also accepts `title` alongside `content` — when the title changed, helper `_update_epub_navpoint_title(path, href, new_title)` locates the matching NCX `navPoint` by content src and rewrites its `<text>` via `rewrite_epub_entries`
- `POST /api/edit/chapter/add/{filename}` — add new chapter after `after_index`; DB branch shifts `chapter_index` up via `UPDATE … SET chapter_index = chapter_index + 1 WHERE chapter_index >= insert_idx` then inserts - `POST /api/edit/chapter/add/{filename}` — add new chapter after `after_index`; DB branch shifts `chapter_index` up via `UPDATE … SET chapter_index = chapter_index + 1 WHERE chapter_index >= insert_idx` then inserts
- `DELETE /api/edit/chapter/{index}/{filename}` — delete chapter; DB branch deletes and re-indexes via `UPDATE … SET chapter_index = chapter_index - 1 WHERE chapter_index > index` - `DELETE /api/edit/chapter/{index}/{filename}` — delete chapter; DB branch deletes and re-indexes via `UPDATE … SET chapter_index = chapter_index - 1 WHERE chapter_index > index`
- `POST /api/edit/intro/{filename}` — generate a gayauthors-style Book Info page from stored metadata (author, genres, sub-genres, tags, description, source, updated) and prepend it as the first chapter. DB books: shifts existing `chapter_index` values up by one (two-step negation to avoid unique-constraint violations) and inserts `"Book Info"` at index 0. File EPUBs: writes a new `intro_<hex>.xhtml` via `make_intro_xhtml`, adds a manifest item, places the `itemref` at the start of the spine, and inserts a navPoint at the top of the NCX with renumbered `playOrder`. Empty metadata fields are skipped; no duplicate detection — clicking it again adds another page.
### `routers/grabber.py` ### `routers/grabber.py`
- `GET /grabber` — grabber page - `GET /grabber` — grabber page
@ -163,23 +208,37 @@ Filename parsing is done client-side in `bulk_import.html`. The page uses a free
Scrape/convert flow (DB storage — default): Scrape/convert flow (DB storage — default):
1. Fetch book info + chapters via scraper 1. Fetch book info + chapters via scraper
2. Per chapter: download images → write to `library/images/{sha2}/{sha256}{ext}` (content-addressed) → rewrite `img[src]` to `/library/db-images/...`; break images replaced with `<hr>` before `element_to_xhtml` runs → build `content_html` via `element_to_xhtml` with `break_img_path="/static/break.png"` 2. Per chapter: download images → write to `library/images/{sha2}/{sha256}{ext}` (content-addressed) → rewrite `img[src]` to `/library/db-images/...`; break images replaced with `<hr>` before `element_to_xhtml` runs → build `content_html` via `element_to_xhtml` with `break_img_path="/static/break.png"` → `collapse_consecutive_breaks()` applied to the assembled chapter HTML
3. One DB transaction: `ensure_unique_db_filename` → `upsert_book` (storage_type='db') → `upsert_chapter` for each chapter → `upsert_cover_cache` if cover provided 3. A Book Info page is built via `epub_utils.build_book_info_body_html(title, author, info)` and persisted as a real stored chapter at index 0 (`title = "Book Info"`) so it is visible in the editor and reader
4. Synthetic filename: `db/{publisher}/{author}/{title}` (or `db/{pub}/{auth}/Series/{series}/{idx} - {title}` for series) 4. One DB transaction: `ensure_unique_db_filename` → `upsert_book` (storage_type='db') → `upsert_chapter` for each chapter (info page first) → `upsert_cover_cache` if cover provided
5. Synthetic filename: `db/{publisher}/{author}/{title}` (or `db/{pub}/{auth}/Series/{series}/{idx} - {title}` for series)
Scrape/convert flow (EPUB file — `storage_mode: "epub"`): Scrape/convert flow (EPUB file — `storage_mode: "epub"`):
1–2. Same as DB flow; `break_img_path="../Images/break.png"` passed to `element_to_xhtml` 1–2. Same as DB flow; `break_img_path="../Images/break.png"` passed to `element_to_xhtml`
3. Chapters converted to XHTML via `make_chapter_xhtml`; EPUB file built via `make_epub` (embeds `static/break.png` as `OEBPS/Images/break.png`) and written to `library/epub/…` 3. Chapters converted to XHTML via `make_chapter_xhtml`; EPUB file built via `make_epub` (embeds `static/break.png` as `OEBPS/Images/break.png`, emits `intro.xhtml` via its own info-page logic) and written to `library/epub/…`
4. `upsert_book` called with `storage_type='file'` 4. `upsert_book` called with `storage_type='file'`
### Scrapers (`scrapers/`) ### Scrapers (`scrapers/`)
All scrapers inherit `BaseScraper` and implement `matches(url)`, `login()`, `fetch_book_info()`, `fetch_chapter()`. Registration order in `scrapers/__init__.py` determines priority (first match wins). All scrapers inherit `BaseScraper` and implement `matches(url)`, `login()`, `fetch_book_info()`, `fetch_chapter()`. Registration order in `scrapers/__init__.py` determines priority (first match wins).
`BaseScraper` also provides an async `close()` method (default no-op) for releasing scraper-scoped resources (e.g. FlareSolverr sessions). `routers/grabber.py` wraps all scraper usage (`debug_run`, `preview`, `_run_scrape`) in `try/finally: await scraper.close()` so resources are released even on errors.
#### FlareSolverr sidecar
Cloudflare-protected sites (awesomedude.org uses a full "Just a moment…" JS challenge) are fetched via a `flaresolverr` sidecar service defined in `stack/stack.yml` (image `ghcr.io/flaresolverr/flaresolverr:latest`, internal-only, on `novela-net`). The `novela` service depends on it and reads `FLARESOLVERR_URL=http://flaresolverr:8191/v1`.
Helpers in `scrapers/base.py`:
- `flaresolverr_get(url, timeout_ms=None, session=None)` — posts `cmd: request.get` and returns a `SimpleNamespace(text, url)` (drop-in for `httpx.Response` attributes).
- `flaresolverr_session_create()` / `flaresolverr_session_destroy(sid)` — manage browser sessions so Cloudflare cookies stay warm across per-chapter requests. The first request pays the challenge-solve cost; all subsequent chapters reuse the same browser and are much faster.
`scrapers/awesomedude.py` creates a FlareSolverr session in `fetch_book_info`, reuses it in every `fetch_chapter` call, and destroys it in `close()`.
| Scraper | Domain | Login | Notes | | Scraper | Domain | Login | Notes |
|---|---|---|---| |---|---|---|---|
| `ArchiveOfOurOwnScraper` | archiveofourown.org | Optional | Uses authenticity token; adult content gate via `?view_adult=true` | | `ArchiveOfOurOwnScraper` | archiveofourown.org | Optional | Uses authenticity token; adult content gate via `?view_adult=true` |
| `AwesomeDudeScraper` | awesomedude.org | No | Chapter discovery via `.htm/.html` links in same directory; content extracted from largest non-layout block | | `AwesomeDudeScraper` | awesomedude.org | No | Uses FlareSolverr (site is behind Cloudflare); per-book session created in `fetch_book_info`, reused in all `fetch_chapter` calls, destroyed in `close()`. Chapter discovery via `.htm/.html` links in same directory; content extracted from largest non-layout block |
| `CodeysWorldScraper` | codeysworld.org | No | See below | | `CodeysWorldScraper` | codeysworld.org | No | See below |
| `GayAuthorsScraper` | gayauthors.org | Optional | Genres + subgenres from `itemprop="genre"` links; tags from `ipsTags` list | | `GayAuthorsScraper` | gayauthors.org | Optional | Genres + subgenres from `itemprop="genre"` links; tags from `ipsTags` list |
| `IomfatsScraper` | iomfats.org | No | See below; requires chapter URL as entry point | | `IomfatsScraper` | iomfats.org | No | See below; requires chapter URL as entry point |
@ -238,6 +297,7 @@ Nifty classic pages are plain-text email submissions wrapped in a `<pre>` elemen
- Category from URL path (second-to-last segment, e.g. `remembrances` → tag `"Remembrances"`). - Category from URL path (second-to-last segment, e.g. `remembrances` → tag `"Remembrances"`).
- Chapter discovery: `.htm/.html` links in the same directory as the entry URL; audio/image links skipped. No chapter links → single-file story (entry URL is the only chapter). - Chapter discovery: `.htm/.html` links in the same directory as the entry URL; audio/image links skipped. No chapter links → single-file story (entry URL is the only chapter).
- `fetch_chapter()`: removes all `<h1>`/`<h2>` headings, back-navigation links, audio links (`.mp3`), mailto links; falls back to `<body>` when no content wrapper is found. - `fetch_chapter()`: removes all `<h1>`/`<h2>` headings, back-navigation links, audio links (`.mp3`), mailto links; falls back to `<body>` when no content wrapper is found.
- Encoding: response bytes are decoded with `cp1252` (Windows-1252) before parsing. This correctly handles the 0x80–0x9F range (`…`, `'`, `"`, `—`, etc.) that `iso-8859-1` leaves undefined.
#### IomfatsScraper #### IomfatsScraper
@ -266,9 +326,11 @@ All pages on `tedlouis.com` use opaque token-based routing: `https://tedlouis.co
- No genres, subgenres, tags or description available on the page. - No genres, subgenres, tags or description available on the page.
- `fetch_chapter()`: content from `<div id="chapter">`; strips `<h1 class="story-title">`, `<h2 class="chapter-title">`, `div.chapter-copyright-line`, and `div.chapter-copyright-notice-text` blocks. Chapter title refined from `<h2 class="chapter-title"> <span>`. - `fetch_chapter()`: content from `<div id="chapter">`; strips `<h1 class="story-title">`, `<h2 class="chapter-title">`, `div.chapter-copyright-line`, and `div.chapter-copyright-notice-text` blocks. Chapter title refined from `<h2 class="chapter-title"> <span>`.
#### `xhtml.element_to_xhtml()` — Comment handling #### `xhtml.element_to_xhtml()` — Text normalization
`bs4.Comment` objects (a `NavigableString` subclass) are now emitted as XML comments: `<!-- … -->`. The `--` sequence (illegal inside XML comments) is sanitised to `- -`. This allows scrapers to embed invisible metadata (e.g. the Nifty `Subject:` header) in chapter content without it appearing in the rendered reader. - `bs4.Comment` objects (a `NavigableString` subclass) are emitted as XML comments: `<!-- … -->`. The `--` sequence (illegal inside XML comments) is sanitised to `- -`. This allows scrapers to embed invisible metadata (e.g. the Nifty `Subject:` header) in chapter content without it appearing in the rendered reader.
- Non-breaking spaces (`\xa0` / `&nbsp;`) in text nodes are replaced with regular spaces before HTML-escaping. Consecutive spaces resulting from this substitution are collapsed to one. This applies in both `element_to_xhtml` and `normalize_wysiwyg_html` via the shared `_clean_text()` helper.
- `collapse_consecutive_breaks(xhtml)` collapses runs of 2+ consecutive `<center><img src=".../break.png" .../></center>` occurrences (with optional whitespace between) into a single break. Applied in `normalize_wysiwyg_html()` (editor save path) and in `routers/grabber.py` on both the preview `converted_xhtml` and the per-chapter `content_html` produced during scraping.
### `routers/search.py` ### `routers/search.py`
- `GET /search` — full-text search page (`search.html`); Enter-to-search, `?q=` param auto-runs on load - `GET /search` — full-text search page (`search.html`); Enter-to-search, `?q=` param auto-runs on load
@ -321,8 +383,8 @@ URL is stored in the `authors` table (`name` unique, `url`, `created_at`, `updat
- `POST /api/backup/oauth/prepare` — save app key + secret, return Dropbox auth URL - `POST /api/backup/oauth/prepare` — save app key + secret, return Dropbox auth URL
- `POST /api/backup/oauth/exchange` — exchange authorization code for refresh token - `POST /api/backup/oauth/exchange` — exchange authorization code for refresh token
- `GET /api/backup/health` — Dropbox connectivity check (includes `schedule_enabled`, `schedule_interval_hours`) - `GET /api/backup/health` — Dropbox connectivity check (includes `schedule_enabled`, `schedule_interval_hours`)
- `GET /api/backup/status` — current backup status - `GET /api/backup/status` — current backup status (`uploaded_files`, `scanned_files`, `size_bytes`, …)
- `GET /api/backup/history` — backup run history (last 20) - `GET /api/backup/history` — backup run history (last 20; each entry includes `uploaded_files` and `scanned_files`)
- `GET /api/backup/progress` — live progress of running backup `{running, done, total, phase}` - `GET /api/backup/progress` — live progress of running backup `{running, done, total, phase}`
- `POST /api/backup/run` — trigger backup (background task) - `POST /api/backup/run` — trigger backup (background task)
- `GET /api/backup/snapshots` — list available snapshots `{ok, snapshots: [{name, created_at}]}` - `GET /api/backup/snapshots` — list available snapshots `{ok, snapshots: [{name, created_at}]}`
@ -338,7 +400,7 @@ URL is stored in the `authors` table (`name` unique, `url`, `created_at`, `updat
- Dropbox backup root stored encrypted in `credentials` (`site='dropbox_backup_root'`). - Dropbox backup root stored encrypted in `credentials` (`site='dropbox_backup_root'`).
- Retention (`snapshots to keep`) stored encrypted in `credentials` (`site='dropbox_backup_retention'`). - Retention (`snapshots to keep`) stored encrypted in `credentials` (`site='dropbox_backup_retention'`).
- Backup schedule (`enabled` + `interval_hours`) stored encrypted in `credentials` (`site='dropbox_backup_schedule'`). - Backup schedule (`enabled` + `interval_hours`) stored encrypted in `credentials` (`site='dropbox_backup_schedule'`).
- Encryption uses `NOVELA_MASTER_KEY` (Fernet). - Encryption uses `NOVELA_MASTER_KEY` env var; falls back to `POSTGRES_PASSWORD`. If neither is set, startup raises a `RuntimeError`. The Fernet instance is cached per process via `@functools.lru_cache(maxsize=1)` — key derivation runs only once.
### Dropbox authentication ### Dropbox authentication
- Preferred: OAuth2 refresh token (does not expire). Set up via the two-step flow on `/backup`: - Preferred: OAuth2 refresh token (does not expire). Set up via the two-step flow on `/backup`:
@ -356,6 +418,8 @@ URL is stored in the `authors` table (`name` unique, `url`, `created_at`, `updat
- Orphan object pruning removes objects no longer referenced by retained snapshots. - Orphan object pruning removes objects no longer referenced by retained snapshots.
- Local manifest cache (`config/backup_manifest.json`) speeds up change detection. - Local manifest cache (`config/backup_manifest.json`) speeds up change detection.
- Database backup is done via `pg_dump` to Dropbox `postgres/`. - Database backup is done via `pg_dump` to Dropbox `postgres/`.
- Dropbox client timeout is set to `300s`; uploads use chunked session uploads with a `16 MB` chunk size (`_DROPBOX_UPLOAD_CHUNK` / `_DROPBOX_UPLOAD_THRESHOLD`) to keep each HTTP request well below the socket timeout on slow connections.
- `backup_log.scanned_files` tracks the number of library files inspected in a run, while `backup_log.files_count` tracks the number of objects actually uploaded (library objects + snapshot + pg_dump). When most files are already deduplicated `files_count` can be as low as 2 while `scanned_files` reflects the full library size.
- `POST /api/backup/run` always starts a background task and returns immediately. - `POST /api/backup/run` always starts a background task and returns immediately.
- `GET /api/backup/progress` returns in-memory progress updated per file; phases: `starting` → `scanning` → `uploading` → `snapshot` → `pg_dump`. - `GET /api/backup/progress` returns in-memory progress updated per file; phases: `starting` → `scanning` → `uploading` → `snapshot` → `pg_dump`.
- Scheduler runs in the background (`start_backup_scheduler`) and triggers on interval when enabled. - Scheduler runs in the background (`start_backup_scheduler`) and triggers on interval when enabled.
@ -371,6 +435,12 @@ URL is stored in the `authors` table (`name` unique, `url`, `created_at`, `updat
- `POSTGRES_PASSWORD` - `POSTGRES_PASSWORD`
- `NOVELA_MASTER_KEY` - `NOVELA_MASTER_KEY`
- `CONFIG_DIR` - `CONFIG_DIR`
- `NOVELA_PORT` (default `8099`) — host port mapped to the novela container
- `ADMINER_PORT` (default `8098`) — host port mapped to adminer
Optional:
- `FLARESOLVERR_URL` (default `http://flaresolverr:8191/v1`) — base URL of the FlareSolverr sidecar
- `FLARESOLVERR_TIMEOUT_MS` (default `60000`) — per-request timeout for FlareSolverr calls
Dropbox settings are managed via the web UI on `/backup`. Dropbox settings are managed via the web UI on `/backup`.
@ -521,8 +591,8 @@ When enabled, every page shows a diagonal **DEVELOP** ribbon in the top-left cor
- Empty dir pruning: `prune_empty_dirs(start)` walks up from `start` to `LIBRARY_ROOT`, removing each dir if empty; stops at first non-empty dir. - Empty dir pruning: `prune_empty_dirs(start)` walks up from `start` to `LIBRARY_ROOT`, removing each dir if empty; stops at first non-empty dir.
- Cover strategy: - Cover strategy:
- EPUB: `GET /library/cover/{filename}` checks `library_cover_cache` first; on miss, extracts from ZIP and warms the cache. Cover upload (`POST /library/cover/{filename}`) replaces the image inside the EPUB ZIP (OPF located via `META-INF/container.xml`, old cover found in manifest and removed) and updates the cache so subsequent requests return the new cover immediately. - EPUB: `GET /library/cover/{filename}` checks `library_cover_cache` first; on miss, extracts from ZIP and warms the cache. Cover upload (`POST /library/cover/{filename}`) replaces the image inside the EPUB ZIP (OPF located via `META-INF/container.xml`, old cover found in manifest and removed) and updates the cache so subsequent requests return the new cover immediately.
- PDF: first page rendered as thumbnail, cached - PDF: first page rendered as thumbnail in-memory via `Image.frombytes()` from a PyMuPDF pixmap (no temp file), cached.
- CBR/CBZ: first page extracted, cached - CBR/CBZ: first page extracted, cached.
- Rating storage: - Rating storage:
- EPUB: `<meta name="novela:rating" content="N"/>` in OPF - EPUB: `<meta name="novela:rating" content="N"/>` in OPF
- CBZ: `<NovelaRating>N</NovelaRating>` in `ComicInfo.xml` inside the ZIP - CBZ: `<NovelaRating>N</NovelaRating>` in `ComicInfo.xml` inside the ZIP
@ -538,9 +608,11 @@ When enabled, every page shows a diagonal **DEVELOP** ribbon in the top-left cor
- `list_library_json()` uses `json_agg` in the main query to inline tags per book — eliminates a separate `SELECT * FROM book_tags` query and Python merge loop. - `list_library_json()` uses `json_agg` in the main query to inline tags per book — eliminates a separate `SELECT * FROM book_tags` query and Python merge loop.
- `has_cached_cover` is provided directly via SQL join instead of full cache fetch. - `has_cached_cover` is provided directly via SQL join instead of full cache fetch.
- `reading_sessions` is pre-aggregated in a subquery. - `reading_sessions` is pre-aggregated in a subquery.
- ETag on `/api/library`: cheap `COUNT + MAX(updated_at)` query before full load; `304 Not Modified` on cache hit. - ETag on `/api/library`: single query combining `COUNT(*)` from `library`, `MAX(updated_at)` from `library` and `reading_progress`, and `MAX(id)` from `book_tags`; `304 Not Modified` on cache hit — tag or progress changes now also invalidate the cache.
- Front-end rendering uses `IntersectionObserver` to defer both cover image loading and placeholder canvas drawing until cards enter the viewport — prevents hundreds of simultaneous HTTP requests and canvas operations on initial render. - Front-end rendering uses `IntersectionObserver` to defer both cover image loading and placeholder canvas drawing until cards enter the viewport — prevents hundreds of simultaneous HTTP requests and canvas operations on initial render.
- `renderBooksGrid`, `renderDuplicatesView`, `renderSeriesDetail` all use a single DOM pass: cover `<img>` and `<canvas>` are set up via `card.querySelector` immediately after `innerHTML` is set, eliminating a second full iteration with `document.getElementById` calls. - `renderBooksGrid`, `renderDuplicatesView`, `renderSeriesDetail` all use a single DOM pass: cover `<img>` and `<canvas>` are set up via `card.querySelector` immediately after `innerHTML` is set, eliminating a second full iteration with `document.getElementById` calls.
- CBR/CBZ page list: `cbr_page_list` is cached per `(str(path), mtime)` via `lru_cache(maxsize=64)` — avoids opening the archive twice per page request.
- In-memory job dicts: `JOBS` (grabber) and `BACKUP_TASKS`/`BACKUP_PROGRESS` (backup) are capped at 50 entries; oldest entries are evicted on each new job creation.
- Additional migration indexes: - Additional migration indexes:
- `idx_library_sort_coalesce` - `idx_library_sort_coalesce`
- `idx_library_needs_review` - `idx_library_needs_review`
@ -578,6 +650,23 @@ Same sanitization rules as file-based paths. Uniqueness enforced via `ensure_uni
`GET /library/editor/{filename}` supports DB-stored books. The Monaco editor shows `language: 'html'` for DB books (vs `'xml'` for EPUB). The header shows a title input instead of a read-only chapter name. Unsaved content and titles are preserved across chapter switches via `pendingContent` and `pendingTitles` maps. `editor.focus()` is called after every content load so the editor is immediately interactive. `GET /library/editor/{filename}` supports DB-stored books. The Monaco editor shows `language: 'html'` for DB books (vs `'xml'` for EPUB). The header shows a title input instead of a read-only chapter name. Unsaved content and titles are preserved across chapter switches via `pendingContent` and `pendingTitles` maps. `editor.focus()` is called after every content load so the editor is immediately interactive.
Chapter add and delete are deferred: no API call is made immediately. Structural changes are collected in `pendingDeletes[]` and tracked with a `structureDirty` flag; they are applied (deletes in reverse server-index order, then adds) when Save is triggered. The `saving` flag is set at the start of any delete to prevent a concurrent save from flushing structural changes during the async gap in `loadChapter`.
### Editor toolbar — inline formatting buttons
Four buttons sit in the header toolbar next to Break. All are disabled until a chapter is loaded.
| Button | HTML output | Reader style |
|---|---|---|
| **S** | `<span class="subheading">…</span>` | Red bold text |
| **C** | `<span class="chat">…</span>` | Orange text |
| **→\|** | `<p style="padding-left: 40px;">…</p>` | Indented paragraph |
| **[ ]** | `<div class="novela-comment">…</div>` | Blue left border + tinted background |
Without a selection each button inserts an empty tag with the cursor placed inside. When the selection contains block-level elements (`<p>`, `<div>`, `<h*>`, etc.), `wrapSpan` and `insertIndent` automatically use a `<div>` wrapper instead of `<span>`/`<p>` to keep the HTML valid.
The reader CSS for all four classes lives in the inline `<style>` block of `reader.html` under `#chapter-content`.
### Imagestore ### Imagestore
Images embedded in chapter HTML are stored content-addressed at `library/images/{sha256[:2]}/{sha256}{ext}`. Images embedded in chapter HTML are stored content-addressed at `library/images/{sha256[:2]}/{sha256}{ext}`.
@ -598,7 +687,12 @@ Images embedded in chapter HTML are stored content-addressed at `library/images/
`GET /api/library/export-epub/{filename}` streams an EPUB built from DB content: `GET /api/library/export-epub/{filename}` streams an EPUB built from DB content:
1. Query metadata, tags, chapters, cover from DB 1. Query metadata, tags, chapters, cover from DB
2. Per chapter: `_rewrite_db_images_for_epub` strips `/library/db-images/` prefix, reads files from `IMAGES_DIR`, deduplicates by sha256, assigns `OEBPS/Images/{sha256}{ext}` paths, rewrites `img[src]` to `../Images/…` 2. Per chapter: `_rewrite_db_images_for_epub` strips `/library/db-images/` prefix, reads files from `IMAGES_DIR`, deduplicates by sha256, assigns `OEBPS/Images/{sha256}{ext}` paths, rewrites `img[src]` to `../Images/…`
3. Build EPUB via `make_epub()`; return as `Content-Disposition: attachment` 3. Build EPUB via `make_epub(..., include_intro=False)` — the stored chapter 0 is the single source of truth for the info page, so `make_epub` omits its own `intro.xhtml`, manifest item, spine itemref and NCX navPoint (remaining `playOrder` values start at 1); return as `Content-Disposition: attachment`
4. Leading `<h1>` stripping in `reader.py` (`get_chapter_html` and the DB→EPUB export) is skipped when `title == "Book Info"`, so the `<h1>{book title}</h1>` at the top of that chapter's body survives.
Legacy DB books converted before the Book Info change have no stored info page; their exports will lack an intro until the editor toolbar's Info page button is used.
Shared helper `epub_utils.build_book_info_body_html(title, author, info)` returns the inner-body HTML fragment used for DB storage; starts with `<h1>{title}</h1>`, skips empty fields, and separates description and source/updated blocks with `<hr/>`.
--- ---

View File

@ -1,5 +1,161 @@
# Develop Changelog # Develop Changelog
## 2026-05-09
- Reader: progress is now monotonic across devices — saved position only advances, never rewinds
- `POST /library/progress/{filename}` in `routers/reader.py` parses the incoming `cfi` as `(chapterIndex, scrollFrac)` and the currently stored row the same way, then skips the write when `new_pos <= cur_pos`
- Same `cfi` format is used for EPUB, PDF and CBR/CBZ, so one tuple comparison covers all readers
- Explicit Read/Unread actions still clear the row (`mark-read` / `mark-unread` delete from `reading_progress`), so users can deliberately reset and start over
- Reason: when reading the same book on device A (chapter 12) and then continuing on device B (chapter 15), opening device A again previously sent its stale chapter-12 cfi back to the server and overwrote the further progress; now the older position is ignored
---
## 2026-04-22 (5)
- Grabber: newly converted books now appear in the **New** view again
- Both the DB-storage and file-EPUB branches in `routers/grabber.py` set `needs_review: True` on `upsert_book` (was `False`); the New view filters on `needs_review == True`, so previously grabbed books never showed up there
- Matches the behavior of disk-scanned imports (`library.py` already sets `needs_review = True` for freshly discovered files)
---
*Released as v0.2.8 on 2026-04-22*
## 2026-04-22 (4)
- Break detection: runs of consecutive break images are now collapsed to a single break
- New helper `collapse_consecutive_breaks()` in `xhtml.py` matches 2+ consecutive `<center><img src=".../break.png" .../></center>` occurrences (with optional whitespace in between) and replaces them with one
- Applied in `normalize_wysiwyg_html()` (editor save path) and in `routers/grabber.py` on both the preview `converted_xhtml` and the per-chapter `content_html` produced during scraping
- Docs: `docs/TECHNICAL.md` updated to cover previously missing changes — `POST /api/edit/intro/{filename}` and the `title` field on file-EPUB chapter save; FlareSolverr sidecar and `BaseScraper.close()`; `AwesomeDudeScraper` uses FlareSolverr; `make_epub(include_intro=…)` and `epub_utils.build_book_info_body_html`; grabber DB flow stores Book Info as chapter 0; `"Book Info"` h1-strip skip in reader; new env vars (`FLARESOLVERR_URL`, `FLARESOLVERR_TIMEOUT_MS`, `NOVELA_PORT`, `ADMINER_PORT`); `collapse_consecutive_breaks()` helper
---
*Released as v0.2.7 on 2026-04-22*
## 2026-04-22 (3)
- Scrapers: Cloudflare "Just a moment…" challenges are now solved via a new FlareSolverr sidecar service so books on protected sites (awesomedude.org) can be scraped again
- New service `flaresolverr` in `stack/stack.yml` (image `ghcr.io/flaresolverr/flaresolverr:latest`, internal-only, on `novela-net`); `novela` service gains `FLARESOLVERR_URL=http://flaresolverr:8191/v1` and a `depends_on: flaresolverr`
- New helpers in `scrapers/base.py`: `flaresolverr_get(url, timeout_ms=None, session=None)` posts `cmd: request.get` and returns a `SimpleNamespace(text, url)` (drop-in for `httpx.Response` attributes); `flaresolverr_session_create()` and `flaresolverr_session_destroy(sid)` manage browser sessions so Cloudflare cookies stay warm across per-chapter requests (first page solves the challenge, all further chapters reuse the same browser — much faster)
- Configurable via `FLARESOLVERR_URL` and `FLARESOLVERR_TIMEOUT_MS` env vars (defaults: service DNS name and 60000 ms)
- `BaseScraper` gained an async `close()` method (default no-op) for releasing scraper-scoped resources; `scrapers/awesomedude.py` creates a FlareSolverr session in `fetch_book_info`, reuses it in all `fetch_chapter` calls, and destroys it in `close()`
- `routers/grabber.py` now wraps all scraper usage in `try/finally: await scraper.close()` so sessions are released even on errors
- Stack uses `${NOVELA_PORT}` and `${ADMINER_PORT}` (defined in `stack/novela.env` as 8099 / 8098) so host-port values don't diverge between environments
---
*Released as v0.2.6 on 2026-04-22*
## 2026-04-22 (2)
- Editor: chapter titles are now editable for file-EPUB books as well (DB books already supported this)
- Frontend (`editor.js`): the `chapter-title-input` is always shown (the read-only `header-chapter` label is hidden for both storage types), title changes mark the chapter dirty, and the title is sent in both `saveChapter` and `saveAllChapters`
- Backend (`routers/editor.py`): `POST /api/edit/chapter/{index}/{filename}` for file EPUBs now accepts `title` alongside `content`; if the title changed it calls new helper `_update_epub_navpoint_title(path, href, new_title)` which locates the matching NCX `navPoint` by content src and rewrites its `<text>` via `rewrite_epub_entries`
---
## 2026-04-22
- Book Info page: new "Info page" button in the editor toolbar generates a gayauthors-style info page and inserts it as the first chapter
- New endpoint `POST /api/edit/intro/{filename}` builds the page from stored metadata (author, genres, sub-genres, tags, description, source, updated) and prepends it
- DB books: shifts existing `chapter_index` values up by one and inserts `"Book Info"` at index 0 (two-step negation to avoid unique-constraint violations)
- File EPUBs: writes a new `intro_<hex>.xhtml` via `make_intro_xhtml`, adds a manifest item, places the `itemref` at the start of the spine, and inserts a navPoint at the top of the NCX with renumbered `playOrder`
- Empty metadata fields are skipped
- Option A: no duplicate detection — clicking the button on a book that already has one will add a second page
- Grabber convert: DB-storage conversions now persist the Book Info page as a real stored chapter at index 0 (so it shows up in the editor and reader); EPUB-storage conversions continue to produce `intro.xhtml` via `make_epub` as before
- DB → EPUB export (`reader.py`): no longer synthesises `intro.xhtml` (`make_epub(..., include_intro=False)`) — the stored chapter 0 is the single source of truth
- Legacy DB books converted before this change have no stored info page; their exports will lack an intro page until the button is used
- `make_epub` gained an `include_intro: bool = True` parameter; when false, the `intro.xhtml` file, its manifest item, its spine `itemref`, and its NCX navPoint are all omitted (remaining `playOrder` values start at 1)
- Shared helper: `epub_utils.build_book_info_body_html(title, author, info)` — returns the inner-body HTML fragment used for DB storage, starting with `<h1>{title}</h1>`; skips empty fields and separates description and source/updated blocks with `<hr/>`
- DB-storage info page: chapter title is `"Book Info"`; to preserve the leading `<h1>{book title}</h1>` in its body, the leading-h-tag stripping in `reader.py` (`get_chapter_html` and the DB→EPUB export) is now skipped when `title == "Book Info"`
---
*Released as v0.2.5 on 2026-04-22*
## 2026-04-21 (2)
- Backup: separate "scanned" vs "uploaded" counters in backup_log and UI
- New migration `backup_log_scanned_files` adds `scanned_files INTEGER` column to `backup_log`
- `_run_backup_internal` now returns `(scanned_files, uploaded_count, uploaded_size)`
- `_finish_backup_log(...)` accepts `scanned_files=`; `_run_backup_job` passes the value through
- `/api/backup/status` and `/api/backup/history` return `uploaded_files` (old `files_count`) and `scanned_files`
- `backup.html`: Latest Status now shows separate "Scanned" and "Uploaded" rows; the History table's "Files" column is split into two columns, "Scanned" and "Uploaded" (colspan of loading/empty states updated to 8)
- Backup page: live phase indicator under the Run buttons while a backup is running
- New `#run-progress` status line is filled by `pollRunProgress()` which polls `/api/backup/progress` every 3 s
- Phase labels spelled out: `scanning library`, `uploading library objects`, `uploading snapshot`, `uploading pg_dump (may take minutes)` — so users understand why the counter sits at `N / N` during the final phases
- Polling starts on page load (via `refreshAll()`) and is re-kicked when Run Live/Dry is clicked; it auto-stops when progress reports `running=false`
---
*Released as v0.2.4 on 2026-04-21*
## 2026-04-21
- Backup: fix Dropbox `Read timed out. (read timeout=120)` error on large uploads
- `dropbox.Dropbox(...)` `timeout` raised from `120` to `300` in `routers/backup.py` (`_dbx()`, both refresh-token and legacy-token branches)
- `_DROPBOX_UPLOAD_CHUNK` reduced from `100 * 1024 * 1024` (100 MB) to `16 * 1024 * 1024` (16 MB)
- `_DROPBOX_UPLOAD_THRESHOLD` lowered to match (`16 * 1024 * 1024`) so the session upload path is used earlier
- Net effect: each chunk PUT finishes well within the socket timeout, and a stalled connection gets 5 minutes instead of 2 before erroring
---
*Released as v0.2.3 on 2026-04-21*
## 2026-04-16 (2)
- Editor: four inline formatting buttons added to the chapter editor toolbar
- **S** (subheading) — wraps selection in `<span class="subheading">` (red, bold in reader)
- **C** (chat) — wraps selection in `<span class="chat">` (orange in reader)
- **→|** (indent) — wraps selection in `<p style="padding-left: 40px;">` (or `<div>` when the selection contains block elements)
- **[ ]** (comment) — wraps selection in `<div class="novela-comment">` (blue left border + subtle background in reader)
- Without a selection: the tag is inserted at the cursor with the cursor positioned inside
- All four buttons disabled when no chapter is loaded; enabled state mirrors the existing Break button
- Wrap logic auto-detects block content in the selection: `wrapSpan` and `insertIndent` use a `<div>` wrapper instead of `<span>`/`<p>` when the selection contains `<p>`, `<div>`, `<h*>` etc. to keep the HTML valid
- Reader CSS extended: `span.subheading`, `span.chat`, `p[style*="padding-left"]`, and `.novela-comment` are styled in `reader.html`
## 2026-04-16 (1)
- Startup: migration logging is now visible in Docker logs
- `logging.basicConfig(level=logging.INFO)` added to `main.py`
- `migrations.py` logs for each migration whether it was skipped or executed (with duration in ms)
- Summary line at the end: "all already applied" or "N executed"
---
*Released as v0.2.1 on 2026-04-16*
## 2026-04-15 (4)
- Scraper: fix encoding for Codey's World
- Pages are decoded with `cp1252` (Windows-1252) instead of relying on `r.text` or `html.parser` auto-detection via `iso-8859-1`
- `cp1252` correctly maps the 0x80–0x9F byte range: `…`, `‘`, `’`, `“`, `”`, `–`, `—` etc. now render correctly instead of producing replacement characters
- XHTML: normalize non-breaking spaces globally
- `\xa0` (HTML `&nbsp;`) is now replaced with a regular space before HTML-escaping in both `element_to_xhtml` and `normalize_wysiwyg_html`
- Consecutive spaces resulting from this substitution are collapsed to a single space
- Applies to all scrapers
## 2026-04-15 (3)
- Editor: chapter add/delete deferred until Save is clicked
- Adding or deleting a chapter no longer triggers an immediate API call
- All structural changes are collected in memory and applied (in the correct order) when the Save button is pressed
- Deletes are applied in reverse server-index order to avoid index shifting errors; new chapters are appended afterwards
- Fix: `saving` flag is set at the start of a delete operation to prevent a concurrent `saveChapter` from flushing pending changes during the async gap in `loadChapter`
- Refactor: fix unclosed file handles in `epub.py`
- `make_epub` and `write_epub_file` now use `Path.read_text()` / `Path.read_bytes()` instead of bare `open()` calls
- Refactor: eliminate temp file in `pdf_cover_thumb`
- Cover thumbnail is now generated fully in-memory via `Image.frombytes()` from the PyMuPDF pixmap — removes the race condition when multiple requests hit the same PDF simultaneously
- Refactor: harden `security.py`
- Hardcoded fallback encryption key removed; raises `RuntimeError` with a clear message when neither `NOVELA_MASTER_KEY` nor `POSTGRES_PASSWORD` is set
- Fernet instance cached with `@functools.lru_cache(maxsize=1)` — key derivation runs only once per process
- Add `GET /health` endpoint
- Runs `SELECT 1` against the database and returns `{"ok": true/false}`
- Performance: cap in-memory job dicts at 50 entries
- `JOBS` in `grabber.py` and `BACKUP_TASKS`/`BACKUP_PROGRESS` in `backup.py` are trimmed (oldest-first) whenever a new entry is added
- Performance: improve ETag accuracy for the library API
- ETag now includes `MAX(updated_at)` from `reading_progress` and `MAX(id)` from `book_tags`, so tag changes and progress updates correctly invalidate the client cache
- Performance: cache CBR/CBZ page list
- `cbr_page_list` is cached per `(path, mtime)` via `lru_cache(maxsize=64)` — avoids opening the archive twice per page request
- Refactor: normalize transaction handling in `builder.py`
- All `conn.commit()` calls replaced with `with conn:` context manager, consistent with the rest of the codebase
---
*Released as v0.2.0 on 2026-04-15*
## 2026-04-15 (2)
- Editor: fix chapter add failing with UniqueViolation on DB-stored books
- PostgreSQL checks the unique constraint on `(filename, chapter_index)` per row during `UPDATE`, so incrementing consecutive indices in a single statement (e.g. 1→2 while 2 exists) raised a `UniqueViolation`
- Fixed by using a two-step approach: first shift affected rows to temporary negative values, then to their final positive values
- Refactor: eliminate duplicated EPUB helper functions across `reader.py`, `editor.py`, `common.py`
- New `epub_utils.py` with shared `find_opf_path`, `norm_href`, `epub_spine`, `make_new_chapter_xhtml`, `rewrite_epub_entries`
- Fixes the double-escaped `\\\\s*` regex in the old `_epub_spine` OPF path lookup (was silently falling back to directory scan)
- `rewrite_epub_entries` combines crash-safe `.tmp.epub` write with `ZIP_STORED` for the `mimetype` entry (EPUB spec requirement)
- All private `_epub_spine`, `_norm_href`, `_find_opf_path`, `_make_new_chapter_xhtml`, `_rewrite_epub_entries` copies removed from `reader.py`, `editor.py`, `common.py`
- Migrations: run each migration only once via `schema_migrations` tracking table
- Eliminates heavy `rebuild_chapter_tsv_with_title` UPDATE running on every container restart
- Reduces startup from 37 separate DB connections to 1
## 2026-04-15 (1) ## 2026-04-15 (1)
- Reader: font size control in reading settings - Reader: font size control in reading settings
- New "Font size" slider (80–150%, default 105%) in the settings drawer, between "Content width" and "Text colour" - New "Font size" slider (80–150%, default 105%) in the settings drawer, between "Content width" and "Text colour"
@ -581,3 +737,8 @@ This file tracks changes on the `develop` line.
- Fixed tag/genre search and tag-pill navigation being broken: - Fixed tag/genre search and tag-pill navigation being broken:
- `renderGenreView` was filtering on `b.genres` (non-existent field); now uses `bookGenres()`, `bookSubgenres()`, `bookPlainTags()` - `renderGenreView` was filtering on `b.genres` (non-existent field); now uses `bookGenres()`, `bookSubgenres()`, `bookPlainTags()`
- `renderSearchResults` had the same bug; search now covers title, author, genres, sub-genres, and tags - `renderSearchResults` had the same bug; search now covers title, author, genres, sub-genres, and tags
- Made reading progress monotonic across devices:
- `POST /library/progress/{filename}` now rejects any save whose `(chapter_index, scrollFrac)` is not strictly ahead of the stored position (returns `{"ok": true, "skipped": true}`)
- prevents device A from overwriting further progress saved by device B when switching between devices without closing the book
- also prevents bookmark-based backward navigation (e.g. jumping back to correct an earlier chapter) from clobbering the furthest-read position
- progress reset remains via the explicit Read/Unread actions, which clear the `reading_progress` row

View File

@ -1,5 +1,143 @@
# Changelog # Changelog
## v0.2.9 — 2026-05-09
### Bug fixes
- Reader: reading position is now monotonic across devices — saved position only advances, never rewinds. Previously, reading the same book on a second device (e.g. continuing from chapter 12 to chapter 15 on an iPad) and then opening the book again on the original device would overwrite the further position with the older one, dropping the user back to chapter 12. The progress endpoint now compares the incoming `(chapterIndex, scrollFrac)` to the stored value and only writes when the new position is strictly further. The explicit **Mark as read** / **Mark as unread** actions still clear the row, so deliberate restarts still work.
---
## v0.2.8 — 2026-04-22
### Bug fixes
- Grabber: newly converted books now appear in the **New** view again. Both the DB-storage and file-EPUB branches in `routers/grabber.py` now persist `needs_review = True` on `upsert_book` (was `False`); the New view filters on `needs_review`, so previously grabbed books never showed up there. Disk-scanned imports already behaved correctly.
---
## v0.2.7 — 2026-04-22
### Bug fixes
- Break detection: runs of two or more consecutive scene-break images are now collapsed to a single break. Previously some books ended up with several identical `<center><img src=".../break.png"/></center>` lines directly after each other; the reader and exported EPUBs now show only one.
### Internal
- New helper `collapse_consecutive_breaks()` in `xhtml.py` matches 2+ consecutive break-image `<center>` lines (with optional whitespace between) and replaces them with a single break. Applied in `normalize_wysiwyg_html()` (editor save path) and in `routers/grabber.py` on both the preview `converted_xhtml` and the per-chapter `content_html` produced during scraping.
- `docs/TECHNICAL.md` updated to cover previously missing changes: `POST /api/edit/intro/{filename}` and the `title` field on file-EPUB chapter save; FlareSolverr sidecar and `BaseScraper.close()`; `AwesomeDudeScraper` uses FlareSolverr; `make_epub(include_intro=…)` and `epub_utils.build_book_info_body_html`; grabber DB flow stores Book Info as chapter 0; `"Book Info"` h1-strip skip in reader; new env vars (`FLARESOLVERR_URL`, `FLARESOLVERR_TIMEOUT_MS`, `NOVELA_PORT`, `ADMINER_PORT`); `collapse_consecutive_breaks()` helper.
---
## v0.2.6 — 2026-04-22
### New features
- Scrapers: Cloudflare-protected sites (e.g. awesomedude.org, which moved fully behind a "Just a moment…" JavaScript challenge) can be scraped again via a new **FlareSolverr** sidecar service that solves the challenge in a headless browser; the novela container uses FlareSolverr for both the book-info page and every chapter fetch
- Per-book FlareSolverr sessions: the scraper creates one browser session at the start of a book, reuses it across all chapters (Cloudflare cookies stay warm), and destroys it on completion — so only the first request pays the full challenge-solve cost and subsequent chapters are much faster
### Internal
- `stack/stack.yml` adds a `flaresolverr` service (image `ghcr.io/flaresolverr/flaresolverr:latest`, internal-only, on `novela-net`); `novela` gains `FLARESOLVERR_URL=http://flaresolverr:8191/v1` and a `depends_on: flaresolverr`
- Host port mappings in `stack/stack.yml` are now driven by `${NOVELA_PORT}` and `${ADMINER_PORT}`, defaulted in `stack/novela.env` to `8099` / `8098`; production stacks can override without diverging from the repo
- New helpers in `scrapers/base.py`: `flaresolverr_get(url, timeout_ms=None, session=None)` returns a `SimpleNamespace(text, url)` (drop-in for `httpx.Response` attributes); `flaresolverr_session_create()` / `flaresolverr_session_destroy(sid)` manage browser sessions; configurable via `FLARESOLVERR_URL` and `FLARESOLVERR_TIMEOUT_MS` env vars
- `BaseScraper` gained an async `close()` method (default no-op) so scrapers can release scoped resources
- `scrapers/awesomedude.py` creates a FlareSolverr session in `fetch_book_info`, reuses it in every `fetch_chapter` call, and destroys it in `close()`
- `routers/grabber.py` now wraps all three scraper usages (`debug_run`, `preview`, `_run_scrape`) in `try/finally: await scraper.close()` so FlareSolverr sessions are always released, even on errors
---
## v0.2.5 — 2026-04-22
### New features
- Editor: **Info page** button in the chapter editor toolbar generates a gayauthors-style book-info page (title, author, genres, sub-genres, tags, description, source, updated) and inserts it as the first chapter; empty metadata fields are skipped; no duplicate detection — clicking it again will add another page
- Editor: **chapter titles are now editable for file-EPUB books** (DB books already supported this); the chapter-title input in the header works for both storage types, and for file EPUBs the matching NCX `navPoint` is updated on save so the table of contents reflects the new title
- Grabber: DB-storage conversions now persist the Book Info page as a real stored chapter at index 0, so it is visible in the editor and reader (EPUB-storage conversions continue to produce `intro.xhtml` via `make_epub` as before)
### Internal
- New endpoint `POST /api/edit/intro/{filename}` — for DB books, shifts existing `chapter_index` values up by one via a two-step negation (to avoid unique-constraint violations) and inserts `"Book Info"` at index 0; for file EPUBs, writes a new `intro_<hex>.xhtml` via `make_intro_xhtml`, adds a manifest item, places the `itemref` at the start of the spine, and inserts a navPoint at the top of the NCX with renumbered `playOrder`
- `POST /api/edit/chapter/{index}/{filename}` for file EPUBs now accepts a `title` field alongside `content` and updates the matching NCX `navPoint` text when it changes
- `make_epub` gained an `include_intro: bool = True` parameter; DB → EPUB export (`reader.py`) calls it with `include_intro=False` because the stored chapter 0 is now the single source of truth for the info page
- `reader.py` leading-h-tag stripping (`get_chapter_html` and the DB→EPUB export) is skipped when `title == "Book Info"`, so the `<h1>{book title}</h1>` in that chapter's body survives
- New helper `epub_utils.build_book_info_body_html(title, author, info)` returns the inner-body HTML fragment for DB storage; skips empty fields and separates description and source/updated blocks with `<hr/>`
---
## v0.2.4 — 2026-04-21
### Improvements
- Backup: status and history now clearly distinguish **Scanned** (number of library files inspected) from **Uploaded** (objects actually sent to Dropbox — library + snapshot + pg_dump); previously only the upload count was shown, which caused confusion when most files were already deduplicated and the number looked suspiciously low (e.g. `files=2` while the library contained 952 books)
- Backup page: a live phase indicator is shown under the Run buttons while a backup is running (`scanning library`, `uploading library objects`, `uploading snapshot`, `uploading pg_dump`), so it is clear the process is not stuck at `N / N` while the snapshot and pg_dump are being uploaded
### Internal
- New migration `backup_log_scanned_files` adds a `scanned_files` column to `backup_log`; historical rows keep `NULL` for this column
- `/api/backup/status` and `/api/backup/history` now return `uploaded_files` and `scanned_files`; the old `files_count` key was renamed to `uploaded_files` — external consumers (if any) should update accordingly
---
## v0.2.3 — 2026-04-21
### Bug fixes
- Backup: Dropbox uploads no longer fail with `HTTPSConnectionPool ... Read timed out. (read timeout=120)` — the Dropbox client timeout was raised from 120s to 300s and the upload chunk size was reduced from 100 MB to 16 MB so each chunk completes comfortably within the timeout window
---
## v0.2.2 — 2026-04-16
### New features
- Editor: **four inline formatting buttons** added to the chapter editor toolbar
- **S** — wraps selection in `<span class="subheading">` (red bold text in the reader)
- **C** — wraps selection in `<span class="chat">` (orange text in the reader)
- **→|** — wraps selection in an indented paragraph (`padding-left: 40px`)
- **[ ]** — wraps selection in a comment block with a blue left border and tinted background
- Without a selection each button inserts an empty tag with the cursor placed inside
- Wrap logic automatically uses a `<div>` when the selection contains block elements, keeping the HTML valid
---
## v0.2.1 — 2026-04-16
### Improvements
- Startup: migration progress is now visible in Docker logs — each migration logs whether it was skipped or executed (with duration in ms); a summary line at the end shows either "all already applied" or how many were executed
---
## v0.2.0 — 2026-04-15
### New features
- Editor: **chapter add and delete are now deferred** — structural changes are no longer saved immediately; they are applied in the correct order when the Save button is pressed
- Operations: **`GET /health` endpoint** — returns `{"ok": true}` when the database is reachable; suitable for container health checks and monitoring
### Bug fixes
- Editor: adding a chapter to a DB-stored book no longer fails with a UniqueViolation — PostgreSQL was checking the unique constraint on `(filename, chapter_index)` mid-update; fixed with a two-step index shift
- Scraper: Codey's World pages now decode correctly — pages are read as Windows-1252 (cp1252), which correctly maps the 0x80–0x9F byte range; characters like `…`, `’`, `”`, `—` no longer appear as replacement characters
- XHTML conversion: `&nbsp;` followed by a regular space no longer produces a double space — non-breaking spaces are normalized to regular spaces and consecutive spaces are collapsed; applies to all scrapers
### Improvements
- Startup: each database migration now runs only once — a `schema_migrations` tracking table prevents heavy migrations (such as the chapter TSV rebuild) from re-running on every container restart; startup connection overhead reduced from 37 separate connections to 1
- Library API: ETag now reflects changes to tags and reading progress — `MAX(updated_at)` from `reading_progress` and `MAX(id)` from `book_tags` are included; tag edits and progress updates now correctly invalidate the client cache
- CBR/CBZ reader: page list is cached per file and modification time — avoids opening the archive twice per page request
- Grabber and backup: in-memory job dicts are capped at 50 entries to prevent unbounded memory growth
### Internal
- Shared `epub_utils.py` module with deduplicated EPUB helper functions — eliminates near-identical copies of `epub_spine`, `find_opf_path`, `norm_href`, `make_new_chapter_xhtml`, and `rewrite_epub_entries` that existed across `reader.py`, `editor.py`, and `common.py`; fixes a double-escaped regex in the old OPF path lookup
- `pdf_cover_thumb` no longer writes a temporary file — the cover thumbnail is generated fully in-memory via `Image.frombytes()`, eliminating a race condition under concurrent requests
- `security.py`: hardcoded fallback encryption key removed; raises a clear error at startup when neither `NOVELA_MASTER_KEY` nor `POSTGRES_PASSWORD` is configured; Fernet instance cached per process
- `builder.py`: all explicit `conn.commit()` calls replaced with `with conn:` context manager, consistent with the rest of the codebase
---
## v0.1.12 — 2026-04-15 ## v0.1.12 — 2026-04-15
### New features ### New features

View File

@ -8,3 +8,7 @@ NOVELA_MASTER_KEY=change-me-long-random-secret
# Path for backup manifest/config inside container (default: config) # Path for backup manifest/config inside container (default: config)
CONFIG_DIR=config CONFIG_DIR=config
# Host ports (override per environment; the real stack uses different values)
NOVELA_PORT=8099
ADMINER_PORT=8098

View File

@ -5,7 +5,7 @@ services:
container_name: novela container_name: novela
restart: unless-stopped restart: unless-stopped
ports: ports:
- "8099:8000" - "${NOVELA_PORT}:8000"
environment: environment:
POSTGRES_HOST: postgres POSTGRES_HOST: postgres
POSTGRES_PORT: 5432 POSTGRES_PORT: 5432
@ -13,11 +13,23 @@ services:
POSTGRES_USER: ${POSTGRES_USER} POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
NOVELA_MASTER_KEY: ${NOVELA_MASTER_KEY} NOVELA_MASTER_KEY: ${NOVELA_MASTER_KEY}
FLARESOLVERR_URL: http://flaresolverr:8191/v1
volumes: volumes:
- /docker/appdata/novela/library:/app/library - /docker/appdata/novela/library:/app/library
- /docker/appdata/novela/config:/app/config - /docker/appdata/novela/config:/app/config
depends_on: depends_on:
- postgres - postgres
- flaresolverr
networks:
- novela-net
flaresolverr:
image: ghcr.io/flaresolverr/flaresolverr:latest
container_name: novela-flaresolverr
restart: unless-stopped
environment:
LOG_LEVEL: info
TZ: Europe/Amsterdam
networks: networks:
- novela-net - novela-net
@ -39,7 +51,7 @@ services:
container_name: novela-adminer container_name: novela-adminer
restart: unless-stopped restart: unless-stopped
ports: ports:
- "8098:8080" - "${ADMINER_PORT}:8080"
networks: networks:
- novela-net - novela-net