From aeed71a8ff755c54bc1a9eca51d714aecaabef68 Mon Sep 17 00:00:00 2001 From: Ivo Oskamp Date: Wed, 15 Apr 2026 21:39:20 +0200 Subject: [PATCH] v0.1.12: reader font size slider, new scrapers, series nav, series_volume, and more Co-Authored-By: Claude Sonnet 4.6 --- containers/novela/changelog.py | 173 +++++++++ containers/novela/migrations.py | 30 ++ containers/novela/routers/backup.py | 29 +- containers/novela/routers/builder.py | 3 +- containers/novela/routers/bulk_import.py | 5 +- containers/novela/routers/changelog.py | 3 +- containers/novela/routers/common.py | 28 +- containers/novela/routers/editor.py | 3 +- containers/novela/routers/following.py | 3 +- containers/novela/routers/grabber.py | 29 +- containers/novela/routers/library.py | 125 +++++-- containers/novela/routers/reader.py | 92 ++++- containers/novela/routers/search.py | 37 +- containers/novela/routers/settings.py | 58 ++- containers/novela/scrapers/__init__.py | 10 + containers/novela/scrapers/codeysworld.py | 208 +++++++++++ containers/novela/scrapers/iomfats.py | 267 ++++++++++++++ containers/novela/scrapers/nifty.py | 358 +++++++++++++++++++ containers/novela/scrapers/nifty_new.py | 310 ++++++++++++++++ containers/novela/scrapers/tedlouis.py | 139 +++++++ containers/novela/shared_templates.py | 18 + containers/novela/static/book.js | 55 +-- containers/novela/static/library.css | 21 ++ containers/novela/static/library.js | 95 +++-- containers/novela/static/sidebar.css | 24 ++ containers/novela/templates/_sidebar.html | 4 + containers/novela/templates/backup.html | 2 +- containers/novela/templates/book.html | 31 +- containers/novela/templates/builder.html | 2 +- containers/novela/templates/bulk_import.html | 124 ++++--- containers/novela/templates/changelog.html | 2 +- containers/novela/templates/credentials.html | 2 +- containers/novela/templates/debug.html | 2 +- containers/novela/templates/editor.html | 2 +- containers/novela/templates/following.html | 2 +- containers/novela/templates/grabber.html | 2 +- containers/novela/templates/home.html | 2 +- containers/novela/templates/index.html | 2 +- containers/novela/templates/library.html | 2 +- containers/novela/templates/reader.html | 87 ++++- containers/novela/templates/search.html | 50 ++- containers/novela/templates/settings.html | 95 ++++- containers/novela/templates/stats.html | 2 +- containers/novela/xhtml.py | 8 +- docs/TECHNICAL.md | 153 +++++++- docs/changelog-develop.md | 215 +++++++++++ docs/changelog.md | 104 ++++++ 47 files changed, 2795 insertions(+), 223 deletions(-) create mode 100644 containers/novela/scrapers/codeysworld.py create mode 100644 containers/novela/scrapers/iomfats.py create mode 100644 containers/novela/scrapers/nifty.py create mode 100644 containers/novela/scrapers/nifty_new.py create mode 100644 containers/novela/scrapers/tedlouis.py create mode 100644 containers/novela/shared_templates.py diff --git a/containers/novela/changelog.py b/containers/novela/changelog.py index b57d900..2b40ce7 100644 --- a/containers/novela/changelog.py +++ b/containers/novela/changelog.py @@ -3,6 +3,179 @@ Changelog data for Novela """ CHANGELOG = [ + { + "version": "v0.1.12", + "date": "2026-04-15", + "summary": "Font size slider in the reader settings drawer.", + "sections": [ + { + "title": "New features", + "type": "feature", + "changes": [ + "Reader: font size slider in the reading settings drawer — adjust text size from 80% to 150%; setting is saved per device so iPad and desktop each remember their own preference", + ], + }, + ], + }, + { + "version": 
"v0.1.11", + "date": "2026-04-13", + "summary": "Comma-separated values in genre, subgenre and tag inputs are now split into individual tags.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Edit metadata: pasting or typing a comma-separated list in the genre, subgenre or tag input now adds each value as a separate tag instead of one combined tag", + ], + }, + ], + }, + { + "version": "v0.1.10", + "date": "2026-04-12", + "summary": "Series navigation in the reader, series_volume support for annual comics, archive a series in one click, and a TedLouis scraper fix.", + "sections": [ + { + "title": "New features", + "type": "feature", + "changes": [ + "Reader: prev/next volume buttons in the header for books that are part of a series — buttons appear automatically when the book has adjacent volumes; tooltip shows the volume number and title; marking a book as read redirects directly to the next volume in the reader instead of the book detail page", + "Comics: series_volume field for annual series where issue numbers restart each year (e.g. Donald Duck (1982) [15]) — stored in the database and EPUB OPF; displayed as '(year)' after the series name on the book detail page; sorting respects series_volume before series_index; supported in Bulk Import via %series_volume% placeholder and 'Year/Vol.' shared field", + "Library: archive or unarchive an entire series in one click — 'Archive series' / 'Unarchive series' button in the series detail view; updates all books in the series via a single SQL UPDATE and recalculates sidebar counters without a page reload", + ], + }, + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "TedLouis scraper: title extraction no longer includes the 'Back' button text or the author byline — only direct text nodes of the title heading are used", + ], + }, + ], + }, + { + "version": "v0.1.9", + "date": "2026-04-08", + "summary": "Five new scrapers (Nifty, codeysworld.org, iomfats.org, tedlouis.com), break image settings, and bug fixes.", + "sections": [ + { + "title": "New features", + "type": "feature", + "changes": [ + "New scraper: Nifty.org (classic) — scrapes plain-text email-format stories; email headers stripped, boilerplate paragraphs auto-detected and hidden, scene-break patterns converted to break images", + "New scraper: new.nifty.org — scrapes the Next.js version of Nifty; reads chapter content from RSC payload when the static HTML does not include it; boilerplate detection shared with classic Nifty", + "New scraper: codeysworld.org — single-file and multi-chapter stories; title and author extracted from heading elements; category from URL path stored as tag; navigation links and audio links stripped from chapter content", + "New scraper: iomfats.org — all stories are listed on a single author page; provide any chapter URL and the scraper finds the correct story automatically; supports single stories and multi-part series (series name, book title, and series index derived from the page structure)", + "New scraper: tedlouis.com — all pages use opaque token-based routing (?t=TOKEN); provide the story index URL and the scraper collects all chapter links from the three-column chapter list", + "Settings: break image upload — upload a custom PNG/JPG/WebP to use as the scene break image in all converted books; stored in the imagestore and applied to both DB-stored and EPUB-format books", + "Settings: develop mode toggle — shows a DEVELOP banner and updates the page title across all pages when enabled", + ], + }, + { + 
"title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Break images were not displayed in DB-stored books — the image path '../Images/break.png' is a relative EPUB path that does not exist for DB content; DB mode now uses '/static/break.png'", + "Break images were silently lost during import — the image was decomposed before element_to_xhtml ran, leaving an empty wrapper; the wrapper is now replaced with
so the break is correctly rendered", + ], + }, + ], + }, + { + "version": "v0.1.8", + "date": "2026-04-06", + "summary": "Cover upload for DB-stored books, and rating moved to the Edit metadata panel.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Library: cover upload now works for DB-stored books — the upload endpoint previously returned 'File not found' because DB books have no file on disk; the cover is now stored directly in the cover cache", + ], + }, + { + "title": "Improvements", + "type": "improvement", + "changes": [ + "Book detail: rating moved from clickable stars to a dropdown in the Edit metadata panel — avoids touch-input issues on iPad where hover state caused all stars to appear filled", + ], + }, + ], + }, + { + "version": "v0.1.7", + "date": "2026-04-06", + "summary": "Search filter for unread novels/shorts, Dropbox chunked upload fix, and underscores in new filenames.", + "sections": [ + { + "title": "New feature", + "type": "feature", + "changes": [ + "Search: filter on unread novels or unread shorts — a second toggle row (All / Unread novels / Unread shorts) restricts results to books with no reading history; filter is preserved in the URL", + ], + }, + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Backup: files larger than 148 MB now upload correctly — chunked upload session (100 MB per chunk) replaces the single-call upload that hit Dropbox's payload size limit", + ], + }, + { + "title": "Improvements", + "type": "improvement", + "changes": [ + "File paths: spaces in new filenames are now replaced with underscores (publisher, author, title, series segments); series separator changed from ' - ' to '_-_'", + ], + }, + ], + }, + { + "version": "v0.1.6", + "date": "2026-04-05", + "summary": "Bug fixes: double chapter titles in exported EPUBs, and authors/publishers with only archived books now remain visible.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Export EPUB: double chapter titles fixed — same heading-stripping logic as the reader now applied before passing content to the chapter builder", + "Library: authors and publishers with only archived books now remain visible in the Authors and Publishers list views", + ], + }, + ], + }, + { + "version": "v0.1.5", + "date": "2026-04-04", + "summary": "Bug fixes: double chapter titles in pandoc-style EPUB content, and search now requires words in order (phrase match).", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Reader: double chapter titles for pandoc-converted books — headings wrapped in a
<section> element were not stripped by the previous regex; now also removes the first heading found directly inside an opening <section> or <div>
", + "Search: multi-word queries no longer match chapters where the words appear far apart — switched to phraseto_tsquery so all words must appear in order", + ], + }, + ], + }, + { + "version": "v0.1.4", + "date": "2026-04-04", + "summary": "Bug fixes: double chapter titles in the reader for DB-stored books, and archived books now shown in author/publisher detail with an indicator badge.", + "sections": [ + { + "title": "Bug fixes", + "type": "bugfix", + "changes": [ + "Reader: double chapter titles in DB-stored books — the chapter endpoint now strips all leading headings from stored content before prepending its own chapter title; affects books scraped before front-matter stripping was added", + "Library: archived books were missing from author and publisher detail views — detail views now include all books (active and archived); archived books have a badge on their cover so they remain distinguishable", + ], + }, + ], + }, { "version": "v0.1.3", "date": "2026-04-03", diff --git a/containers/novela/migrations.py b/containers/novela/migrations.py index df01357..5a01cf7 100644 --- a/containers/novela/migrations.py +++ b/containers/novela/migrations.py @@ -349,6 +349,33 @@ def migrate_rebuild_chapter_tsv_with_title() -> None: ) +def migrate_create_app_settings() -> None: + _exec( + """ + CREATE TABLE IF NOT EXISTS app_settings ( + id INTEGER PRIMARY KEY DEFAULT 1, + develop_mode BOOLEAN NOT NULL DEFAULT FALSE, + CONSTRAINT single_row CHECK (id = 1) + ) + """ + ) + _exec("INSERT INTO app_settings (id, develop_mode) VALUES (1, FALSE) ON CONFLICT DO NOTHING") + + +def migrate_app_settings_break_image() -> None: + _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_sha256 VARCHAR(64) DEFAULT NULL") + _exec("ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS break_image_ext VARCHAR(10) DEFAULT NULL") + + +def migrate_series_volume() -> None: + _exec( + """ + ALTER TABLE library + ADD COLUMN IF NOT EXISTS series_volume VARCHAR(20) NOT NULL DEFAULT '' + """ + ) + + def run_migrations() -> None: migrate_create_library() migrate_create_book_tags() @@ -371,3 +398,6 @@ def run_migrations() -> None: migrate_create_book_images() migrate_create_book_chapters() migrate_rebuild_chapter_tsv_with_title() + migrate_create_app_settings() + migrate_app_settings_break_image() + migrate_series_volume() diff --git a/containers/novela/routers/backup.py b/containers/novela/routers/backup.py index 430dd09..043d49e 100644 --- a/containers/novela/routers/backup.py +++ b/containers/novela/routers/backup.py @@ -14,13 +14,12 @@ import httpx from dropbox.exceptions import ApiError, AuthError from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates from db import get_db_conn from routers.common import scan_media, upsert_book from security import decrypt_value, encrypt_value, is_encrypted_value -templates = Jinja2Templates(directory="templates") router = APIRouter() LIBRARY_DIR = Path(os.environ.get("LIBRARY_DIR", "library")) @@ -435,12 +434,36 @@ def _ensure_dropbox_dir(client: dropbox.Dropbox, path: str) -> None: pass +_DROPBOX_UPLOAD_CHUNK = 100 * 1024 * 1024 # 100 MB — below the 150 MB files_upload limit +_DROPBOX_UPLOAD_THRESHOLD = 148 * 1024 * 1024 # use session upload above this size + + def _dropbox_upload_bytes(client: dropbox.Dropbox, target_path: str, data: bytes) -> int: parent = str(Path(target_path).parent).replace("\\", "/") if not parent.startswith("/"): parent = "/" + parent 
_ensure_dropbox_dir(client, parent) - client.files_upload(data, target_path, mode=dropbox.files.WriteMode.overwrite, mute=True) + mode = dropbox.files.WriteMode.overwrite + if len(data) <= _DROPBOX_UPLOAD_THRESHOLD: + client.files_upload(data, target_path, mode=mode, mute=True) + else: + # Chunked upload session for large files + offset = 0 + session_id = None + while offset < len(data): + chunk = data[offset : offset + _DROPBOX_UPLOAD_CHUNK] + if session_id is None: + res = client.files_upload_session_start(chunk) + session_id = res.session_id + else: + cursor = dropbox.files.UploadSessionCursor(session_id=session_id, offset=offset) + remaining = len(data) - offset - len(chunk) + if remaining == 0: + commit = dropbox.files.CommitInfo(path=target_path, mode=mode, mute=True) + client.files_upload_session_finish(chunk, cursor, commit) + else: + client.files_upload_session_append_v2(chunk, cursor) + offset += len(chunk) return len(data) diff --git a/containers/novela/routers/builder.py b/containers/novela/routers/builder.py index 1674b69..2382405 100644 --- a/containers/novela/routers/builder.py +++ b/containers/novela/routers/builder.py @@ -5,7 +5,7 @@ from pathlib import Path from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates from db import get_db_conn from epub import build_epub @@ -13,7 +13,6 @@ from routers.common import LIBRARY_DIR, make_rel_path, upsert_book from xhtml import normalize_wysiwyg_html router = APIRouter() -templates = Jinja2Templates(directory="templates") # ── Helpers ─────────────────────────────────────────────────────────────────── diff --git a/containers/novela/routers/bulk_import.py b/containers/novela/routers/bulk_import.py index c8ce7fd..ff3f634 100644 --- a/containers/novela/routers/bulk_import.py +++ b/containers/novela/routers/bulk_import.py @@ -4,7 +4,7 @@ from pathlib import Path from fastapi import APIRouter, File, Form, Request, UploadFile from fastapi.responses import HTMLResponse, JSONResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates from cbr import cbr_page_count from db import get_db_conn @@ -18,7 +18,6 @@ from routers.common import ( upsert_book, ) -templates = Jinja2Templates(directory="templates") router = APIRouter() @@ -73,6 +72,7 @@ async def library_bulk_import( author = (row.get("author") or "").strip() or shared_author publisher = (row.get("publisher") or "").strip() or shared_publisher series = (row.get("series") or "").strip() or shared_data.get("series", "") + series_volume = ((row.get("series_volume") or "").strip() or shared_data.get("series_volume", ""))[:20] series_index, series_suffix = parse_volume_str(row.get("volume") or "") status = (row.get("status") or "").strip() or shared_status @@ -121,6 +121,7 @@ async def library_bulk_import( "series": series, "series_index": series_index, "series_suffix": series_suffix, + "series_volume": series_volume if series else "", "publication_status": status, "publish_date": publish_date, "has_cover": has_cover, diff --git a/containers/novela/routers/changelog.py b/containers/novela/routers/changelog.py index 30f67bc..719194c 100644 --- a/containers/novela/routers/changelog.py +++ b/containers/novela/routers/changelog.py @@ -1,11 +1,10 @@ from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates 
from changelog import CHANGELOG router = APIRouter() -templates = Jinja2Templates(directory="templates") @router.get("/changelog", response_class=HTMLResponse) diff --git a/containers/novela/routers/common.py b/containers/novela/routers/common.py index faaec72..00a44d3 100644 --- a/containers/novela/routers/common.py +++ b/containers/novela/routers/common.py @@ -30,9 +30,9 @@ def is_db_filename(filename: str) -> bool: def clean_segment(value: str, fallback: str, max_len: int) -> str: - txt = re.sub(r"\s+", " ", (value or "").strip()) + txt = re.sub(r"\s+", "_", (value or "").strip()) txt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", txt) - txt = re.sub(r"\.+$", "", txt).strip() + txt = re.sub(r"\.+$", "", txt).strip("_") return (txt or fallback)[:max_len] @@ -93,7 +93,7 @@ def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, s if series_name: idx = coerce_series_index(series_index) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] - return Path("db") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx} - {ttl}" + return Path("db") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}" return Path("db") / pub / auth / ttl if media_type == "epub": @@ -104,7 +104,7 @@ def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, s if series_name: idx = coerce_series_index(series_index) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] - return Path("epub") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx} - {ttl}.epub" + return Path("epub") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}.epub" return Path("epub") / pub / auth / "Stories" / f"{ttl}.epub" if media_type == "pdf": @@ -122,7 +122,7 @@ def make_rel_path(*, media_type: str, publisher: str, author: str, title: str, s if series_name: idx = coerce_series_index(series_index) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] - return Path("comics") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx} - {ttl}{comics_ext}" + return Path("comics") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}{comics_ext}" return Path("comics") / pub / auth / f"{ttl}{comics_ext}" @@ -239,6 +239,7 @@ def scan_epub(path: Path) -> dict: "series": "", "series_index": 0, "series_suffix": "", + "series_volume": "", "title": "", "publication_status": "", "author": "", @@ -280,6 +281,9 @@ def scan_epub(path: Path) -> dict: m = re.search(r']*name="novela:series_suffix"[^>]*content="([^"]+)"', opf, re.IGNORECASE) if m: out["series_suffix"] = re.sub(r"[^a-z]", "", m.group(1).lower())[:5] + m = re.search(r']*name="novela:series_volume"[^>]*content="([^"]+)"', opf, re.IGNORECASE) + if m: + out["series_volume"] = _html.unescape(m.group(1).strip())[:20] m = re.search(r']*name="publication_status"[^>]*content="([^"]+)"', opf, re.IGNORECASE) if m: out["publication_status"] = _html.unescape(m.group(1).strip()) @@ -358,9 +362,9 @@ def upsert_book(conn, filename: str, meta: dict, tags: list[tuple[str, str]] | N cur.execute( """ INSERT INTO library (filename, media_type, storage_type, title, author, publisher, has_cover, - series, series_index, series_suffix, publication_status, source_url, + series, series_index, series_suffix, series_volume, publication_status, source_url, publish_date, description, needs_review, want_to_read, rating, updated_at) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, %s, NOW()) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, %s, NOW()) ON CONFLICT 
(filename) DO UPDATE SET media_type = EXCLUDED.media_type, storage_type = EXCLUDED.storage_type, @@ -371,6 +375,7 @@ def upsert_book(conn, filename: str, meta: dict, tags: list[tuple[str, str]] | N series = COALESCE(NULLIF(EXCLUDED.series, ''), library.series), series_index = CASE WHEN COALESCE(EXCLUDED.series_index, 0) > 0 THEN EXCLUDED.series_index ELSE library.series_index END, series_suffix = COALESCE(NULLIF(EXCLUDED.series_suffix, ''), library.series_suffix), + series_volume = COALESCE(NULLIF(EXCLUDED.series_volume, ''), library.series_volume), publication_status = COALESCE(NULLIF(EXCLUDED.publication_status, ''), library.publication_status), source_url = COALESCE(NULLIF(EXCLUDED.source_url, ''), library.source_url), publish_date = COALESCE(EXCLUDED.publish_date, library.publish_date), @@ -389,6 +394,7 @@ def upsert_book(conn, filename: str, meta: dict, tags: list[tuple[str, str]] | N meta.get("series", ""), meta.get("series_index", 0), meta.get("series_suffix", ""), + meta.get("series_volume", ""), meta.get("publication_status", ""), meta.get("source_url", ""), meta.get("publish_date") or None, @@ -434,6 +440,7 @@ def list_library_json() -> list[dict]: l.rating, COALESCE(l.series_suffix, '') AS series_suffix, COALESCE(l.storage_type, 'file') AS storage_type, + COALESCE(l.series_volume, '') AS series_volume, json_agg( json_build_object('tag', bt.tag, 'tag_type', bt.tag_type) ) FILTER (WHERE bt.tag IS NOT NULL) AS tags @@ -451,8 +458,8 @@ def list_library_json() -> list[dict]: l.archived, l.needs_review, l.updated_at, rp.progress, rp.cfi, rp.page, rs.read_count, rs.last_read, - cc.filename, l.rating, l.series_suffix, l.storage_type - ORDER BY COALESCE(l.publisher, ''), COALESCE(l.author, ''), COALESCE(l.series, ''), l.series_index, COALESCE(l.title, '') + cc.filename, l.rating, l.series_suffix, l.storage_type, l.series_volume + ORDER BY COALESCE(l.publisher, ''), COALESCE(l.author, ''), COALESCE(l.series, ''), COALESCE(l.series_volume, ''), l.series_index, COALESCE(l.title, '') """ ) rows = cur.fetchall() @@ -471,6 +478,7 @@ def list_library_json() -> list[dict]: "series": r[6] or "", "series_index": r[7] or 0, "series_suffix": r[20] or "", + "series_volume": r[22] or "", "publication_status": r[8] or "", "want_to_read": bool(r[9]), "archived": bool(r[10]), @@ -482,7 +490,7 @@ def list_library_json() -> list[dict]: "read_count": r[16] or 0, "last_read": r[17].isoformat() if r[17] else None, "storage_type": r[21] or "file", - "tags": r[22] or [], + "tags": r[23] or [], "rating": r[19] or 0, } ) diff --git a/containers/novela/routers/editor.py b/containers/novela/routers/editor.py index 0705629..c00b1b7 100644 --- a/containers/novela/routers/editor.py +++ b/containers/novela/routers/editor.py @@ -8,14 +8,13 @@ from pathlib import Path from bs4 import BeautifulSoup from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse, JSONResponse, Response -from fastapi.templating import Jinja2Templates +from shared_templates import templates from db import get_db_conn from epub import read_epub_file, write_epub_file from routers.common import LIBRARY_DIR, is_db_filename, resolve_library_path, upsert_chapter router = APIRouter() -templates = Jinja2Templates(directory="templates") def _norm(base_dir: str, rel: str) -> str: diff --git a/containers/novela/routers/following.py b/containers/novela/routers/following.py index 608ae00..81243bf 100644 --- a/containers/novela/routers/following.py +++ b/containers/novela/routers/following.py @@ -2,11 +2,10 @@ from urllib.parse import 
unquote from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates from db import get_db_conn -templates = Jinja2Templates(directory="templates") router = APIRouter() diff --git a/containers/novela/routers/grabber.py b/containers/novela/routers/grabber.py index 34ad719..e5c8279 100644 --- a/containers/novela/routers/grabber.py +++ b/containers/novela/routers/grabber.py @@ -8,10 +8,10 @@ from typing import AsyncGenerator from urllib.parse import urljoin, urlparse import httpx -from bs4 import Tag +from bs4 import BeautifulSoup, NavigableString, Tag from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse, StreamingResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates from db import get_db_conn from epub import detect_image_format, make_chapter_xhtml, make_epub @@ -33,7 +33,6 @@ from scrapers.base import HEADERS from security import decrypt_value, encrypt_value, is_encrypted_value from xhtml import configure_break_patterns, element_to_xhtml, is_break_element -templates = Jinja2Templates(directory="templates") router = APIRouter() JOBS: dict[str, dict] = {} @@ -401,6 +400,15 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) _load_break_patterns() + storage_mode = job.get("storage_mode", "db") + # Break image path depends on storage mode: + # - EPUB: relative path inside the EPUB ZIP (break.png is embedded) + # - DB: absolute URL served by the static files handler + if storage_mode == "epub": + break_img_path = "../Images/break.png" + else: + break_img_path = "/static/break.png" + # Collect chapters as {title, content_html, images: [(sha256, ext, media_type, size, data)]} chapters = [] for i, ch in enumerate(book["chapters"], 1): @@ -413,7 +421,17 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) if content_el: for img_tag in content_el.find_all("img"): if is_break_element(img_tag): - img_tag.decompose() + # Replace the parent with
<hr/>
if it contains only + # this image, so element_to_xhtml can detect the break. + parent = img_tag.parent + meaningful = [ + c for c in parent.children + if not (isinstance(c, NavigableString) and not c.strip()) + ] + if len(meaningful) == 1 and parent is not content_el: + parent.replace_with(BeautifulSoup("
<hr/>
", "html.parser").hr) + else: + img_tag.decompose() continue src = img_tag.get("src", "") if not src or src.startswith("data:"): @@ -449,7 +467,7 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) filled_p = len(all_p) - empty_p empty_p_is_spacer = filled_p > 0 and empty_p >= filled_p * 0.5 for child in content_el.children: - part = element_to_xhtml(child, empty_p_is_spacer=empty_p_is_spacer) + part = element_to_xhtml(child, break_img_path=break_img_path, empty_p_is_spacer=empty_p_is_spacer) if part.strip(): xhtml_parts.append(part) @@ -464,7 +482,6 @@ async def _run_scrape(job_id: str, url: str, username: str, password: str, send) job["done"] = True return - storage_mode = job.get("storage_mode", "db") send("status", {"message": "Saving to library..."}) book_tags = ( diff --git a/containers/novela/routers/library.py b/containers/novela/routers/library.py index e040dc4..3adcc67 100644 --- a/containers/novela/routers/library.py +++ b/containers/novela/routers/library.py @@ -6,7 +6,7 @@ from pathlib import Path from fastapi import APIRouter, File, Request, UploadFile from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response -from fastapi.templating import Jinja2Templates +from shared_templates import templates from PIL import UnidentifiedImageError from db import get_db_conn @@ -28,7 +28,6 @@ from routers.common import ( upsert_cover_cache, ) -templates = Jinja2Templates(directory="templates") router = APIRouter() @@ -343,12 +342,6 @@ async def library_cover(filename: str): @router.post("/library/cover/{filename:path}") async def library_add_cover(filename: str, request: Request): - full = resolve_library_path(filename) - if full is None or not full.exists(): - return {"error": "File not found"} - if media_type_from_suffix(full) != "epub": - return {"error": "Cover upload is only supported for EPUB"} - body = await request.json() cover_b64 = body.get("cover_b64", "") if not cover_b64: @@ -356,6 +349,35 @@ async def library_add_cover(filename: str, request: Request): try: cover_data = base64.b64decode(cover_b64) + except Exception as e: + return {"error": str(e)} + + if is_db_filename(filename): + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM library WHERE filename = %s", (filename,)) + if not cur.fetchone(): + return {"error": "Not found"} + with conn: + with conn.cursor() as cur: + cur.execute( + "UPDATE library SET has_cover = TRUE, updated_at = NOW() WHERE filename = %s", + (filename,), + ) + try: + thumb = make_cover_thumb_webp(cover_data) + upsert_cover_cache(conn, filename, "image/webp", thumb) + except (UnidentifiedImageError, OSError, ValueError): + pass + return {"ok": True} + + full = resolve_library_path(filename) + if full is None or not full.exists(): + return {"error": "File not found"} + if media_type_from_suffix(full) != "epub": + return {"error": "Cover upload is only supported for EPUB"} + + try: add_cover_to_epub(full, cover_data) except Exception as e: return {"error": str(e)} @@ -419,6 +441,24 @@ async def library_archive(filename: str): return {"ok": True, "archived": val} +@router.post("/library/archive-series") +async def library_archive_series(request: Request): + body = await request.json() + series = body.get("series", "") + archive = bool(body.get("archive", True)) + if not series: + return {"error": "series is required"} + with get_db_conn() as conn: + with conn: + with conn.cursor() as cur: + cur.execute( + "UPDATE library SET archived = %s, updated_at = NOW() WHERE 
series = %s", + (archive, series), + ) + count = cur.rowcount + return {"ok": True, "archived": archive, "count": count} + + @router.post("/library/new/mark-reviewed") async def library_mark_new_reviewed(request: Request): body = await request.json() @@ -663,46 +703,59 @@ async def bulk_check_duplicates(request: Request): for item in items: title = item.get("title", "").strip().lower() author = item.get("author", "").strip().lower() + series = item.get("series", "").strip().lower() vol_str = item.get("volume", "").strip() try: vol_int = int(vol_str) if vol_str else None except ValueError: vol_int = None - parsed.append((title, author, vol_int)) + parsed.append((title, author, series, vol_int)) - # Fetch all DB rows matching any (title, author) pair - title_author_pairs = list({(t, a) for t, a, _ in parsed if t}) - if not title_author_pairs: - return {"duplicates": [False] * len(items)} - - conditions = " OR ".join( - "(LOWER(TRIM(title)) = %s AND LOWER(TRIM(author)) = %s)" for _ in title_author_pairs - ) - params = [v for pair in title_author_pairs for v in pair] with get_db_conn() as conn: with conn.cursor() as cur: - cur.execute( - f"SELECT LOWER(TRIM(title)), LOWER(TRIM(author)), series_index" - f" FROM library WHERE {conditions}", - params, - ) - rows = cur.fetchall() + # Check by title+author + title_author_pairs = list({(t, a) for t, a, s, _ in parsed if t}) + existing_with_vol: set = set() + existing_title_author: set = set() + if title_author_pairs: + conditions = " OR ".join( + "(LOWER(TRIM(title)) = %s AND LOWER(TRIM(author)) = %s)" for _ in title_author_pairs + ) + params = [v for pair in title_author_pairs for v in pair] + cur.execute( + f"SELECT LOWER(TRIM(title)), LOWER(TRIM(author)), series_index" + f" FROM library WHERE {conditions}", + params, + ) + rows = cur.fetchall() + existing_with_vol = {(r[0] or "", r[1] or "", r[2]) for r in rows} + existing_title_author = {(r[0] or "", r[1] or "") for r in rows} - # (title, author, series_index) for volume-aware lookup - existing_with_vol = {(r[0] or "", r[1] or "", r[2]) for r in rows} - # (title, author) for volume-less lookup - existing_title_author = {(r[0] or "", r[1] or "") for r in rows} + # Check by series+author+series_index (catches title-format changes) + series_author_pairs = list({(s, a) for _, a, s, _ in parsed if s and a}) + existing_series_vol: set = set() + if series_author_pairs: + conditions2 = " OR ".join( + "(LOWER(TRIM(series)) = %s AND LOWER(TRIM(author)) = %s)" for _ in series_author_pairs + ) + params2 = [v for pair in series_author_pairs for v in pair] + cur.execute( + f"SELECT LOWER(TRIM(series)), LOWER(TRIM(author)), series_index" + f" FROM library WHERE {conditions2}", + params2, + ) + existing_series_vol = {(r[0] or "", r[1] or "", r[2]) for r in cur.fetchall()} duplicates = [] - for title, author, vol_int in parsed: - if not title: - duplicates.append(False) - elif vol_int is not None: - # Volume known: only a duplicate when title+author+volume all match - duplicates.append((title, author, vol_int) in existing_with_vol) - else: - # No volume: duplicate if any title+author match exists + for title, author, series, vol_int in parsed: + if vol_int is not None: + by_title = (title, author, vol_int) in existing_with_vol if title else False + by_series = (series, author, vol_int) in existing_series_vol if series else False + duplicates.append(by_title or by_series) + elif title: duplicates.append((title, author) in existing_title_author) + else: + duplicates.append(False) return {"duplicates": duplicates} 
diff --git a/containers/novela/routers/reader.py b/containers/novela/routers/reader.py index 8f5b6fc..91d0a53 100644 --- a/containers/novela/routers/reader.py +++ b/containers/novela/routers/reader.py @@ -12,7 +12,7 @@ from pathlib import Path from bs4 import BeautifulSoup from fastapi import APIRouter, Request from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response -from fastapi.templating import Jinja2Templates +from shared_templates import templates from cbr import cbr_get_page, cbr_page_count from db import get_db_conn @@ -34,7 +34,6 @@ from routers.common import ( ) router = APIRouter() -templates = Jinja2Templates(directory="templates") # --------------------------------------------------------------------------- # EPUB helpers @@ -281,6 +280,7 @@ def _sync_epub_metadata( series: str, series_index: int | str | None, series_suffix: str = "", + series_volume: str = "", subjects: list[str], ) -> None: """Write edited metadata back into OPF so DB and EPUB stay aligned.""" @@ -360,9 +360,11 @@ def _sync_epub_metadata( set_named_meta('calibre:series_index', str(_coerce_series_index(series_index))) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] set_named_meta('novela:series_suffix', sfx) + set_named_meta('novela:series_volume', (series_volume or '').strip()[:20]) else: set_named_meta('calibre:series_index', '') set_named_meta('novela:series_suffix', '') + set_named_meta('novela:series_volume', '') _rewrite_epub_entries(epub_path, {opf_path: str(opf).encode('utf-8')}) @@ -398,9 +400,9 @@ def _rewrite_epub_entries(epub_path: Path, updates: dict[str, bytes], remove_pat def _clean_segment(value: str, fallback: str, max_len: int = 100) -> str: - txt = re.sub(r"\s+", " ", (value or "").strip()) + txt = re.sub(r"\s+", "_", (value or "").strip()) txt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", txt) - txt = re.sub(r"\.+$", "", txt).strip() + txt = re.sub(r"\.+$", "", txt).strip("_") if not txt: txt = fallback return txt[:max_len] @@ -432,7 +434,7 @@ def _make_rel_path( if series_name: idx = _coerce_series_index(series_index) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] - return Path("epub") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx} - {ttl}.epub" + return Path("epub") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}.epub" return Path("epub") / pub / auth / "Stories" / f"{ttl}.epub" if ext == ".pdf": @@ -445,7 +447,7 @@ def _make_rel_path( if series_name: idx = _coerce_series_index(series_index) sfx = re.sub(r"[^a-z]", "", (series_suffix or "").lower())[:5] - return Path("comics") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx} - {ttl}{ext}" + return Path("comics") / pub / auth / "Series" / series_name / f"{idx:03d}{sfx}_-_{ttl}{ext}" return Path("comics") / pub / auth / f"{ttl}{ext}" @@ -535,6 +537,14 @@ async def get_chapter_html(filename: str, index: int): return Response(status_code=404) title, content = row safe_title = _html.escape(title or "") + # Strip leading h-tags from stored content — the endpoint always + # prepends its own
<h2>

, so content scraped + # before front-matter stripping was added would show the title twice. + # Handles two layouts: + # 1. <h1> at the very start of content + # 2. <section>\n<h1> (pandoc-style wrapping) + content = re.sub(r'(?si)^(\s*<h\d(\s[^>]*)?>.*?</h\d>)+\s*', '', content) + content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)<h\d[^>]*>.*?</h\d>\s*', r'\1', content, count=1) return Response( f'<h2>{safe_title}</h2>
\n{content}\n', media_type="text/html", @@ -713,7 +723,8 @@ async def book_detail_page(filename: str, request: Request): SELECT title, author, publisher, has_cover, series, series_index, publication_status, want_to_read, source_url, archived, publish_date, description, rating, COALESCE(series_suffix, '') AS series_suffix, - COALESCE(storage_type, 'file') AS storage_type + COALESCE(storage_type, 'file') AS storage_type, + COALESCE(series_volume, '') AS series_volume FROM library WHERE filename = %s """, (filename,), @@ -728,6 +739,7 @@ async def book_detail_page(filename: str, request: Request): "series": lib_row[4] or "", "series_index": lib_row[5] or 0, "series_suffix": lib_row[13] or "", + "series_volume": lib_row[15] or "", "publication_status": lib_row[6] or "", "want_to_read": lib_row[7] or False, "source_url": lib_row[8] or "", @@ -814,6 +826,7 @@ async def book_detail_page(filename: str, request: Request): "series": entry["series"], "series_index": entry["series_index"], "series_suffix": entry["series_suffix"], + "series_volume": entry.get("series_volume", ""), "genres": genres, "subgenres": subgenres, "tags": tags_list, @@ -873,6 +886,53 @@ async def api_suggestions(type: str | None = None): return JSONResponse([r[0] for r in cur.fetchall()]) +@router.get("/api/series-nav/{filename:path}") +async def api_series_nav(filename: str): + """Return the previous and next book in the same series, ordered by series_index.""" + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT series, series_index, COALESCE(series_suffix, '') FROM library WHERE filename = %s", + (filename,), + ) + row = cur.fetchone() + if not row or not row[0]: + return JSONResponse({"prev": None, "next": None}) + + series, current_index, current_suffix = row + + with get_db_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """ + SELECT filename, title, series_index, COALESCE(series_suffix, '') + FROM library + WHERE series = %s AND series IS NOT NULL AND series <> '' + ORDER BY series_index ASC, series_suffix ASC + """, + (series,), + ) + siblings = cur.fetchall() + + # Find position of current book in ordered list + pos = None + for i, (fn, _title, idx, sfx) in enumerate(siblings): + if fn == filename: + pos = i + break + + if pos is None: + return JSONResponse({"prev": None, "next": None}) + + def entry(row): + return {"filename": row[0], "title": row[1], "index": row[2], "suffix": row[3]} + + return JSONResponse({ + "prev": entry(siblings[pos - 1]) if pos > 0 else None, + "next": entry(siblings[pos + 1]) if pos < len(siblings) - 1 else None, + }) + + @router.patch("/library/book/{filename:path}") async def book_update(filename: str, request: Request): """Update book metadata and tags, and rename/move the file when needed.""" @@ -881,6 +941,7 @@ async def book_update(filename: str, request: Request): author = body.get("author", "") publisher = body.get("publisher", "") series = body.get("series", "") + series_volume = (body.get("series_volume", "") or "").strip()[:20] from routers.common import parse_volume_str series_index, series_suffix = parse_volume_str(body.get("series_index", "")) @@ -910,11 +971,11 @@ async def book_update(filename: str, request: Request): """ INSERT INTO library ( filename, title, author, publisher, has_cover, - series, series_index, series_suffix, publication_status, + series, series_index, series_suffix, series_volume, publication_status, source_url, publish_date, description, archived, needs_review, storage_type, updated_at ) - VALUES (%s, %s, %s, 
%s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, 'db', NOW()) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, 'db', NOW()) ON CONFLICT (filename) DO UPDATE SET title = EXCLUDED.title, author = EXCLUDED.author, @@ -922,6 +983,7 @@ async def book_update(filename: str, request: Request): series = EXCLUDED.series, series_index = EXCLUDED.series_index, series_suffix = EXCLUDED.series_suffix, + series_volume = EXCLUDED.series_volume, publication_status = EXCLUDED.publication_status, source_url = EXCLUDED.source_url, publish_date = EXCLUDED.publish_date, @@ -933,6 +995,7 @@ async def book_update(filename: str, request: Request): new_filename, title, author, publisher, has_cover, series, series_index if series else 0, series_suffix if series else "", + series_volume if series else "", body.get("publication_status", ""), body.get("source_url", ""), body.get("publish_date") or None, @@ -1001,6 +1064,7 @@ async def book_update(filename: str, request: Request): series=series, series_index=series_index if series else 0, series_suffix=series_suffix if series else "", + series_volume=series_volume if series else "", subjects=(body.get("genres", []) + body.get("subgenres", []) + body.get("tags", [])), ) @@ -1015,11 +1079,11 @@ async def book_update(filename: str, request: Request): """ INSERT INTO library ( filename, title, author, publisher, has_cover, - series, series_index, series_suffix, publication_status, + series, series_index, series_suffix, series_volume, publication_status, source_url, publish_date, description, archived, needs_review, updated_at ) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, NOW()) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE, FALSE, NOW()) ON CONFLICT (filename) DO UPDATE SET title = EXCLUDED.title, author = EXCLUDED.author, @@ -1027,6 +1091,7 @@ async def book_update(filename: str, request: Request): series = EXCLUDED.series, series_index = EXCLUDED.series_index, series_suffix = EXCLUDED.series_suffix, + series_volume = EXCLUDED.series_volume, publication_status = EXCLUDED.publication_status, source_url = EXCLUDED.source_url, publish_date = EXCLUDED.publish_date, @@ -1043,6 +1108,7 @@ async def book_update(filename: str, request: Request): series, series_index if series else 0, series_suffix if series else "", + series_volume if series else "", body.get("publication_status", ""), body.get("source_url", ""), body.get("publish_date") or None, @@ -1378,6 +1444,10 @@ async def export_epub(filename: str): seen_images: dict[str, str] = {} chapters = [] for ch_idx, ch_title, ch_content in ch_rows: + # Strip leading h-tags from stored content (same logic as chapter endpoint) + # to prevent double titles when make_chapter_xhtml prepends its own heading. 
+ ch_content = re.sub(r'(?si)^(\s*<h\d(\s[^>]*)?>.*?</h\d>)+\s*', '', ch_content) + ch_content = re.sub(r'(?si)(<(?:section|div)[^>]*>\s*)<h\d[^>]*>.*?</h\d>\s*', r'\1', ch_content, count=1) modified_html, new_imgs = _rewrite_db_images_for_epub(ch_content, seen_images) chapter_xhtml = make_chapter_xhtml(ch_title or f"Chapter {ch_idx + 1}", modified_html, ch_idx + 1) chapters.append({"title": ch_title or f"Chapter {ch_idx + 1}", "xhtml": chapter_xhtml, "images": new_imgs}) diff --git a/containers/novela/routers/search.py b/containers/novela/routers/search.py index 8325bb0..6a144b0 100644 --- a/containers/novela/routers/search.py +++ b/containers/novela/routers/search.py @@ -2,12 +2,11 @@ from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse, JSONResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates from db import get_db_conn router = APIRouter() -templates = Jinja2Templates(directory="templates") @router.get("/search", response_class=HTMLResponse) async def search_page(request: Request): @@ -16,14 +15,38 @@ async def search_page(request: Request): @router.get("/api/search") -async def api_search(q: str = ""): +async def api_search(q: str = "", mode: str = "phrase", filter: str = "all"): q = q.strip() if not q or len(q) > 500: return JSONResponse([]) + tsquery_fn = "phraseto_tsquery" if mode == "phrase" else "plainto_tsquery" + + extra_joins = "" + extra_where = "" + if filter in ("unread_novels", "unread_shorts"): + extra_joins = """ + LEFT JOIN reading_sessions rs ON rs.filename = l.filename + LEFT JOIN reading_progress rp ON rp.filename = l.filename""" + extra_where = " AND rs.id IS NULL AND COALESCE(rp.progress, 0) = 0" + if filter == "unread_novels": + extra_where += """ + AND NOT EXISTS ( + SELECT 1 FROM book_tags bt + WHERE bt.filename = l.filename AND bt.tag = 'Shorts' + AND bt.tag_type IN ('tag', 'subject') + )""" + elif filter == "unread_shorts": + extra_where += """ + AND EXISTS ( + SELECT 1 FROM book_tags bt + WHERE bt.filename = l.filename AND bt.tag = 'Shorts' + AND bt.tag_type IN ('tag', 'subject') + )""" + with get_db_conn() as conn: with conn.cursor() as cur: cur.execute( - """ + f""" SELECT l.filename, l.title, @@ -38,11 +61,13 @@ async def api_search(q: str = ""): ts_rank(bc.content_tsv, plainto_tsquery('simple', %s)) AS rank FROM book_chapters bc JOIN library l ON l.filename = bc.filename - WHERE (bc.content_tsv @@ plainto_tsquery('simple', %s) + {extra_joins} + WHERE (bc.content_tsv @@ {tsquery_fn}('simple', %s) + OR LOWER(bc.title) LIKE LOWER('%%' || %s || '%%')) AND NOT l.archived + {extra_where} + GROUP BY l.filename, l.title, l.author, bc.chapter_index, bc.title, bc.content, bc.content_tsv ORDER BY rank DESC, bc.chapter_index ASC - LIMIT 30 """, (q, q, q, q), ) diff --git a/containers/novela/routers/settings.py b/containers/novela/routers/settings.py index 5e61d82..047e2ef 100644 --- a/containers/novela/routers/settings.py +++ b/containers/novela/routers/settings.py @@ -1,12 +1,13 @@ import re -from fastapi import APIRouter, Request +from fastapi import APIRouter, Request, UploadFile, File from fastapi.responses import HTMLResponse -from fastapi.templating import Jinja2Templates +from shared_templates import templates from db import get_db_conn +from epub import detect_image_format +from routers.common import write_image_file -templates = Jinja2Templates(directory="templates") router = APIRouter() @@ -95,6 +96,57 @@ async def delete_break_pattern(pid: int): return {"ok": True} +@router.get("/api/app-settings") +async def get_app_settings(): + with get_db_conn() as 
conn: + with conn.cursor() as cur: + cur.execute("SELECT develop_mode, break_image_sha256, break_image_ext FROM app_settings WHERE id = 1") + row = cur.fetchone() + if not row: + return {"develop_mode": False, "break_image_url": None} + sha, ext = row[1], row[2] + break_image_url = f"/library/db-images/{sha[:2]}/{sha}{ext}" if sha and ext else None + return {"develop_mode": bool(row[0]), "break_image_url": break_image_url} + + +@router.patch("/api/app-settings") +async def update_app_settings(request: Request): + body = await request.json() + with get_db_conn() as conn: + with conn: + with conn.cursor() as cur: + if "develop_mode" in body: + cur.execute( + "UPDATE app_settings SET develop_mode = %s WHERE id = 1", + (bool(body["develop_mode"]),), + ) + return {"ok": True} + + +@router.post("/api/app-settings/break-image") +async def upload_break_image(file: UploadFile = File(...)): + data = await file.read() + if not data: + return {"error": "Empty file"} + _, media_type = detect_image_format(data, file.filename or "break") + sha, ext, _ = write_image_file(data, media_type) + # Also write to static/break.png so EPUB embeds the same image + try: + with open("static/break.png", "wb") as f: + f.write(data) + except Exception: + pass + with get_db_conn() as conn: + with conn: + with conn.cursor() as cur: + cur.execute( + "UPDATE app_settings SET break_image_sha256 = %s, break_image_ext = %s WHERE id = 1", + (sha, ext), + ) + url = f"/library/db-images/{sha[:2]}/{sha}{ext}" + return {"ok": True, "url": url} + + @router.delete("/api/reading-history") async def reset_reading_history(): with get_db_conn() as conn: diff --git a/containers/novela/scrapers/__init__.py b/containers/novela/scrapers/__init__.py index 18fb974..1f1dd01 100644 --- a/containers/novela/scrapers/__init__.py +++ b/containers/novela/scrapers/__init__.py @@ -1,13 +1,23 @@ from .base import BaseScraper from .archiveofourown import ArchiveOfOurOwnScraper from .awesomedude import AwesomeDudeScraper +from .codeysworld import CodeysWorldScraper from .gayauthors import GayAuthorsScraper +from .iomfats import IomfatsScraper +from .nifty import NiftyScraper +from .nifty_new import NiftyNewScraper +from .tedlouis import TedLouisScraper # Register scrapers in priority order (first match wins) _SCRAPERS: list[type[BaseScraper]] = [ ArchiveOfOurOwnScraper, AwesomeDudeScraper, + CodeysWorldScraper, GayAuthorsScraper, + IomfatsScraper, + NiftyNewScraper, + NiftyScraper, + TedLouisScraper, ] diff --git a/containers/novela/scrapers/codeysworld.py b/containers/novela/scrapers/codeysworld.py new file mode 100644 index 0000000..65d8f62 --- /dev/null +++ b/containers/novela/scrapers/codeysworld.py @@ -0,0 +1,208 @@ +import re +from urllib.parse import urljoin, urlparse + +import httpx +from bs4 import BeautifulSoup + +from .base import BaseScraper + +CW_BASE = "https://www.codeysworld.org" + +LAYOUT_RE = re.compile( + r"nav|menu|sidebar|header|footer|breadcrumb|pagination|" + r"comment|widget|aside|banner|ad|rating|follow|share", + re.I, +) + + +class CodeysWorldScraper(BaseScraper): + + @classmethod + def matches(cls, url: str) -> bool: + return "codeysworld.org" in url + + async def login(self, client: httpx.AsyncClient, username: str, password: str) -> bool: + return True # no login required + + async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict: + r = await client.get(url) + soup = BeautifulSoup(r.text, "html.parser") + actual_url = str(r.url) + + # Title:
<h1>

+ h1 = soup.find("h1") + book_title = h1.get_text(strip=True) if h1 else "Unknown title" + + # Author:

"by Author Name" + author = "Unknown author" + h2 = soup.find("h2") + if h2: + text = h2.get_text(strip=True) + m = re.match(r"^by\s+(.+)$", text, re.I) + if m: + author = m.group(1).strip() + + # URL path: /{author_slug}/{category}/filename.htm + tags: list[str] = [] + path_parts = urlparse(actual_url).path.strip("/").split("/") + if len(path_parts) >= 3: + author_slug = path_parts[-3] + category = path_parts[-2] + elif len(path_parts) >= 2: + author_slug = path_parts[-2] + category = "" + else: + author_slug = "" + category = "" + + # Fallback: derive author from URL slug if not found in page + if author == "Unknown author" and author_slug: + author = author_slug.replace("_", " ").title() + + # Category → tag + if category and category.lower() not in ("codey", author_slug.lower()): + tags = [category.replace("_", " ").title()] + + # Chapter discovery: links to .htm/.html in the same directory, + # excluding the index page itself and audio/image files. + base_dir = actual_url.rsplit("/", 1)[0] + "/" + chapter_links: list[dict] = [] + seen: set[str] = set() + for a in soup.find_all("a", href=True): + href = a["href"] + if re.search(r"\.(mp3|mp4|ogg|wav|jpg|jpeg|png|gif)$", href, re.I): + continue + full = urljoin(actual_url, href) + if ( + full.startswith(base_dir) + and re.search(r"\.html?(\?.*)?$", full, re.I) + and full.rstrip("/") != actual_url.rstrip("/") + and full not in seen + ): + seen.add(full) + text = re.sub(r"\s+", " ", a.get_text(separator=" ")).strip() + chapter_links.append({"url": full, "title": text, "book_title": book_title, "author": author}) + + if not chapter_links: + # Single-file story + chapter_links = [{"url": actual_url, "title": book_title, "book_title": book_title, "author": author}] + chapter_method = "single_page" + else: + chapter_method = "html_scan" + for i, c in enumerate(chapter_links, 1): + t = c["title"] + if not t or t.lower() == book_title.lower(): + c["title"] = f"Chapter {i}" + elif re.match(r"^\d+$", t): + c["title"] = f"Chapter {t}" + + return { + "title": book_title, + "author": author, + "publisher": "codeysworld.org", + "series": "", + "series_index_hint": 0, + "genres": [], + "subgenres": [], + "tags": tags, + "description": "", + "updated_date": "", + "publication_status": "", + "source_url": url, + "chapters": chapter_links, + "chapter_method": chapter_method, + "index_image_url": None, + } + + async def fetch_chapter(self, client: httpx.AsyncClient, ch: dict) -> dict: + cr = await client.get(ch["url"]) + csoup = BeautifulSoup(cr.text, "html.parser") + title = ch["title"] + book_title_lc = ch.get("book_title", "").lower() + author_lc = ch.get("author", "").lower() + + # Refine chapter title from an in-page heading, + # skipping the book title and "by Author" headings. + for tag in csoup.find_all(["h1", "h2", "h3"]): + text = re.sub(r"\s+", " ", tag.get_text(separator=" ")).strip() + if not text or len(text) >= 120: + continue + text_lc = text.lower() + if re.search(r"\s+by\s+", text, re.I): + continue + if book_title_lc and book_title_lc in text_lc: + continue + if author_lc and author_lc in text_lc: + continue + title = text + break + + # Content extraction: prefer a content-like wrapper; fall back to body. 
+ content_el = ( + csoup.find(id=re.compile(r"^(chapter|story|content|text)[_-]?", re.I)) + or csoup.find(class_=re.compile(r"story.?text|chapter.?text|post.?content|entry.?content", re.I)) + or csoup.find("article") + ) + + if not content_el: + candidates = [ + el for el in csoup.find_all(["div", "article", "section"]) + if not re.search(LAYOUT_RE, " ".join(el.get("class", []))) + and not re.search(LAYOUT_RE, el.get("id", "")) + ] + if candidates: + content_el = max(candidates, key=lambda el: len(el.get_text(" ", strip=True))) + + body = csoup.find("body") + if body: + body_text_len = len(body.get_text(" ", strip=True)) + selected_p_count = len(content_el.find_all("p")) if content_el else 0 + selected_text_len = len(content_el.get_text(" ", strip=True)) if content_el else 0 + + # Codeysworld stores story text as direct
<p>

children of body. + if not content_el or selected_p_count < 3 or selected_text_len < int(body_text_len * 0.35): + content_el = body + + if not content_el: + content_el = body + + # Strip site boilerplate: headings (title/byline), navigation links, + # audio links and empty nodes — anywhere in the content element. + if content_el: + # Remove all h1/h2 headings (title and "by Author") + for el in content_el.find_all(["h1", "h2"]): + el.decompose() + + # Remove navigation links ("Back to …", "Home", etc.) + for el in content_el.find_all("a", href=True): + text = el.get_text(strip=True) + if re.search(r"back\s+to|<{0,2}\s*back|home", text, re.I): + parent = el.parent + el.decompose() + # Remove the parent too if it's now empty + if parent and not parent.get_text(strip=True): + parent.decompose() + + # Remove audio links (links to .mp3 files or containing "listen"/"audio") + for el in content_el.find_all("a", href=True): + href = el.get("href", "") + text = el.get_text(strip=True) + if re.search(r"\.mp3$", href, re.I) or re.search(r"listen|audio", text, re.I): + parent = el.parent + el.decompose() + if parent and not parent.get_text(strip=True): + parent.decompose() + + # Remove email links ("Email Author") + for el in content_el.find_all("a", href=re.compile(r"^mailto:", re.I)): + parent = el.parent + el.decompose() + if parent and not parent.get_text(strip=True): + parent.decompose() + + return { + "title": title, + "content_el": content_el, + "selector_id": content_el.get("id") if content_el else None, + "selector_class": " ".join(content_el.get("class", [])) if content_el else None, + } diff --git a/containers/novela/scrapers/iomfats.py b/containers/novela/scrapers/iomfats.py new file mode 100644 index 0000000..03ce395 --- /dev/null +++ b/containers/novela/scrapers/iomfats.py @@ -0,0 +1,267 @@ +import re +from urllib.parse import urljoin, urlparse + +import httpx +from bs4 import BeautifulSoup, NavigableString + +from .base import BaseScraper + +IOMFATS_BASE = "https://iomfats.org" + + +class IomfatsScraper(BaseScraper): + + @classmethod + def matches(cls, url: str) -> bool: + return "iomfats.org" in url + + async def login(self, client: httpx.AsyncClient, username: str, password: str) -> bool: + return True # no login required + + def _author_page_url(self, url: str) -> str: + """Derive the author index page URL from any iomfats.org URL.""" + parsed = urlparse(url) + parts = parsed.path.strip("/").split("/") + # Path: storyshelf/hosted/{author}/... + # Author page is the first 3 segments. + if len(parts) >= 3 and parts[0] == "storyshelf" and parts[1] == "hosted": + author_path = "/" + "/".join(parts[:3]) + "/" + return f"{parsed.scheme}://{parsed.netloc}{author_path}" + return url + + def _is_author_page(self, url: str) -> bool: + parts = urlparse(url).path.strip("/").split("/") + return ( + len(parts) <= 3 + and len(parts) >= 2 + and parts[0] == "storyshelf" + and parts[1] == "hosted" + ) + + def _story_folder(self, url: str) -> str | None: + """Return the story folder segment from a chapter URL, or None.""" + parts = urlparse(url).path.strip("/").split("/") + # storyshelf/hosted/{author}/{story-folder}/{chapter}.html + if len(parts) >= 5: + return parts[3] + return None + + async def fetch_book_info(self, client: httpx.AsyncClient, url: str) -> dict: + if self._is_author_page(url): + raise ValueError( + "Enter a chapter URL, not an author page. " + "Copy the URL of the first chapter of the story you want." 
+ ) + + story_folder = self._story_folder(url) + if not story_folder: + raise ValueError( + "Unexpected URL format for iomfats.org. " + "Use a chapter URL, e.g. …/grasshopper/dreamchasers/01.html" + ) + + author_url = self._author_page_url(url) + r = await client.get(author_url) + soup = BeautifulSoup(r.text, "html.parser") + content = soup.find("div", id="content") + if not content: + raise ValueError("Cannot parse the author page (no #content element).") + + # Author name from "by Name" heading + author = "Unknown author" + for el in content.find_all(["h2", "h3"]): + text = el.get_text(strip=True) + m = re.match(r"^by\s+(.+)$", text, re.I) + if m: + author = m.group(1).strip() + break + # Fallback: author slug from URL + if author == "Unknown author": + parts = urlparse(author_url).path.strip("/").split("/") + if len(parts) >= 3: + author = parts[2].replace("_", " ").title() + + # Walk the content to find the story matching story_folder. + # + # Two structures on the author page: + # + # Single story: + #

<h3><a href="…">Book Title</a></h3> + # + # Multi-part series: + # <h3>Series Name</h3> + #   • <a href="…">Book Title (part 1)</a> + #     [status] + #   • <a href="…">Book Title (part 2)</a> + #   • … + #
+ + book_title = "" + series = "" + series_index_hint = 0 + publication_status = "" + chapter_links: list[dict] = [] + + nodes = list(content.children) + i = 0 + while i < len(nodes): + node = nodes[i] + if not hasattr(node, "name"): + i += 1 + continue + + if node.name == "h3": + outer_title = node.get_text(strip=True) + if re.match(r"^by\s+", outer_title, re.I): + i += 1 + continue + + # Find the following
{% set r = (rating | default(0)) | int %} -
+
{% for i in range(1, 6) %} - + {% endfor %}
@@ -54,7 +54,7 @@ {% if series %}
Series - {{ series }}{% if series_index is defined and (series_index or series_suffix or series_is_indexed) %} [{{ series_index }}{{ series_suffix }}]{% endif %} + {{ series }}{% if series_volume %} ({{ series_volume }}){% endif %}{% if series_index is defined and (series_index or series_suffix or series_is_indexed) %} [{{ series_index }}{{ series_suffix }}]{% endif %}
{% endif %}
@@ -254,7 +254,13 @@
-
+
+ +
+ + +
+
@@ -277,7 +283,8 @@
-
+
+
@@ -289,6 +296,17 @@
+
+ + +
@@ -368,6 +386,7 @@ series: {{ (series or '') | tojson }}, series_index: {{ series_index or 0 }}, series_suffix: {{ (series_suffix or '') | tojson }}, + series_volume: {{ (series_volume or '') | tojson }}, publication_status: {{ (publication_status or '') | tojson }}, source_url: {{ (source_url or '') | tojson }}, publish_date: {{ (publish_date or '') | tojson }}, diff --git a/containers/novela/templates/builder.html b/containers/novela/templates/builder.html index 826d908..89c195a 100644 --- a/containers/novela/templates/builder.html +++ b/containers/novela/templates/builder.html @@ -3,7 +3,7 @@ - Novela — Book Builder{% if view == 'editor' %}: {{ draft.title }}{% endif %} + Novela{% if develop_mode() %} Develop{% endif %} — Book Builder{% if view == 'editor' %}: {{ draft.title }}{% endif %} diff --git a/containers/novela/templates/bulk_import.html b/containers/novela/templates/bulk_import.html index 6ff5e23..5682f83 100644 --- a/containers/novela/templates/bulk_import.html +++ b/containers/novela/templates/bulk_import.html @@ -3,7 +3,7 @@ - Novela – Bulk Import + Novela{% if develop_mode() %} Develop{% endif %} — Bulk Import @@ -189,7 +189,7 @@ td.td-skip input[type="checkbox"] { cursor: pointer; accent-color: var(--error); } tbody tr.row-dup { background: rgba(200,90,58,0.06); } tbody tr.row-dup:hover { background: rgba(200,90,58,0.10); } - tbody tr.row-dup.row-skipped { opacity: 0.38; } + tbody tr.row-skipped { opacity: 0.38; } .cnt-dup { color: var(--error); } .dup-actions { display: flex; gap: 0.5rem; align-items: center; @@ -322,6 +322,10 @@ +
+ + +
+ + +
@@ -373,12 +385,13 @@ # Filename Series + Yr/Vol Vol Title Author Publisher Year - Skip + Skip @@ -432,10 +445,11 @@
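The Yr/Vol column added above feeds the series_volume field introduced in v0.1.10. As a minimal sketch of the ordering that field enables, mirroring the ORDER BY in list_library_json, where series_volume sorts before series_index (the sample rows are invented):

# Annual series: the year string groups first, the issue number second,
# so "Donald Duck (1982) [15]" sorts before "Donald Duck (1983) [2]".
books = [
    {"series": "Donald Duck", "series_volume": "1983", "series_index": 2},
    {"series": "Donald Duck", "series_volume": "1982", "series_index": 15},
]
books.sort(key=lambda b: (b["series"], b["series_volume"], b["series_index"]))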