# novela/containers/novela/routers/library.py
import base64
import uuid
from datetime import datetime, timezone
from pathlib import Path
from fastapi import APIRouter, File, Request, UploadFile
from fastapi.responses import HTMLResponse, Response
from fastapi.templating import Jinja2Templates
from PIL import UnidentifiedImageError
from db import get_db_conn
from epub import add_cover_to_epub
from routers.common import (
LIBRARY_DIR,
ensure_cover_cache_for_book,
ensure_cover_missing_tag,
ensure_unique_rel_path,
list_library_json,
make_cover_thumb_webp,
make_rel_path,
media_type_from_suffix,
prune_empty_dirs,
relative_file_info,
resolve_library_path,
scan_media,
upsert_book,
upsert_cover_cache,
)
templates = Jinja2Templates(directory="templates")
router = APIRouter()
def _collect_files() -> list[Path]:
    """Return every supported media file found anywhere under LIBRARY_DIR."""
    patterns = ("*.epub", "*.pdf", "*.cbr", "*.cbz")
    return [path for pattern in patterns for path in LIBRARY_DIR.rglob(pattern)]
def _sync_disk_to_db() -> int:
    """Synchronize the library table with the files on disk.

    Upserts a row (plus tags and cover cache) for every supported media file
    under LIBRARY_DIR, then deletes rows whose file no longer exists.

    Returns:
        Number of files successfully scanned and upserted.
    """
    files = _collect_files()
    synced = 0
    with get_db_conn() as conn:
        with conn:  # one transaction for the whole sync
            for p in files:
                rel = p.relative_to(LIBRARY_DIR).as_posix()
                meta = scan_media(p)
                if not meta.get("media_type"):
                    # Unreadable / unrecognized file: skip rather than insert junk.
                    continue
                tags = [(s, "subject") for s in meta.get("subjects", [])]
                upsert_book(conn, rel, meta, tags)
                has_cover = bool(meta.get("has_cover"))
                ensure_cover_missing_tag(conn, rel, has_cover)
                if has_cover:
                    ensure_cover_cache_for_book(conn, rel, p, meta["media_type"])
                synced += 1
            # Remove DB rows for files that disappeared from disk. Use a
            # single cursor + executemany instead of opening a new cursor
            # per stale row (the original re-created the cursor each time).
            disk_files = {p.relative_to(LIBRARY_DIR).as_posix() for p in files}
            with conn.cursor() as cur:
                cur.execute("SELECT filename FROM library")
                stale = {r[0] for r in cur.fetchall()} - disk_files
                if stale:
                    cur.executemany(
                        "DELETE FROM library WHERE filename = %s",
                        [(name,) for name in stale],
                    )
    return synced
@router.get("/library", response_class=HTMLResponse)
async def library_page(request: Request):
    """Render the main library page."""
    context = {"active": "library"}
    return templates.TemplateResponse(request, "library.html", context)
@router.get("/api/library")
async def api_library(rescan: bool = False, include_file_info: bool = False):
    """Return the library as JSON.

    The expensive full disk scan is skipped by default; pass ?rescan=true
    (or call /library/rescan) when a full disk-to-DB sync is needed.
    """
    if rescan:
        _sync_disk_to_db()
    books = list_library_json()
    if include_file_info:
        for book in books:
            path = resolve_library_path(book["filename"])
            if path and path.exists():
                book.update(relative_file_info(path))
    return books
@router.post("/library/rescan")
async def library_rescan():
    """Force a full disk-to-DB sync and report how many files were scanned."""
    count = _sync_disk_to_db()
    return {"ok": True, "scanned": count}
@router.post("/library/import")
async def library_import(files: list[UploadFile] = File(...)):
    """Import uploaded media files into the library.

    Each upload is written to a temp file, scanned for metadata, moved to
    its canonical relative path, and upserted into the DB (flagged
    needs_review). Returns the imported relative paths plus a list of
    skipped files with human-readable reasons.
    """
    allowed_suffixes = {".epub", ".pdf", ".cbr", ".cbz"}
    imported: list[str] = []
    skipped: list[dict[str, str]] = []
    with get_db_conn() as conn:
        with conn:
            for upload in files:
                tmp: Path | None = None
                try:
                    name = upload.filename or "upload.bin"
                    suffix = Path(name).suffix.lower()
                    if suffix not in allowed_suffixes:
                        skipped.append({"file": name, "reason": "Unsupported file type"})
                        continue
                    data = await upload.read()
                    if not data:
                        skipped.append({"file": name, "reason": "Empty upload"})
                        continue
                    tmp = LIBRARY_DIR / f".import-{uuid.uuid4().hex}{suffix}"
                    tmp.parent.mkdir(parents=True, exist_ok=True)
                    tmp.write_bytes(data)
                    meta = scan_media(tmp)
                    media_type = meta.get("media_type")
                    if not media_type:
                        skipped.append({"file": name, "reason": "Could not detect media type"})
                        continue
                    rel = ensure_unique_rel_path(
                        make_rel_path(
                            media_type=media_type,
                            publisher=meta.get("publisher", ""),
                            author=meta.get("author", ""),
                            title=meta.get("title") or Path(name).stem,
                            series=meta.get("series", ""),
                            series_index=meta.get("series_index", 0),
                        )
                    )
                    dest = LIBRARY_DIR / rel
                    dest.parent.mkdir(parents=True, exist_ok=True)
                    tmp.replace(dest)
                    tmp = None  # moved into place; nothing left to clean up
                    rel_name = rel.as_posix()
                    meta["needs_review"] = True  # fresh imports await manual review
                    tags = [(s, "subject") for s in meta.get("subjects", [])]
                    upsert_book(conn, rel_name, meta, tags)
                    ensure_cover_missing_tag(conn, rel_name, bool(meta.get("has_cover")))
                    ensure_cover_cache_for_book(conn, rel_name, dest, media_type)
                    imported.append(rel_name)
                except Exception as e:
                    skipped.append({"file": upload.filename or "upload", "reason": str(e)})
                finally:
                    # Fix: the original only unlinked the temp file on the
                    # "could not detect media type" branch, leaking it when
                    # any later step raised. Clean up on every failure path.
                    if tmp is not None:
                        tmp.unlink(missing_ok=True)
                    await upload.close()
    return {"ok": True, "imported": imported, "skipped": skipped}
@router.delete("/library/file/{filename:path}")
async def library_delete(filename: str):
    """Delete a library file from disk and drop its DB row."""
    full = resolve_library_path(filename)
    if full is None:
        return {"error": "Invalid filename"}
    if not full.exists():
        return {"error": "File not found"}
    parent_dir = full.parent
    full.unlink()
    # Remove any directories left empty by the deletion.
    prune_empty_dirs(parent_dir)
    with get_db_conn() as conn, conn, conn.cursor() as cur:
        cur.execute("DELETE FROM library WHERE filename = %s", (filename,))
    return {"ok": True}
@router.get("/library/cover-cached/{filename:path}")
async def library_cover_cached(filename: str):
    """Serve a cached WebP cover thumbnail, generating it on demand.

    Order matters: try the cache first, then build the cache entry via
    ensure_cover_cache_for_book, then re-read it. Returns 404 if the file
    is missing or no thumbnail could be produced.
    """
    full = resolve_library_path(filename)
    if full is None or not full.exists():
        return Response(status_code=404)
    with get_db_conn() as conn:
        with conn:
            with conn.cursor() as cur:
                # Fast path: thumbnail already cached.
                cur.execute(
                    "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
                    (filename,),
                )
                row = cur.fetchone()
                if row and row[0]:
                    return Response(content=bytes(row[0]), media_type="image/webp")
                # Cache miss: resolve the media type (DB row first, file
                # suffix as fallback) and try to build the cache entry.
                cur.execute("SELECT media_type FROM library WHERE filename = %s", (filename,))
                row = cur.fetchone()
                mt = row[0] if row else media_type_from_suffix(full)
                if not ensure_cover_cache_for_book(conn, filename, full, mt):
                    return Response(status_code=404)
                # Re-read the freshly generated thumbnail.
                cur.execute(
                    "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
                    (filename,),
                )
                row = cur.fetchone()
                if row and row[0]:
                    return Response(content=bytes(row[0]), media_type="image/webp")
                # Generation reported success but produced no usable bytes.
                return Response(status_code=404)
@router.get("/library/cover/{filename:path}")
async def library_cover(filename: str):
    """Serve a full-size cover for EPUBs, or a cached thumbnail otherwise."""
    full = resolve_library_path(filename)
    if full is None or not full.exists():
        return Response(status_code=404)
    mt = media_type_from_suffix(full)
    if mt == "epub":
        # EPUB covers are extracted from the archive at full resolution.
        from routers.common import extract_cover_from_epub
        extracted = extract_cover_from_epub(full)
        if not extracted:
            return Response(status_code=404)
        raw, mime = extracted
        return Response(content=raw, media_type=mime)
    # Fix: the original checked {"pdf", "cbr"} only, so CBZ files — which
    # the library otherwise fully supports — always got a 404 here.
    if mt in {"pdf", "cbr", "cbz"}:
        with get_db_conn() as conn:
            with conn:
                if ensure_cover_cache_for_book(conn, filename, full, mt):
                    with conn.cursor() as cur:
                        cur.execute(
                            "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
                            (filename,),
                        )
                        row = cur.fetchone()
                        if row and row[0]:
                            return Response(content=bytes(row[0]), media_type="image/webp")
    return Response(status_code=404)
@router.post("/library/cover/{filename:path}")
async def library_add_cover(filename: str, request: Request):
    """Attach an uploaded cover image to an EPUB.

    Expects a JSON body {"cover_b64": "<base64 image>"}. Embeds the cover in
    the EPUB, marks the book as having a cover, and refreshes the thumbnail
    cache (best-effort). Errors are reported as {"error": ...} dicts.
    """
    full = resolve_library_path(filename)
    if full is None or not full.exists():
        return {"error": "File not found"}
    if media_type_from_suffix(full) != "epub":
        return {"error": "Cover upload is only supported for EPUB"}
    try:
        body = await request.json()
    except Exception:
        # Fix: the original let a malformed JSON body raise an unhandled 500;
        # report it like the other validation failures instead.
        return {"error": "Invalid JSON body"}
    cover_b64 = body.get("cover_b64", "") if isinstance(body, dict) else ""
    if not cover_b64:
        return {"error": "No image provided"}
    try:
        cover_data = base64.b64decode(cover_b64)
        add_cover_to_epub(full, cover_data)
    except Exception as e:
        return {"error": str(e)}
    with get_db_conn() as conn:
        with conn:
            with conn.cursor() as cur:
                # Mark the book as having a cover; insert a stub row if the
                # file is not in the library table yet.
                cur.execute(
                    """
                    INSERT INTO library (filename, media_type, has_cover, updated_at)
                    VALUES (%s, 'epub', TRUE, NOW())
                    ON CONFLICT (filename) DO UPDATE SET has_cover = TRUE, updated_at = NOW()
                    """,
                    (filename,),
                )
            try:
                # Thumbnail generation is best-effort: the full cover is
                # already embedded in the EPUB even if this fails.
                thumb = make_cover_thumb_webp(cover_data)
                upsert_cover_cache(conn, filename, "image/webp", thumb)
            except (UnidentifiedImageError, OSError, ValueError):
                pass
            ensure_cover_missing_tag(conn, filename, True)
    return {"ok": True}
@router.post("/library/want-to-read/{filename:path}")
async def library_want_to_read(filename: str):
    """Toggle a book's want-to-read flag and return the new value."""
    if resolve_library_path(filename) is None:
        return {"error": "Invalid filename"}
    with get_db_conn() as conn, conn, conn.cursor() as cur:
        cur.execute("SELECT want_to_read FROM library WHERE filename = %s", (filename,))
        row = cur.fetchone()
        if not row:
            return {"error": "Not found"}
        toggled = not bool(row[0])
        cur.execute(
            "UPDATE library SET want_to_read = %s, updated_at = NOW() WHERE filename = %s",
            (toggled, filename),
        )
    return {"ok": True, "want_to_read": toggled}
@router.post("/library/archive/{filename:path}")
async def library_archive(filename: str):
    """Toggle a book's archived flag and return the new value."""
    if resolve_library_path(filename) is None:
        return {"error": "Invalid filename"}
    with get_db_conn() as conn, conn, conn.cursor() as cur:
        cur.execute("SELECT archived FROM library WHERE filename = %s", (filename,))
        row = cur.fetchone()
        if not row:
            return {"error": "Not found"}
        toggled = not bool(row[0])
        cur.execute(
            "UPDATE library SET archived = %s, updated_at = NOW() WHERE filename = %s",
            (toggled, filename),
        )
    return {"ok": True, "archived": toggled}
@router.post("/library/new/mark-reviewed")
async def library_mark_new_reviewed(request: Request):
    """Clear the needs_review flag for the given filenames.

    Accepts {"filenames": [...]}; non-string entries, duplicates, and paths
    that do not resolve inside the library are silently dropped. Returns the
    number of rows actually updated.
    """
    body = await request.json()
    filenames = body.get("filenames", [])
    if not isinstance(filenames, list):
        return {"error": "filenames must be a list"}
    cleaned: list[str] = []
    seen: set[str] = set()
    for raw in filenames:
        if not isinstance(raw, str):
            continue
        name = raw.strip()
        if not name or name in seen:
            continue
        if resolve_library_path(name) is None:
            # Invalid or traversal-ish path: ignore it.
            continue
        cleaned.append(name)
        seen.add(name)
    if not cleaned:
        return {"ok": True, "updated": 0}
    with get_db_conn() as conn:
        with conn:
            with conn.cursor() as cur:
                # Use PostgreSQL's = ANY(%s) with a single list parameter
                # instead of f-string-building an IN (...) placeholder list:
                # static SQL, same semantics.
                cur.execute(
                    """
                    UPDATE library
                    SET needs_review = FALSE, updated_at = NOW()
                    WHERE filename = ANY(%s)
                    """,
                    (cleaned,),
                )
                updated = cur.rowcount or 0
    return {"ok": True, "updated": updated}
@router.get("/home", response_class=HTMLResponse)
async def home_page(request: Request):
    """Render the home page."""
    context = {"active": "home"}
    return templates.TemplateResponse(request, "home.html", context)
@router.get("/api/home")
async def api_home():
    """Build the home-page payload.

    Returns continue-reading entries plus four shelves of standalone,
    non-archived books: unread/read, each split into shorts vs novels by the
    'Shorts' tag. The original duplicated each shelf query verbatim except
    for EXISTS vs NOT EXISTS; the builders below deduplicate that.
    """

    def shorts_clause(has_tag: bool) -> str:
        # (NOT) EXISTS test for the 'Shorts' tag. The operator is a literal
        # constant chosen here — never user input — so interpolation is safe.
        op = "EXISTS" if has_tag else "NOT EXISTS"
        return f"""{op} (
                SELECT 1
                FROM book_tags bt
                WHERE bt.filename = l.filename
                AND bt.tag = 'Shorts'
                AND bt.tag_type IN ('tag', 'subject')
            )"""

    def unread_query(shorts: bool) -> str:
        # Standalone books with no reading sessions and no progress,
        # shuffled for variety.
        return f"""
            SELECT l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type
            FROM library l
            LEFT JOIN reading_sessions rs ON rs.filename = l.filename
            LEFT JOIN reading_progress rp ON rp.filename = l.filename
            WHERE COALESCE(l.series, '') = ''
            AND l.filename NOT LIKE '%/Series/%'
            AND l.archived = FALSE
            AND rs.id IS NULL
            AND COALESCE(rp.progress, 0) = 0
            AND {shorts_clause(shorts)}
            GROUP BY l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type
            ORDER BY RANDOM()
        """

    def read_query(shorts: bool) -> str:
        # Standalone books with at least one reading session, least recently
        # read first (good candidates for a re-read).
        return f"""
            SELECT l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type,
            MAX(rs.read_at) AS last_read
            FROM library l
            JOIN reading_sessions rs ON rs.filename = l.filename
            WHERE COALESCE(l.series, '') = ''
            AND l.filename NOT LIKE '%/Series/%'
            AND l.archived = FALSE
            AND {shorts_clause(shorts)}
            GROUP BY l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type
            ORDER BY MAX(rs.read_at) ASC
        """

    def simple(rows):
        # Serialize unread shelf rows (no progress/series data by definition).
        return [
            {
                "filename": r[0],
                "title": r[1] or "",
                "author": r[2] or "",
                "has_cover": bool(r[3]),
                "publication_status": r[4] or "",
                "media_type": r[5] or "epub",
                "progress": 0,
                "series": "",
                "series_index": 0,
            }
            for r in rows
        ]

    def simple_read(rows):
        # Serialize read shelf rows; includes the last_read timestamp.
        return [
            {
                "filename": r[0],
                "title": r[1] or "",
                "author": r[2] or "",
                "has_cover": bool(r[3]),
                "publication_status": r[4] or "",
                "media_type": r[5] or "epub",
                "last_read": r[6].isoformat() if r[6] else None,
                "progress": 0,
                "series": "",
                "series_index": 0,
            }
            for r in rows
        ]

    with get_db_conn() as conn:
        with conn.cursor() as cur:
            # Books with in-flight progress, most recently touched first.
            cur.execute(
                """
                SELECT l.filename, l.title, l.author, l.has_cover,
                l.series, l.series_index, l.publication_status,
                l.media_type,
                COALESCE(rp.progress, 0) AS progress,
                rp.cfi
                FROM reading_progress rp
                JOIN library l ON l.filename = rp.filename
                WHERE rp.progress > 0
                AND l.archived = FALSE
                ORDER BY rp.updated_at DESC
                """
            )
            cr_rows = cur.fetchall()
            cur.execute(unread_query(shorts=True))
            shorts_rows = cur.fetchall()
            cur.execute(unread_query(shorts=False))
            novels_rows = cur.fetchall()
            cur.execute(read_query(shorts=True))
            shorts_read_rows = cur.fetchall()
            cur.execute(read_query(shorts=False))
            novels_read_rows = cur.fetchall()
    return {
        "continue_reading": [
            {
                "filename": r[0],
                "title": r[1] or "",
                "author": r[2] or "",
                "has_cover": bool(r[3]),
                "series": r[4] or "",
                "series_index": r[5] or 0,
                "publication_status": r[6] or "",
                "media_type": r[7] or "epub",
                "progress": r[8] or 0,
                "progress_cfi": r[9],
            }
            for r in cr_rows
        ],
        "shorts_unread": simple(shorts_rows),
        "novels_unread": simple(novels_rows),
        "shorts_read": simple_read(shorts_read_rows),
        "novels_read": simple_read(novels_read_rows),
    }
@router.get("/stats", response_class=HTMLResponse)
async def stats_page(request: Request):
    """Render the reading-statistics page."""
    context = {"active": "stats"}
    return templates.TemplateResponse(request, "stats.html", context)
@router.get("/api/stats")
async def api_stats():
    """Aggregate reading statistics for the stats page.

    Returns headline totals, per-media-type counts, monthly/weekday/hourly
    read histograms, genre and publisher rankings, the ten most-reread
    books, and the 50 most recent reading sessions with their genres.
    """
    with get_db_conn() as conn:
        with conn.cursor() as cur:
            # Headline totals.
            cur.execute("SELECT COUNT(*)::int FROM library")
            total_books = cur.fetchone()[0]
            cur.execute("SELECT COUNT(*)::int FROM reading_sessions")
            total_reads = cur.fetchone()[0]
            cur.execute("SELECT COUNT(DISTINCT filename)::int FROM reading_sessions")
            unique_books_read = cur.fetchone()[0]
            # Library composition by media type.
            cur.execute(
                """
                SELECT media_type, COUNT(*)::int
                FROM library
                GROUP BY media_type
                ORDER BY media_type
                """
            )
            by_type = [{"media_type": r[0], "count": r[1]} for r in cur.fetchall()]
            # Reads per month over the trailing 12 months. The months CTE
            # generates every month in the window so empty months still
            # appear with a count of 0.
            cur.execute(
                """
                WITH months AS (
                    SELECT date_trunc('month', CURRENT_DATE) - (n * interval '1 month') AS month_start
                    FROM generate_series(11, 0, -1) AS n
                ), counts AS (
                    SELECT date_trunc('month', read_at) AS month_start, COUNT(*)::int AS cnt
                    FROM reading_sessions
                    WHERE read_at >= date_trunc('month', CURRENT_DATE) - interval '11 months'
                    GROUP BY 1
                )
                SELECT to_char(m.month_start, 'YYYY-MM') AS month, COALESCE(c.cnt, 0)::int AS count
                FROM months m
                LEFT JOIN counts c ON c.month_start = m.month_start
                ORDER BY m.month_start
                """
            )
            reads_by_month = [{"month": r[0], "count": r[1]} for r in cur.fetchall()]
            # Reads by day of week. PostgreSQL's DOW is 0=Sunday; the
            # (dow + 6) % 7 shift re-indexes the list to Monday-first.
            cur.execute(
                """
                SELECT EXTRACT(DOW FROM read_at)::int AS dow, COUNT(*)::int
                FROM reading_sessions
                GROUP BY 1
                """
            )
            reads_by_dow = [0] * 7
            for dow, count in cur.fetchall():
                idx = (int(dow) + 6) % 7
                reads_by_dow[idx] = int(count)
            # Reads by hour of day (0-23); out-of-range values are ignored.
            cur.execute(
                """
                SELECT EXTRACT(HOUR FROM read_at)::int AS hour, COUNT(*)::int
                FROM reading_sessions
                GROUP BY 1
                """
            )
            reads_by_hour = [0] * 24
            for hour, count in cur.fetchall():
                h = int(hour)
                if 0 <= h <= 23:
                    reads_by_hour[h] = int(count)
            # Distinct-book counts per genre/subgenre tag, most common first.
            cur.execute(
                """
                SELECT bt.tag AS name, COUNT(DISTINCT bt.filename)::int AS count
                FROM book_tags bt
                JOIN library l ON l.filename = bt.filename
                WHERE bt.tag_type IN ('genre', 'subgenre')
                GROUP BY bt.tag
                ORDER BY count DESC, name ASC
                """
            )
            genre_counts = [{"name": r[0], "count": r[1]} for r in cur.fetchall()]
            # Book counts per publisher; blank/whitespace publishers excluded.
            cur.execute(
                """
                SELECT publisher AS name, COUNT(*)::int AS count
                FROM library
                WHERE COALESCE(TRIM(publisher), '') <> ''
                GROUP BY publisher
                ORDER BY count DESC, name ASC
                """
            )
            publisher_counts = [{"name": r[0], "count": r[1]} for r in cur.fetchall()]
            # Top ten most-reread books; falls back to the filename when the
            # title is blank. Recency breaks count ties.
            cur.execute(
                """
                SELECT
                COALESCE(NULLIF(TRIM(l.title), ''), l.filename) AS title,
                COALESCE(l.author, '') AS author,
                COUNT(*)::int AS count
                FROM reading_sessions rs
                JOIN library l ON l.filename = rs.filename
                GROUP BY l.filename, l.title, l.author
                ORDER BY count DESC, MAX(rs.read_at) DESC
                LIMIT 10
                """
            )
            top_books = [{"title": r[0], "author": r[1], "count": r[2]} for r in cur.fetchall()]
            # The 50 most recent reading sessions, each with the book's
            # genre/subgenre tags aggregated into an array (NULLs from
            # non-genre tags removed by array_remove).
            cur.execute(
                """
                SELECT
                COALESCE(NULLIF(TRIM(l.title), ''), l.filename) AS title,
                COALESCE(l.author, '') AS author,
                COALESCE(l.publisher, '') AS publisher,
                rs.read_at,
                COALESCE(
                    array_remove(
                        array_agg(DISTINCT CASE WHEN bt.tag_type IN ('genre', 'subgenre') THEN bt.tag END),
                        NULL
                    ),
                    ARRAY[]::text[]
                ) AS genres
                FROM reading_sessions rs
                JOIN library l ON l.filename = rs.filename
                LEFT JOIN book_tags bt ON bt.filename = l.filename
                GROUP BY rs.id, l.filename, l.title, l.author, l.publisher, rs.read_at
                ORDER BY rs.read_at DESC
                LIMIT 50
                """
            )
            history = [
                {
                    "title": r[0],
                    "author": r[1],
                    "publisher": r[2],
                    "read_at": r[3].isoformat() if r[3] else None,
                    "genres": list(r[4] or []),
                }
                for r in cur.fetchall()
            ]
    # "Favorites" are simply the top-ranked genre/publisher, if any exist.
    fav_genre = genre_counts[0]["name"] if genre_counts else None
    fav_publisher = publisher_counts[0]["name"] if publisher_counts else None
    return {
        "total_books": total_books,
        "total_reads": total_reads,
        "unique_books_read": unique_books_read,
        "by_media_type": by_type,
        "reads_by_month": reads_by_month,
        "reads_by_dow": reads_by_dow,
        "reads_by_hour": reads_by_hour,
        "genre_counts": genre_counts,
        "publisher_counts": publisher_counts,
        "fav_genre": fav_genre,
        "fav_publisher": fav_publisher,
        "top_books": top_books,
        "history": history,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
@router.get("/library/list")
async def library_list_compat():
    """Legacy alias for /api/library, kept for backward compatibility."""
    return await api_library()