# novela/containers/novela/routers/library.py
import base64
import uuid
from datetime import datetime, timezone
from pathlib import Path
from fastapi import APIRouter, File, Request, UploadFile
from fastapi.responses import HTMLResponse, Response
from fastapi.templating import Jinja2Templates
from PIL import UnidentifiedImageError
from db import get_db_conn
from epub import add_cover_to_epub
from routers.common import (
LIBRARY_DIR,
ensure_cover_cache_for_book,
ensure_cover_missing_tag,
ensure_unique_rel_path,
list_library_json,
make_cover_thumb_webp,
make_rel_path,
media_type_from_suffix,
prune_empty_dirs,
relative_file_info,
resolve_library_path,
scan_media,
upsert_book,
upsert_cover_cache,
)
templates = Jinja2Templates(directory="templates")
router = APIRouter()
def _collect_files() -> list[Path]:
    """Return every supported media file found anywhere under LIBRARY_DIR."""
    patterns = ("*.epub", "*.pdf", "*.cbr", "*.cbz")
    return [path for pattern in patterns for path in LIBRARY_DIR.rglob(pattern)]
def _sync_disk_to_db() -> int:
    """Synchronize the library table with the files on disk.

    Upserts a row (plus tags and cover cache) for every supported media file
    under LIBRARY_DIR, then deletes rows whose file no longer exists.

    Returns:
        Number of files successfully scanned and upserted.
    """
    files = _collect_files()
    synced = 0
    with get_db_conn() as conn:
        with conn:  # one transaction for the whole sync
            for p in files:
                rel = p.relative_to(LIBRARY_DIR).as_posix()
                meta = scan_media(p)
                if not meta.get("media_type"):
                    # Unreadable / unrecognized file: skip rather than insert junk.
                    continue
                tags = [(s, "subject") for s in meta.get("subjects", [])]
                upsert_book(conn, rel, meta, tags)
                has_cover = bool(meta.get("has_cover"))
                ensure_cover_missing_tag(conn, rel, has_cover)
                if has_cover:
                    ensure_cover_cache_for_book(conn, rel, p, meta["media_type"])
                synced += 1
            # Remove DB rows for files that disappeared from disk. Use a
            # single cursor + executemany instead of opening a new cursor
            # per stale row (the original re-created the cursor each time).
            disk_files = {p.relative_to(LIBRARY_DIR).as_posix() for p in files}
            with conn.cursor() as cur:
                cur.execute("SELECT filename FROM library")
                stale = {r[0] for r in cur.fetchall()} - disk_files
                if stale:
                    cur.executemany(
                        "DELETE FROM library WHERE filename = %s",
                        [(name,) for name in stale],
                    )
    return synced
@router.get("/library", response_class=HTMLResponse)
async def library_page(request: Request):
    """Render the main library page."""
    context = {"active": "library"}
    return templates.TemplateResponse(request, "library.html", context)
@router.get("/api/library")
async def api_library(rescan: bool = False, include_file_info: bool = False):
    """Return the library as JSON.

    The expensive full disk scan is skipped by default; pass ?rescan=true
    (or call /library/rescan) when a full disk-to-DB sync is needed.
    """
    if rescan:
        _sync_disk_to_db()
    books = list_library_json()
    if include_file_info:
        for book in books:
            path = resolve_library_path(book["filename"])
            if path and path.exists():
                book.update(relative_file_info(path))
    return books
@router.post("/library/rescan")
async def library_rescan():
    """Force a full disk-to-DB sync and report how many files were scanned."""
    count = _sync_disk_to_db()
    return {"ok": True, "scanned": count}
@router.post("/library/import")
async def library_import(files: list[UploadFile] = File(...)):
    """Import uploaded media files into the library.

    Each upload is written to a temp file, scanned for metadata, moved to
    its canonical relative path, and upserted into the DB (flagged
    needs_review). Returns the imported relative paths plus a list of
    skipped files with human-readable reasons.
    """
    allowed_suffixes = {".epub", ".pdf", ".cbr", ".cbz"}
    imported: list[str] = []
    skipped: list[dict[str, str]] = []
    with get_db_conn() as conn:
        with conn:
            for upload in files:
                tmp: Path | None = None
                try:
                    name = upload.filename or "upload.bin"
                    suffix = Path(name).suffix.lower()
                    if suffix not in allowed_suffixes:
                        skipped.append({"file": name, "reason": "Unsupported file type"})
                        continue
                    data = await upload.read()
                    if not data:
                        skipped.append({"file": name, "reason": "Empty upload"})
                        continue
                    tmp = LIBRARY_DIR / f".import-{uuid.uuid4().hex}{suffix}"
                    tmp.parent.mkdir(parents=True, exist_ok=True)
                    tmp.write_bytes(data)
                    meta = scan_media(tmp)
                    media_type = meta.get("media_type")
                    if not media_type:
                        skipped.append({"file": name, "reason": "Could not detect media type"})
                        continue
                    rel = ensure_unique_rel_path(
                        make_rel_path(
                            media_type=media_type,
                            publisher=meta.get("publisher", ""),
                            author=meta.get("author", ""),
                            title=meta.get("title") or Path(name).stem,
                            series=meta.get("series", ""),
                            series_index=meta.get("series_index", 0),
                        )
                    )
                    dest = LIBRARY_DIR / rel
                    dest.parent.mkdir(parents=True, exist_ok=True)
                    tmp.replace(dest)
                    tmp = None  # moved into place; nothing left to clean up
                    rel_name = rel.as_posix()
                    meta["needs_review"] = True  # fresh imports await manual review
                    tags = [(s, "subject") for s in meta.get("subjects", [])]
                    upsert_book(conn, rel_name, meta, tags)
                    ensure_cover_missing_tag(conn, rel_name, bool(meta.get("has_cover")))
                    ensure_cover_cache_for_book(conn, rel_name, dest, media_type)
                    imported.append(rel_name)
                except Exception as e:
                    skipped.append({"file": upload.filename or "upload", "reason": str(e)})
                finally:
                    # Fix: the original only unlinked the temp file on the
                    # "could not detect media type" branch, leaking it when
                    # any later step raised. Clean up on every failure path.
                    if tmp is not None:
                        tmp.unlink(missing_ok=True)
                    await upload.close()
    return {"ok": True, "imported": imported, "skipped": skipped}
@router.delete("/library/file/{filename:path}")
async def library_delete(filename: str):
    """Delete a library file from disk and drop its DB row."""
    full = resolve_library_path(filename)
    if full is None:
        return {"error": "Invalid filename"}
    if not full.exists():
        return {"error": "File not found"}
    parent_dir = full.parent
    full.unlink()
    # Remove any directories left empty by the deletion.
    prune_empty_dirs(parent_dir)
    with get_db_conn() as conn, conn, conn.cursor() as cur:
        cur.execute("DELETE FROM library WHERE filename = %s", (filename,))
    return {"ok": True}
@router.get("/library/cover-cached/{filename:path}")
async def library_cover_cached(filename: str):
    """Serve a cached WebP cover thumbnail, generating it on demand.

    Order matters: try the cache first, then build the cache entry via
    ensure_cover_cache_for_book, then re-read it. Returns 404 if the file
    is missing or no thumbnail could be produced.
    """
    full = resolve_library_path(filename)
    if full is None or not full.exists():
        return Response(status_code=404)
    with get_db_conn() as conn:
        with conn:
            with conn.cursor() as cur:
                # Fast path: thumbnail already cached.
                cur.execute(
                    "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
                    (filename,),
                )
                row = cur.fetchone()
                if row and row[0]:
                    return Response(content=bytes(row[0]), media_type="image/webp")
                # Cache miss: resolve the media type (DB row first, file
                # suffix as fallback) and try to build the cache entry.
                cur.execute("SELECT media_type FROM library WHERE filename = %s", (filename,))
                row = cur.fetchone()
                mt = row[0] if row else media_type_from_suffix(full)
                if not ensure_cover_cache_for_book(conn, filename, full, mt):
                    return Response(status_code=404)
                # Re-read the freshly generated thumbnail.
                cur.execute(
                    "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
                    (filename,),
                )
                row = cur.fetchone()
                if row and row[0]:
                    return Response(content=bytes(row[0]), media_type="image/webp")
                # Generation reported success but produced no usable bytes.
                return Response(status_code=404)
@router.get("/library/cover/{filename:path}")
async def library_cover(filename: str):
    """Serve a full-size cover for EPUBs, or a cached thumbnail otherwise."""
    full = resolve_library_path(filename)
    if full is None or not full.exists():
        return Response(status_code=404)
    mt = media_type_from_suffix(full)
    if mt == "epub":
        # EPUB covers are extracted from the archive at full resolution.
        from routers.common import extract_cover_from_epub
        extracted = extract_cover_from_epub(full)
        if not extracted:
            return Response(status_code=404)
        raw, mime = extracted
        return Response(content=raw, media_type=mime)
    # Fix: the original checked {"pdf", "cbr"} only, so CBZ files — which
    # the library otherwise fully supports — always got a 404 here.
    if mt in {"pdf", "cbr", "cbz"}:
        with get_db_conn() as conn:
            with conn:
                if ensure_cover_cache_for_book(conn, filename, full, mt):
                    with conn.cursor() as cur:
                        cur.execute(
                            "SELECT thumb_webp FROM library_cover_cache WHERE filename = %s",
                            (filename,),
                        )
                        row = cur.fetchone()
                        if row and row[0]:
                            return Response(content=bytes(row[0]), media_type="image/webp")
    return Response(status_code=404)
@router.post("/library/cover/{filename:path}")
async def library_add_cover(filename: str, request: Request):
    """Attach an uploaded cover image to an EPUB.

    Expects a JSON body {"cover_b64": "<base64 image>"}. Embeds the cover in
    the EPUB, marks the book as having a cover, and refreshes the thumbnail
    cache (best-effort). Errors are reported as {"error": ...} dicts.
    """
    full = resolve_library_path(filename)
    if full is None or not full.exists():
        return {"error": "File not found"}
    if media_type_from_suffix(full) != "epub":
        return {"error": "Cover upload is only supported for EPUB"}
    try:
        body = await request.json()
    except Exception:
        # Fix: the original let a malformed JSON body raise an unhandled 500;
        # report it like the other validation failures instead.
        return {"error": "Invalid JSON body"}
    cover_b64 = body.get("cover_b64", "") if isinstance(body, dict) else ""
    if not cover_b64:
        return {"error": "No image provided"}
    try:
        cover_data = base64.b64decode(cover_b64)
        add_cover_to_epub(full, cover_data)
    except Exception as e:
        return {"error": str(e)}
    with get_db_conn() as conn:
        with conn:
            with conn.cursor() as cur:
                # Mark the book as having a cover; insert a stub row if the
                # file is not in the library table yet.
                cur.execute(
                    """
                    INSERT INTO library (filename, media_type, has_cover, updated_at)
                    VALUES (%s, 'epub', TRUE, NOW())
                    ON CONFLICT (filename) DO UPDATE SET has_cover = TRUE, updated_at = NOW()
                    """,
                    (filename,),
                )
            try:
                # Thumbnail generation is best-effort: the full cover is
                # already embedded in the EPUB even if this fails.
                thumb = make_cover_thumb_webp(cover_data)
                upsert_cover_cache(conn, filename, "image/webp", thumb)
            except (UnidentifiedImageError, OSError, ValueError):
                pass
            ensure_cover_missing_tag(conn, filename, True)
    return {"ok": True}
@router.post("/library/want-to-read/{filename:path}")
async def library_want_to_read(filename: str):
    """Toggle a book's want-to-read flag and return the new value."""
    if resolve_library_path(filename) is None:
        return {"error": "Invalid filename"}
    with get_db_conn() as conn, conn, conn.cursor() as cur:
        cur.execute("SELECT want_to_read FROM library WHERE filename = %s", (filename,))
        row = cur.fetchone()
        if not row:
            return {"error": "Not found"}
        toggled = not bool(row[0])
        cur.execute(
            "UPDATE library SET want_to_read = %s, updated_at = NOW() WHERE filename = %s",
            (toggled, filename),
        )
    return {"ok": True, "want_to_read": toggled}
@router.post("/library/archive/{filename:path}")
async def library_archive(filename: str):
    """Toggle a book's archived flag and return the new value."""
    if resolve_library_path(filename) is None:
        return {"error": "Invalid filename"}
    with get_db_conn() as conn, conn, conn.cursor() as cur:
        cur.execute("SELECT archived FROM library WHERE filename = %s", (filename,))
        row = cur.fetchone()
        if not row:
            return {"error": "Not found"}
        toggled = not bool(row[0])
        cur.execute(
            "UPDATE library SET archived = %s, updated_at = NOW() WHERE filename = %s",
            (toggled, filename),
        )
    return {"ok": True, "archived": toggled}
@router.post("/library/new/mark-reviewed")
async def library_mark_new_reviewed(request: Request):
    """Clear the needs_review flag for the given filenames.

    Accepts {"filenames": [...]}; non-string entries, duplicates, and paths
    that do not resolve inside the library are silently dropped. Returns the
    number of rows actually updated.
    """
    body = await request.json()
    filenames = body.get("filenames", [])
    if not isinstance(filenames, list):
        return {"error": "filenames must be a list"}
    cleaned: list[str] = []
    seen: set[str] = set()
    for raw in filenames:
        if not isinstance(raw, str):
            continue
        name = raw.strip()
        if not name or name in seen:
            continue
        if resolve_library_path(name) is None:
            # Invalid or traversal-ish path: ignore it.
            continue
        cleaned.append(name)
        seen.add(name)
    if not cleaned:
        return {"ok": True, "updated": 0}
    with get_db_conn() as conn:
        with conn:
            with conn.cursor() as cur:
                # Use PostgreSQL's = ANY(%s) with a single list parameter
                # instead of f-string-building an IN (...) placeholder list:
                # static SQL, same semantics.
                cur.execute(
                    """
                    UPDATE library
                    SET needs_review = FALSE, updated_at = NOW()
                    WHERE filename = ANY(%s)
                    """,
                    (cleaned,),
                )
                updated = cur.rowcount or 0
    return {"ok": True, "updated": updated}
@router.get("/home", response_class=HTMLResponse)
async def home_page(request: Request):
    """Render the home page."""
    context = {"active": "home"}
    return templates.TemplateResponse(request, "home.html", context)
@router.get("/api/home")
async def api_home():
    """Build the home-page payload.

    Returns continue-reading entries plus four shelves of standalone,
    non-archived books: unread/read, each split into shorts vs novels by the
    'Shorts' tag. The original duplicated each shelf query verbatim except
    for EXISTS vs NOT EXISTS; the builders below deduplicate that.
    """

    def shorts_clause(has_tag: bool) -> str:
        # (NOT) EXISTS test for the 'Shorts' tag. The operator is a literal
        # constant chosen here — never user input — so interpolation is safe.
        op = "EXISTS" if has_tag else "NOT EXISTS"
        return f"""{op} (
                SELECT 1
                FROM book_tags bt
                WHERE bt.filename = l.filename
                AND bt.tag = 'Shorts'
                AND bt.tag_type IN ('tag', 'subject')
            )"""

    def unread_query(shorts: bool) -> str:
        # Standalone books with no reading sessions and no progress,
        # shuffled for variety.
        return f"""
            SELECT l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type
            FROM library l
            LEFT JOIN reading_sessions rs ON rs.filename = l.filename
            LEFT JOIN reading_progress rp ON rp.filename = l.filename
            WHERE COALESCE(l.series, '') = ''
            AND l.filename NOT LIKE '%/Series/%'
            AND l.archived = FALSE
            AND rs.id IS NULL
            AND COALESCE(rp.progress, 0) = 0
            AND {shorts_clause(shorts)}
            GROUP BY l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type
            ORDER BY RANDOM()
        """

    def read_query(shorts: bool) -> str:
        # Standalone books with at least one reading session, least recently
        # read first (good candidates for a re-read).
        return f"""
            SELECT l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type,
            MAX(rs.read_at) AS last_read
            FROM library l
            JOIN reading_sessions rs ON rs.filename = l.filename
            WHERE COALESCE(l.series, '') = ''
            AND l.filename NOT LIKE '%/Series/%'
            AND l.archived = FALSE
            AND {shorts_clause(shorts)}
            GROUP BY l.filename, l.title, l.author, l.has_cover, l.publication_status, l.media_type
            ORDER BY MAX(rs.read_at) ASC
        """

    def simple(rows):
        # Serialize unread shelf rows (no progress/series data by definition).
        return [
            {
                "filename": r[0],
                "title": r[1] or "",
                "author": r[2] or "",
                "has_cover": bool(r[3]),
                "publication_status": r[4] or "",
                "media_type": r[5] or "epub",
                "progress": 0,
                "series": "",
                "series_index": 0,
            }
            for r in rows
        ]

    def simple_read(rows):
        # Serialize read shelf rows; includes the last_read timestamp.
        return [
            {
                "filename": r[0],
                "title": r[1] or "",
                "author": r[2] or "",
                "has_cover": bool(r[3]),
                "publication_status": r[4] or "",
                "media_type": r[5] or "epub",
                "last_read": r[6].isoformat() if r[6] else None,
                "progress": 0,
                "series": "",
                "series_index": 0,
            }
            for r in rows
        ]

    with get_db_conn() as conn:
        with conn.cursor() as cur:
            # Books with in-flight progress, most recently touched first.
            cur.execute(
                """
                SELECT l.filename, l.title, l.author, l.has_cover,
                l.series, l.series_index, l.publication_status,
                l.media_type,
                COALESCE(rp.progress, 0) AS progress,
                rp.cfi
                FROM reading_progress rp
                JOIN library l ON l.filename = rp.filename
                WHERE rp.progress > 0
                AND l.archived = FALSE
                ORDER BY rp.updated_at DESC
                """
            )
            cr_rows = cur.fetchall()
            cur.execute(unread_query(shorts=True))
            shorts_rows = cur.fetchall()
            cur.execute(unread_query(shorts=False))
            novels_rows = cur.fetchall()
            cur.execute(read_query(shorts=True))
            shorts_read_rows = cur.fetchall()
            cur.execute(read_query(shorts=False))
            novels_read_rows = cur.fetchall()
    return {
        "continue_reading": [
            {
                "filename": r[0],
                "title": r[1] or "",
                "author": r[2] or "",
                "has_cover": bool(r[3]),
                "series": r[4] or "",
                "series_index": r[5] or 0,
                "publication_status": r[6] or "",
                "media_type": r[7] or "epub",
                "progress": r[8] or 0,
                "progress_cfi": r[9],
            }
            for r in cr_rows
        ],
        "shorts_unread": simple(shorts_rows),
        "novels_unread": simple(novels_rows),
        "shorts_read": simple_read(shorts_read_rows),
        "novels_read": simple_read(novels_read_rows),
    }
@router.get("/stats", response_class=HTMLResponse)
async def stats_page(request: Request):
    """Render the reading-statistics page."""
    context = {"active": "stats"}
    return templates.TemplateResponse(request, "stats.html", context)
@router.get("/api/stats")
async def api_stats():
    """Aggregate reading statistics for the stats page.

    Returns headline totals, per-media-type counts, monthly/weekday/hourly
    read histograms, genre and publisher rankings, the ten most-reread
    books, and the 50 most recent reading sessions with their genres.
    """
    with get_db_conn() as conn:
        with conn.cursor() as cur:
            # Headline totals.
            cur.execute("SELECT COUNT(*)::int FROM library")
            total_books = cur.fetchone()[0]
            cur.execute("SELECT COUNT(*)::int FROM reading_sessions")
            total_reads = cur.fetchone()[0]
            cur.execute("SELECT COUNT(DISTINCT filename)::int FROM reading_sessions")
            unique_books_read = cur.fetchone()[0]
            # Library composition by media type.
            cur.execute(
                """
                SELECT media_type, COUNT(*)::int
                FROM library
                GROUP BY media_type
                ORDER BY media_type
                """
            )
            by_type = [{"media_type": r[0], "count": r[1]} for r in cur.fetchall()]
            # Reads per month over the trailing 12 months. The months CTE
            # generates every month in the window so empty months still
            # appear with a count of 0.
            cur.execute(
                """
                WITH months AS (
                    SELECT date_trunc('month', CURRENT_DATE) - (n * interval '1 month') AS month_start
                    FROM generate_series(11, 0, -1) AS n
                ), counts AS (
                    SELECT date_trunc('month', read_at) AS month_start, COUNT(*)::int AS cnt
                    FROM reading_sessions
                    WHERE read_at >= date_trunc('month', CURRENT_DATE) - interval '11 months'
                    GROUP BY 1
                )
                SELECT to_char(m.month_start, 'YYYY-MM') AS month, COALESCE(c.cnt, 0)::int AS count
                FROM months m
                LEFT JOIN counts c ON c.month_start = m.month_start
                ORDER BY m.month_start
                """
            )
            reads_by_month = [{"month": r[0], "count": r[1]} for r in cur.fetchall()]
            # Reads by day of week. PostgreSQL's DOW is 0=Sunday; the
            # (dow + 6) % 7 shift re-indexes the list to Monday-first.
            cur.execute(
                """
                SELECT EXTRACT(DOW FROM read_at)::int AS dow, COUNT(*)::int
                FROM reading_sessions
                GROUP BY 1
                """
            )
            reads_by_dow = [0] * 7
            for dow, count in cur.fetchall():
                idx = (int(dow) + 6) % 7
                reads_by_dow[idx] = int(count)
            # Reads by hour of day (0-23); out-of-range values are ignored.
            cur.execute(
                """
                SELECT EXTRACT(HOUR FROM read_at)::int AS hour, COUNT(*)::int
                FROM reading_sessions
                GROUP BY 1
                """
            )
            reads_by_hour = [0] * 24
            for hour, count in cur.fetchall():
                h = int(hour)
                if 0 <= h <= 23:
                    reads_by_hour[h] = int(count)
            # Distinct-book counts per genre/subgenre tag, most common first.
            cur.execute(
                """
                SELECT bt.tag AS name, COUNT(DISTINCT bt.filename)::int AS count
                FROM book_tags bt
                JOIN library l ON l.filename = bt.filename
                WHERE bt.tag_type IN ('genre', 'subgenre')
                GROUP BY bt.tag
                ORDER BY count DESC, name ASC
                """
            )
            genre_counts = [{"name": r[0], "count": r[1]} for r in cur.fetchall()]
            # Book counts per publisher; blank/whitespace publishers excluded.
            cur.execute(
                """
                SELECT publisher AS name, COUNT(*)::int AS count
                FROM library
                WHERE COALESCE(TRIM(publisher), '') <> ''
                GROUP BY publisher
                ORDER BY count DESC, name ASC
                """
            )
            publisher_counts = [{"name": r[0], "count": r[1]} for r in cur.fetchall()]
            # Top ten most-reread books; falls back to the filename when the
            # title is blank. Recency breaks count ties.
            cur.execute(
                """
                SELECT
                COALESCE(NULLIF(TRIM(l.title), ''), l.filename) AS title,
                COALESCE(l.author, '') AS author,
                COUNT(*)::int AS count
                FROM reading_sessions rs
                JOIN library l ON l.filename = rs.filename
                GROUP BY l.filename, l.title, l.author
                ORDER BY count DESC, MAX(rs.read_at) DESC
                LIMIT 10
                """
            )
            top_books = [{"title": r[0], "author": r[1], "count": r[2]} for r in cur.fetchall()]
            # The 50 most recent reading sessions, each with the book's
            # genre/subgenre tags aggregated into an array (NULLs from
            # non-genre tags removed by array_remove).
            cur.execute(
                """
                SELECT
                COALESCE(NULLIF(TRIM(l.title), ''), l.filename) AS title,
                COALESCE(l.author, '') AS author,
                COALESCE(l.publisher, '') AS publisher,
                rs.read_at,
                COALESCE(
                    array_remove(
                        array_agg(DISTINCT CASE WHEN bt.tag_type IN ('genre', 'subgenre') THEN bt.tag END),
                        NULL
                    ),
                    ARRAY[]::text[]
                ) AS genres
                FROM reading_sessions rs
                JOIN library l ON l.filename = rs.filename
                LEFT JOIN book_tags bt ON bt.filename = l.filename
                GROUP BY rs.id, l.filename, l.title, l.author, l.publisher, rs.read_at
                ORDER BY rs.read_at DESC
                LIMIT 50
                """
            )
            history = [
                {
                    "title": r[0],
                    "author": r[1],
                    "publisher": r[2],
                    "read_at": r[3].isoformat() if r[3] else None,
                    "genres": list(r[4] or []),
                }
                for r in cur.fetchall()
            ]
    # "Favorites" are simply the top-ranked genre/publisher, if any exist.
    fav_genre = genre_counts[0]["name"] if genre_counts else None
    fav_publisher = publisher_counts[0]["name"] if publisher_counts else None
    return {
        "total_books": total_books,
        "total_reads": total_reads,
        "unique_books_read": unique_books_read,
        "by_media_type": by_type,
        "reads_by_month": reads_by_month,
        "reads_by_dow": reads_by_dow,
        "reads_by_hour": reads_by_hour,
        "genre_counts": genre_counts,
        "publisher_counts": publisher_counts,
        "fav_genre": fav_genre,
        "fav_publisher": fav_publisher,
        "top_books": top_books,
        "history": history,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
@router.get("/library/list")
async def library_list_compat():
    """Legacy alias for /api/library, kept for backward compatibility."""
    return await api_library()