69 lines
2.1 KiB
Python
69 lines
2.1 KiB
Python
from pathlib import Path
|
|
|
|
import fitz
|
|
from PIL import Image, ImageOps
|
|
|
|
# Cover thumbnail size in pixels (width, height); passed to ImageOps.fit
# when a thumbnail is generated.
COVER_W, COVER_H = 300, 450
|
|
|
|
|
|
def pdf_page_count(path: Path) -> int:
    """Return the number of pages in the PDF at *path*.

    The document is opened only for the duration of the call; the context
    manager closes it again before the count is handed back.
    """
    with fitz.open(path) as pdf:
        total = pdf.page_count
    return total
|
|
|
|
|
|
def pdf_render_page(path: Path, page_num: int, dpi: int = 150) -> bytes:
    """Render a single page of a PDF to PNG-encoded bytes.

    Args:
        path: Location of the PDF file.
        page_num: Zero-based index of the page to render.
        dpi: Output resolution. The page is scaled by ``dpi / 72`` on both
            axes before rasterising.

    Returns:
        The rendered page as PNG data, rendered without an alpha channel.

    Raises:
        ValueError: If *dpi* is zero or negative.
        IndexError: If *page_num* is not in ``0 <= page_num < page_count``.
    """
    # A zero or negative scale factor cannot produce a usable raster, so
    # reject it up front instead of handing a degenerate matrix to the
    # renderer. Done before opening the file: the check needs no document.
    if dpi <= 0:
        raise ValueError("dpi must be positive")

    with fitz.open(path) as doc:
        if not 0 <= page_num < doc.page_count:
            raise IndexError("Page out of range")
        zoom = dpi / 72.0
        page = doc.load_page(page_num)
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        return pix.tobytes("png")
|
|
|
|
|
|
def _webp_thumb_from_image(path: Path) -> bytes:
    """Build a WEBP cover thumbnail from the image file at *path*.

    The image is passed through ``ImageOps.exif_transpose``, converted to
    RGB unless its mode is already RGB or RGBA, and then fitted to
    ``COVER_W`` x ``COVER_H`` with Lanczos resampling.

    Returns:
        The WEBP-encoded thumbnail (saved with quality=82, method=6).
    """
    from io import BytesIO

    with Image.open(path) as source:
        upright = ImageOps.exif_transpose(source)
        if upright.mode in ("RGB", "RGBA"):
            prepared = upright
        else:
            prepared = upright.convert("RGB")
        cover = ImageOps.fit(
            prepared,
            (COVER_W, COVER_H),
            method=Image.Resampling.LANCZOS,
        )
        # Encode while the source file is still open, since the processing
        # above may read pixel data lazily from it.
        buffer = BytesIO()
        cover.save(buffer, format="WEBP", quality=82, method=6)
    return buffer.getvalue()
|
|
|
|
|
|
def pdf_cover_thumb(path: Path) -> bytes:
    """Return a WEBP cover thumbnail made from the first page of a PDF.

    Page 0 is rasterised at 1.5x scale without alpha, written to a scratch
    PNG, and that PNG is handed to ``_webp_thumb_from_image``.

    Args:
        path: Location of the PDF file.

    Returns:
        The WEBP-encoded thumbnail bytes.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    import tempfile

    with fitz.open(path) as doc:
        if doc.page_count == 0:
            raise ValueError("PDF has no pages")
        page = doc.load_page(0)
        pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5), alpha=False)

        # The scratch file goes into a private temporary directory rather
        # than next to the PDF. A fixed sibling name fails when the PDF's
        # directory is read-only, collides when two calls process the same
        # PDF at once, and could overwrite an unrelated file. The directory
        # (and the PNG in it) is removed on exit, even if thumbnailing
        # raises.
        with tempfile.TemporaryDirectory(prefix="pdf-cover-") as scratch_dir:
            png_path = Path(scratch_dir) / "cover.png"
            pix.save(png_path)
            return _webp_thumb_from_image(png_path)
|
|
|
|
|
|
def pdf_scan_metadata(path: Path) -> dict:
    """Collect book-style metadata for the PDF at *path*.

    Text fields are read from the document's embedded metadata and stripped
    of surrounding whitespace. Fields the PDF metadata does not supply are
    returned as empty placeholders (``""``, ``0`` or ``[]``).

    Args:
        path: Location of the PDF file.

    Returns:
        A dict with the keys ``title``, ``author``, ``publisher``,
        ``description``, ``source_url``, ``series``, ``series_index``,
        ``publication_status``, ``has_cover``, ``subjects`` and
        ``publish_date``.
    """
    with fitz.open(path) as doc:
        meta = doc.metadata or {}

        def text(key: str) -> str:
            # Missing keys and None values both collapse to "".
            return (meta.get(key) or "").strip()

        return {
            # Strip *before* falling back: a whitespace-only embedded title
            # is truthy and would otherwise win over the file name and then
            # be stripped down to an empty title.
            "title": text("title") or path.stem.strip(),
            "author": text("author"),
            # NOTE(review): "producer" is mapped to publisher here; in PDF
            # metadata it usually names the generating software -- confirm
            # this is what callers expect.
            "publisher": text("producer"),
            "description": text("subject"),
            "source_url": "",
            "series": "",
            "series_index": 0,
            "publication_status": "",
            # Any PDF with at least one page is reported as having a cover.
            "has_cover": doc.page_count > 0,
            "subjects": [],
            "publish_date": "",
        }
|