novela/containers/novela/routers/backup.py
Ivo Oskamp e4d2e2c636 DB-stored books, full-text search, backup restore, and AO3 scraper
- DB-stored books (Phase 1–6): chapters and images stored in PostgreSQL; grabber writes to DB, EPUB→DB conversion, DB→EPUB export, FTS search page (/search)
- Chapter editor: Monaco editor supports DB-stored books; inline title editing
- Grabber: DB/EPUB storage toggle on Convert page
- Backup: restore from Dropbox snapshot (browse snapshots, restore individual or selected files)
- AO3 scraper: initial implementation
- Changelog: v0.1.2 and v0.1.3 entries added to changelog.py and changelog.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 15:13:08 +02:00

1328 lines
45 KiB
Python

import asyncio
import hashlib
import json
import os
import shutil
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.parse import urlencode
import dropbox
import httpx
from dropbox.exceptions import ApiError, AuthError
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from db import get_db_conn
from routers.common import scan_media, upsert_book
from security import decrypt_value, encrypt_value, is_encrypted_value
templates = Jinja2Templates(directory="templates")
router = APIRouter()
LIBRARY_DIR = Path(os.environ.get("LIBRARY_DIR", "library"))
CONFIG_DIR = Path(os.environ.get("CONFIG_DIR", "config"))
CONFIG_DIR.mkdir(parents=True, exist_ok=True)
MANIFEST_PATH = CONFIG_DIR / "backup_manifest.json"
DEFAULT_DROPBOX_ROOT = "/novela"
DEFAULT_RETENTION_COUNT = 14
DEFAULT_SCHEDULE_ENABLED = False
DEFAULT_SCHEDULE_INTERVAL_HOURS = 24
BACKUP_TASKS: dict[int, asyncio.Task] = {}
BACKUP_PROGRESS: dict[int, dict] = {} # log_id → {done, total, phase}
SCHEDULER_TASK: asyncio.Task | None = None
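# Dropbox layout used by the backup pipeline below:
#   <root>/library_objects/<sha256[:2]>/<sha256>          content-addressed copies of library files
#   <root>/library_snapshots/snapshot-YYYYMMDD-HHMMSS.json per-run manifest mapping relative path -> {mtime, size, sha256}
#   <root>/postgres/<db>-YYYYMMDD-HHMMSS.sql               pg_dump of the application database
# A local cache of the last uploaded state lives in CONFIG_DIR/backup_manifest.json so
# files with an unchanged mtime/size can skip re-hashing on subsequent runs.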
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _load_manifest() -> dict[str, dict[str, float | int | str]]:
if not MANIFEST_PATH.exists():
return {}
try:
data = json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
if isinstance(data, dict):
return data
except Exception:
pass
return {}
def _save_manifest(manifest: dict[str, dict[str, float | int | str]]) -> None:
MANIFEST_PATH.write_text(json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8")
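# All Dropbox settings (token, app key/secret, backup root, retention, schedule) are stored
# as encrypted rows in the generic `credentials` table, keyed by a synthetic `site` value.
# The *_details() helpers below also re-encrypt legacy plaintext rows in place when found.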
def _dropbox_credential_details() -> dict:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT username, password, updated_at FROM credentials WHERE site = 'dropbox' LIMIT 1"
)
row = cur.fetchone()
if not row:
return {"configured": False, "token": "", "updated_at": None}
username_raw, password_raw, updated_at = row
username = decrypt_value(username_raw)
password = decrypt_value(password_raw)
token = (password or username or "").strip()
if not is_encrypted_value(username_raw) or not is_encrypted_value(password_raw):
cur.execute(
"""
UPDATE credentials
SET username = %s, password = %s, updated_at = NOW()
WHERE site = 'dropbox'
RETURNING updated_at
""",
(encrypt_value(username), encrypt_value(password)),
)
upd = cur.fetchone()
if upd:
updated_at = upd[0]
return {
"configured": bool(token),
"token": token,
"updated_at": updated_at.isoformat() if updated_at else None,
}
def _load_dropbox_token() -> str:
return _dropbox_credential_details().get("token", "")
def _load_dropbox_app_key() -> str:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT password FROM credentials WHERE site = 'dropbox_app_key' LIMIT 1"
)
row = cur.fetchone()
if not row:
return ""
return decrypt_value(row[0]).strip()
def _load_dropbox_app_secret() -> str:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT password FROM credentials WHERE site = 'dropbox_app_secret' LIMIT 1"
)
row = cur.fetchone()
if not row:
return ""
return decrypt_value(row[0]).strip()
def _save_dropbox_app_key(app_key: str) -> None:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO credentials (site, username, password, updated_at)
VALUES ('dropbox_app_key', %s, %s, NOW())
ON CONFLICT (site) DO UPDATE
SET username = EXCLUDED.username,
password = EXCLUDED.password,
updated_at = NOW()
""",
(encrypt_value(""), encrypt_value(app_key.strip())),
)
def _save_dropbox_app_secret(app_secret: str) -> None:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO credentials (site, username, password, updated_at)
VALUES ('dropbox_app_secret', %s, %s, NOW())
ON CONFLICT (site) DO UPDATE
SET username = EXCLUDED.username,
password = EXCLUDED.password,
updated_at = NOW()
""",
(encrypt_value(""), encrypt_value(app_secret.strip())),
)
def _normalize_dropbox_root(value: str | None) -> str:
root = (value or "").strip() or DEFAULT_DROPBOX_ROOT
if not root.startswith("/"):
root = "/" + root
root = "/" + "/".join(part for part in root.split("/") if part)
return root or DEFAULT_DROPBOX_ROOT
def _dropbox_root_details() -> dict:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT username, password, updated_at FROM credentials WHERE site = 'dropbox_backup_root' LIMIT 1"
)
row = cur.fetchone()
if not row:
env_val = os.environ.get("DROPBOX_BACKUP_ROOT", DEFAULT_DROPBOX_ROOT)
return {
"root": _normalize_dropbox_root(env_val),
"updated_at": None,
}
username_raw, password_raw, updated_at = row
username = decrypt_value(username_raw)
password = decrypt_value(password_raw)
root = _normalize_dropbox_root(password or username or DEFAULT_DROPBOX_ROOT)
if not is_encrypted_value(username_raw) or not is_encrypted_value(password_raw):
cur.execute(
"""
UPDATE credentials
SET username = %s, password = %s, updated_at = NOW()
WHERE site = 'dropbox_backup_root'
RETURNING updated_at
""",
(encrypt_value(""), encrypt_value(root)),
)
upd = cur.fetchone()
if upd:
updated_at = upd[0]
return {
"root": root,
"updated_at": updated_at.isoformat() if updated_at else None,
}
def _load_dropbox_root() -> str:
return _dropbox_root_details().get("root", DEFAULT_DROPBOX_ROOT)
def _dropbox_retention_details() -> dict:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT username, password, updated_at FROM credentials WHERE site = 'dropbox_backup_retention' LIMIT 1"
)
row = cur.fetchone()
if not row:
return {"retention_count": DEFAULT_RETENTION_COUNT, "updated_at": None}
username_raw, password_raw, updated_at = row
username = decrypt_value(username_raw)
password = decrypt_value(password_raw)
raw = (password or username or "").strip()
try:
retention_count = max(1, int(raw))
except Exception:
retention_count = DEFAULT_RETENTION_COUNT
if not is_encrypted_value(username_raw) or not is_encrypted_value(password_raw):
cur.execute(
"""
UPDATE credentials
SET username = %s, password = %s, updated_at = NOW()
WHERE site = 'dropbox_backup_retention'
RETURNING updated_at
""",
(encrypt_value(""), encrypt_value(str(retention_count))),
)
upd = cur.fetchone()
if upd:
updated_at = upd[0]
return {
"retention_count": retention_count,
"updated_at": updated_at.isoformat() if updated_at else None,
}
def _load_dropbox_retention_count() -> int:
return int(_dropbox_retention_details().get("retention_count", DEFAULT_RETENTION_COUNT))
def _dropbox_schedule_details() -> dict:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT username, password, updated_at FROM credentials WHERE site = 'dropbox_backup_schedule' LIMIT 1"
)
row = cur.fetchone()
if not row:
return {
"enabled": DEFAULT_SCHEDULE_ENABLED,
"interval_hours": DEFAULT_SCHEDULE_INTERVAL_HOURS,
"updated_at": None,
}
username_raw, password_raw, updated_at = row
username = decrypt_value(username_raw)
password = decrypt_value(password_raw)
raw = (password or username or "").strip().lower()
enabled = False
interval_hours = DEFAULT_SCHEDULE_INTERVAL_HOURS
try:
obj = json.loads(raw) if raw.startswith("{") else None
except Exception:
obj = None
if isinstance(obj, dict):
enabled = bool(obj.get("enabled", DEFAULT_SCHEDULE_ENABLED))
try:
interval_hours = max(1, int(obj.get("interval_hours", DEFAULT_SCHEDULE_INTERVAL_HOURS)))
except Exception:
interval_hours = DEFAULT_SCHEDULE_INTERVAL_HOURS
else:
parts = raw.split(":")
if len(parts) == 2:
enabled = parts[0] in {"1", "true", "yes", "on"}
try:
interval_hours = max(1, int(parts[1]))
except Exception:
interval_hours = DEFAULT_SCHEDULE_INTERVAL_HOURS
norm = json.dumps({"enabled": enabled, "interval_hours": interval_hours}, separators=(",", ":"))
if not is_encrypted_value(username_raw) or not is_encrypted_value(password_raw):
cur.execute(
"""
UPDATE credentials
SET username = %s, password = %s, updated_at = NOW()
WHERE site = 'dropbox_backup_schedule'
RETURNING updated_at
""",
(encrypt_value(""), encrypt_value(norm)),
)
upd = cur.fetchone()
if upd:
updated_at = upd[0]
return {
"enabled": enabled,
"interval_hours": interval_hours,
"updated_at": updated_at.isoformat() if updated_at else None,
}
def _load_backup_schedule() -> tuple[bool, int]:
d = _dropbox_schedule_details()
return bool(d.get("enabled", DEFAULT_SCHEDULE_ENABLED)), int(d.get("interval_hours", DEFAULT_SCHEDULE_INTERVAL_HOURS))
def _save_backup_schedule(enabled: bool, interval_hours: int) -> None:
interval = max(1, int(interval_hours))
payload = json.dumps({"enabled": bool(enabled), "interval_hours": interval}, separators=(",", ":"))
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO credentials (site, username, password, updated_at)
VALUES ('dropbox_backup_schedule', %s, %s, NOW())
ON CONFLICT (site) DO UPDATE
SET username = EXCLUDED.username,
password = EXCLUDED.password,
updated_at = NOW()
""",
(encrypt_value(""), encrypt_value(payload)),
)
def _dropbox_join(root: str, *parts: str) -> str:
clean_root = _normalize_dropbox_root(root)
segs = [p.strip("/") for p in parts if p and p.strip("/")]
if clean_root == "/":
return "/" + "/".join(segs) if segs else "/"
if not segs:
return clean_root
return clean_root + "/" + "/".join(segs)
def _save_dropbox_root(root: str) -> None:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO credentials (site, username, password, updated_at)
VALUES ('dropbox_backup_root', %s, %s, NOW())
ON CONFLICT (site) DO UPDATE
SET username = EXCLUDED.username,
password = EXCLUDED.password,
updated_at = NOW()
""",
(encrypt_value(""), encrypt_value(_normalize_dropbox_root(root))),
)
def _save_dropbox_retention_count(retention_count: int) -> None:
val = max(1, int(retention_count))
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO credentials (site, username, password, updated_at)
VALUES ('dropbox_backup_retention', %s, %s, NOW())
ON CONFLICT (site) DO UPDATE
SET username = EXCLUDED.username,
password = EXCLUDED.password,
updated_at = NOW()
""",
(encrypt_value(""), encrypt_value(str(val))),
)
def _dbx() -> dropbox.Dropbox:
"""
Create a Dropbox client.
Preferred order:
1. App key + app secret + refresh token -> automatic token refresh
2. Legacy access token (backwards compatibility)
"""
token = _load_dropbox_token()
if not token:
raise RuntimeError("Dropbox token not found in credentials (site='dropbox').")
app_key = _load_dropbox_app_key()
app_secret = _load_dropbox_app_secret()
try:
if app_key and app_secret:
client = dropbox.Dropbox(
oauth2_refresh_token=token,
app_key=app_key,
app_secret=app_secret,
timeout=120,
)
else:
# Fallback: legacy access token
client = dropbox.Dropbox(token, timeout=120)
client.users_get_current_account()
except AuthError as e:
raise RuntimeError(f"Dropbox auth failed: {e}")
return client
def _ensure_dropbox_dir(client: dropbox.Dropbox, path: str) -> None:
if not path or path == "/":
return
parts = [p for p in path.split("/") if p]
cur = ""
for p in parts:
cur += "/" + p
try:
client.files_create_folder_v2(cur)
except ApiError:
pass
def _dropbox_upload_bytes(client: dropbox.Dropbox, target_path: str, data: bytes) -> int:
parent = str(Path(target_path).parent).replace("\\", "/")
if not parent.startswith("/"):
parent = "/" + parent
_ensure_dropbox_dir(client, parent)
client.files_upload(data, target_path, mode=dropbox.files.WriteMode.overwrite, mute=True)
return len(data)
def _dropbox_exists(client: dropbox.Dropbox, path: str) -> bool:
try:
client.files_get_metadata(path)
return True
except ApiError as e:
text = str(e).lower()
if "not_found" in text or "path/not_found" in text:
return False
raise
def _dropbox_list_files_recursive(client: dropbox.Dropbox, root: str) -> list[str]:
paths: list[str] = []
try:
res = client.files_list_folder(root, recursive=True)
except ApiError as e:
text = str(e).lower()
if "not_found" in text or "path/not_found" in text:
return []
raise
while True:
for entry in res.entries:
if isinstance(entry, dropbox.files.FileMetadata):
paths.append(entry.path_lower or entry.path_display or "")
if not res.has_more:
break
res = client.files_list_folder_continue(res.cursor)
return [p for p in paths if p]
def _dropbox_delete_paths(client: dropbox.Dropbox, paths: list[str]) -> int:
deleted = 0
for p in paths:
try:
client.files_delete_v2(p)
deleted += 1
except ApiError:
pass
return deleted
def _iter_library_files() -> list[Path]:
if not LIBRARY_DIR.exists():
return []
return [p for p in LIBRARY_DIR.rglob("*") if p.is_file()]
def _current_file_state(path: Path) -> dict[str, float | int]:
st = path.stat()
return {"mtime": st.st_mtime, "size": st.st_size}
def _sha256_file(path: Path) -> str:
h = hashlib.sha256()
with path.open("rb") as f:
for chunk in iter(lambda: f.read(1024 * 1024), b""):
h.update(chunk)
return h.hexdigest()
def _snapshot_name() -> str:
stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
return f"snapshot-{stamp}.json"
def _object_path(objects_root: str, sha256: str) -> str:
return _dropbox_join(objects_root, sha256[:2], sha256)
def _pg_dump_cmd(tmp_path: Path) -> list[str]:
return [
"pg_dump",
"-h",
os.environ.get("POSTGRES_HOST", "postgres"),
"-p",
str(os.environ.get("POSTGRES_PORT", "5432")),
"-U",
os.environ.get("POSTGRES_USER", "novela"),
"-d",
os.environ.get("POSTGRES_DB", "novela"),
"-f",
str(tmp_path),
]
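# Runs pg_dump against the Postgres instance configured via the POSTGRES_* environment
# variables (password passed through PGPASSWORD), writing to a temp file that is removed
# afterwards. Returns the dump bytes plus a timestamped name like "<db>-YYYYMMDD-HHMMSS.sql".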
def _run_pg_dump() -> tuple[bytes, str]:
db = os.environ.get("POSTGRES_DB", "novela")
env = os.environ.copy()
env["PGPASSWORD"] = os.environ.get("POSTGRES_PASSWORD", "")
with NamedTemporaryFile(suffix=".sql", delete=False) as tmp:
tmp_path = Path(tmp.name)
try:
cmd = _pg_dump_cmd(tmp_path)
proc = subprocess.run(cmd, env=env, capture_output=True, text=True)
if proc.returncode != 0:
stderr = (proc.stderr or "").strip()
raise RuntimeError(f"pg_dump failed: {stderr or 'unknown error'}")
data = tmp_path.read_bytes()
stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
return data, f"{db}-{stamp}.sql"
finally:
tmp_path.unlink(missing_ok=True)
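# A backup counts as "running" only if its backup_log row is 'running' AND an in-process
# task still exists for it; rows left behind by a crash or restart are marked as errored here.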
def _has_running_backup() -> bool:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
SELECT id
FROM backup_log
WHERE status = 'running' AND finished_at IS NULL
ORDER BY started_at DESC
"""
)
rows = [int(r[0]) for r in cur.fetchall()]
if not rows:
return False
active_ids = set(BACKUP_TASKS.keys())
stale_ids = [rid for rid in rows if rid not in active_ids]
if stale_ids:
cur.execute(
"""
UPDATE backup_log
SET status = 'error',
error_msg = COALESCE(error_msg, 'Interrupted: service restart or crash'),
finished_at = NOW()
WHERE id = ANY(%s)
""",
(stale_ids,),
)
return any(rid in active_ids for rid in rows)
def _insert_backup_log_running() -> int:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO backup_log (status, started_at)
VALUES ('running', NOW())
RETURNING id
"""
)
return int(cur.fetchone()[0])
def _finish_backup_log(log_id: int, *, status: str, files_count: int | None, size_bytes: int | None, error_msg: str | None) -> None:
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
UPDATE backup_log
SET status = %s,
files_count = %s,
size_bytes = %s,
error_msg = %s,
finished_at = NOW()
WHERE id = %s
""",
(status, files_count, size_bytes, error_msg, log_id),
)
def _list_snapshot_paths(client: dropbox.Dropbox, snapshots_root: str) -> list[str]:
files = _dropbox_list_files_recursive(client, snapshots_root)
return sorted([p for p in files if p.endswith(".json")], reverse=True)
def _load_snapshot_data(client: dropbox.Dropbox, snapshot_path: str) -> dict:
_meta, res = client.files_download(snapshot_path)
raw = res.content
parsed = json.loads(raw.decode("utf-8", errors="replace"))
return parsed if isinstance(parsed, dict) else {}
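# Retention: _list_snapshot_paths sorts paths in reverse order, and because snapshot file
# names embed a UTC timestamp the newest snapshots come first; anything beyond keep_count
# is deleted from Dropbox.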
def _enforce_snapshot_retention(
client: dropbox.Dropbox,
snapshots_root: str,
keep_count: int,
) -> tuple[list[str], list[str]]:
all_snapshots = _list_snapshot_paths(client, snapshots_root)
keep = max(1, int(keep_count))
kept = all_snapshots[:keep]
to_delete = all_snapshots[keep:]
if to_delete:
_dropbox_delete_paths(client, to_delete)
return kept, to_delete
def _collect_hashes_from_snapshots(client: dropbox.Dropbox, snapshot_paths: list[str]) -> set[str]:
used: set[str] = set()
for path in snapshot_paths:
try:
snap = _load_snapshot_data(client, path)
except Exception:
continue
files = snap.get("files", {}) if isinstance(snap, dict) else {}
if not isinstance(files, dict):
continue
for item in files.values():
if not isinstance(item, dict):
continue
sha = str(item.get("sha256") or "").lower()
if len(sha) == 64 and all(c in "0123456789abcdef" for c in sha):
used.add(sha)
return used
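# Garbage collection: any object whose SHA-256 is not referenced by a retained snapshot
# is deleted from the objects folder.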
def _prune_orphan_objects(client: dropbox.Dropbox, objects_root: str, referenced_hashes: set[str]) -> int:
object_files = _dropbox_list_files_recursive(client, objects_root)
to_delete: list[str] = []
for p in object_files:
name = Path(p).name.lower()
if len(name) == 64 and all(c in "0123456789abcdef" for c in name):
if name not in referenced_hashes:
to_delete.append(p)
return _dropbox_delete_paths(client, to_delete)
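# Full backup pass: scan the library, reuse cached hashes for files whose mtime/size are
# unchanged, upload any object not yet present in Dropbox, write a snapshot manifest,
# enforce snapshot retention and prune orphaned objects, then upload a fresh pg_dump.
# With dry_run=True nothing is uploaded; the counters report the work that would be done.
# Returns (uploaded_count, uploaded_size_in_bytes).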
def _run_backup_internal(*, dry_run: bool, progress_key: int | None = None) -> tuple[int, int]:
def _prog(done: int, total: int, phase: str) -> None:
if progress_key is not None:
BACKUP_PROGRESS[progress_key] = {"done": done, "total": total, "phase": phase}
client = None if dry_run else _dbx()
manifest = _load_manifest()
files = _iter_library_files()
total_files = len(files)
_prog(0, total_files, "scanning")
uploaded_count = 0
uploaded_size = 0
new_manifest: dict[str, dict[str, float | int | str]] = {}
dropbox_root = _load_dropbox_root()
retention_count = _load_dropbox_retention_count()
objects_root = _dropbox_join(dropbox_root, "library_objects")
snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")
if client is not None:
_ensure_dropbox_dir(client, objects_root)
_ensure_dropbox_dir(client, snapshots_root)
snapshot_files: dict[str, dict[str, float | int | str]] = {}
for idx, path in enumerate(files):
_prog(idx, total_files, "uploading")
rel = path.relative_to(LIBRARY_DIR).as_posix()
state = _current_file_state(path)
prev = manifest.get(rel, {}) if isinstance(manifest.get(rel), dict) else {}
sha256 = ""
if (
prev
and prev.get("mtime") == state["mtime"]
and prev.get("size") == state["size"]
and isinstance(prev.get("sha256"), str)
):
sha256 = str(prev.get("sha256"))
else:
sha256 = _sha256_file(path)
entry = {"mtime": state["mtime"], "size": state["size"], "sha256": sha256}
new_manifest[rel] = entry
snapshot_files[rel] = entry
object_target = _object_path(objects_root, sha256)
if client is not None:
if not _dropbox_exists(client, object_target):
data = path.read_bytes()
uploaded_size += _dropbox_upload_bytes(client, object_target, data)
uploaded_count += 1
else:
# Dry run reports potential upload work for changed objects.
if not prev or prev.get("sha256") != sha256:
uploaded_size += int(state["size"])
uploaded_count += 1
_prog(total_files, total_files, "snapshot")
snapshot = {
"created_at": _now_iso(),
"retention_count": retention_count,
"files": snapshot_files,
}
snapshot_data = json.dumps(snapshot, sort_keys=True, separators=(",", ":")).encode("utf-8")
snapshot_name = _snapshot_name()
snapshot_target = _dropbox_join(snapshots_root, snapshot_name)
if client is not None:
uploaded_size += _dropbox_upload_bytes(client, snapshot_target, snapshot_data)
uploaded_count += 1
kept_snapshots, _deleted_snapshots = _enforce_snapshot_retention(
client, snapshots_root, retention_count
)
referenced_hashes = _collect_hashes_from_snapshots(client, kept_snapshots)
_prune_orphan_objects(client, objects_root, referenced_hashes)
else:
uploaded_size += len(snapshot_data)
uploaded_count += 1
_prog(total_files, total_files, "pg_dump")
dump_data, dump_name = _run_pg_dump()
dump_target = _dropbox_join(dropbox_root, "postgres", dump_name)
if client is not None:
uploaded_size += _dropbox_upload_bytes(client, dump_target, dump_data)
else:
uploaded_size += len(dump_data)
uploaded_count += 1
if not dry_run:
_save_manifest(new_manifest)
return uploaded_count, uploaded_size
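# HTTP endpoints: credential and OAuth configuration, health/status/history reporting,
# starting backups and polling progress, snapshot browsing, and per-file restore.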
@router.get("/backup", response_class=HTMLResponse)
async def backup_page(request: Request):
template = "backup.html"
if not Path("templates/backup.html").exists():
template = "settings.html"
return templates.TemplateResponse(request, template, {"active": "backup"})
@router.get("/api/backup/credentials")
async def backup_dropbox_credentials():
details = _dropbox_credential_details()
root_details = _dropbox_root_details()
retention_details = _dropbox_retention_details()
token = details.get("token", "")
preview = ""
if token:
preview = f"{token[:4]}...{token[-4:]}" if len(token) >= 10 else "(configured)"
app_key = _load_dropbox_app_key()
app_secret = _load_dropbox_app_secret()
return {
"configured": bool(token),
"token_preview": preview,
"updated_at": details.get("updated_at"),
"app_key_configured": bool(app_key and app_secret),
"dropbox_root": root_details.get("root", DEFAULT_DROPBOX_ROOT),
"root_updated_at": root_details.get("updated_at"),
"retention_count": int(retention_details.get("retention_count", DEFAULT_RETENTION_COUNT)),
"retention_updated_at": retention_details.get("updated_at"),
"schedule_enabled": _dropbox_schedule_details().get("enabled", DEFAULT_SCHEDULE_ENABLED),
"schedule_interval_hours": _dropbox_schedule_details().get("interval_hours", DEFAULT_SCHEDULE_INTERVAL_HOURS),
"schedule_updated_at": _dropbox_schedule_details().get("updated_at"),
}
@router.post("/api/backup/credentials")
async def backup_dropbox_credentials_save(request: Request):
body = {}
try:
body = await request.json()
except Exception:
pass
try:
existing_token = _load_dropbox_token()
token = (body.get("token") or "").strip() or existing_token
if not token:
return {"ok": False, "error": "Dropbox token is required."}
app_key = (body.get("app_key") or "").strip()
app_secret = (body.get("app_secret") or "").strip()
dropbox_root = _normalize_dropbox_root(body.get("dropbox_root") or _load_dropbox_root())
raw_retention = body.get("retention_count", _load_dropbox_retention_count())
try:
retention_count = max(1, int(raw_retention))
except Exception:
retention_count = DEFAULT_RETENTION_COUNT
schedule_enabled = bool(body.get("schedule_enabled", _load_backup_schedule()[0]))
raw_interval = body.get("schedule_interval_hours", _load_backup_schedule()[1])
try:
schedule_interval_hours = max(1, int(raw_interval))
except Exception:
schedule_interval_hours = DEFAULT_SCHEDULE_INTERVAL_HOURS
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO credentials (site, username, password, updated_at)
VALUES ('dropbox', %s, %s, NOW())
ON CONFLICT (site) DO UPDATE
SET username = EXCLUDED.username,
password = EXCLUDED.password,
updated_at = NOW()
""",
(encrypt_value(""), encrypt_value(token)),
)
if app_key:
_save_dropbox_app_key(app_key)
if app_secret:
_save_dropbox_app_secret(app_secret)
_save_dropbox_root(dropbox_root)
_save_dropbox_retention_count(retention_count)
_save_backup_schedule(schedule_enabled, schedule_interval_hours)
return {
"ok": True,
"dropbox_root": dropbox_root,
"retention_count": retention_count,
"schedule_enabled": schedule_enabled,
"schedule_interval_hours": schedule_interval_hours,
}
except Exception as e:
return {"ok": False, "error": str(e)}
@router.delete("/api/backup/credentials")
async def backup_dropbox_credentials_delete():
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""DELETE FROM credentials WHERE site IN (
'dropbox',
'dropbox_app_key',
'dropbox_app_secret',
'dropbox_backup_root',
'dropbox_backup_retention',
'dropbox_backup_schedule'
)"""
)
return {"ok": True}
@router.get("/api/backup/health")
async def backup_health():
token_present = bool(_load_dropbox_token())
pg_dump_path = shutil.which("pg_dump")
dropbox_ok = False
dropbox_error = None
if token_present:
try:
_dbx()
dropbox_ok = True
except Exception as e:
dropbox_error = str(e)
dropbox_root = _load_dropbox_root()
retention_count = _load_dropbox_retention_count()
schedule_enabled, schedule_interval_hours = _load_backup_schedule()
return {
"token_present": token_present,
"dropbox_ok": dropbox_ok,
"dropbox_error": dropbox_error,
"dropbox_root": dropbox_root,
"retention_count": retention_count,
"schedule_enabled": schedule_enabled,
"schedule_interval_hours": schedule_interval_hours,
"pg_dump_available": bool(pg_dump_path),
"pg_dump_path": pg_dump_path,
"library_exists": LIBRARY_DIR.exists(),
"library_path": str(LIBRARY_DIR.resolve()),
}
@router.get("/api/backup/status")
async def backup_status():
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"""
SELECT id, status, files_count, size_bytes, error_msg, started_at, finished_at
FROM backup_log
ORDER BY started_at DESC
LIMIT 1
"""
)
row = cur.fetchone()
if not row:
return {"status": "never"}
return {
"id": row[0],
"status": row[1],
"files_count": row[2],
"size_bytes": row[3],
"error_msg": row[4],
"started_at": row[5].isoformat() if row[5] else None,
"finished_at": row[6].isoformat() if row[6] else None,
}
@router.get("/api/backup/history")
async def backup_history():
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"""
SELECT id, status, files_count, size_bytes, error_msg, started_at, finished_at
FROM backup_log
ORDER BY started_at DESC
LIMIT 20
"""
)
rows = cur.fetchall()
return [
{
"id": r[0],
"status": r[1],
"files_count": r[2],
"size_bytes": r[3],
"error_msg": r[4],
"started_at": r[5].isoformat() if r[5] else None,
"finished_at": r[6].isoformat() if r[6] else None,
}
for r in rows
]
def _start_backup_task(*, dry_run: bool) -> int:
log_id = _insert_backup_log_running()
task = asyncio.create_task(_run_backup_job(log_id, dry_run))
BACKUP_TASKS[log_id] = task
return log_id
def _is_scheduled_backup_due(interval_hours: int) -> bool:
with get_db_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"""
SELECT finished_at
FROM backup_log
WHERE status = 'success' AND finished_at IS NOT NULL
ORDER BY finished_at DESC
LIMIT 1
"""
)
row = cur.fetchone()
if not row or not row[0]:
return True
last = row[0]
if last.tzinfo is None:
last = last.replace(tzinfo=timezone.utc)
now = datetime.now(timezone.utc)
return (now - last).total_seconds() >= max(1, int(interval_hours)) * 3600
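# Background scheduler: polls once a minute and starts a real (non-dry-run) backup when the
# schedule is enabled, no backup is currently running, and the configured interval has
# elapsed since the last successful run.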
async def _scheduler_loop() -> None:
while True:
try:
enabled, interval_hours = _load_backup_schedule()
if enabled and not _has_running_backup() and _is_scheduled_backup_due(interval_hours):
_start_backup_task(dry_run=False)
except Exception:
# Keep scheduler alive; errors are visible in backup history when runs fail.
pass
await asyncio.sleep(60)
async def start_backup_scheduler() -> None:
global SCHEDULER_TASK
if SCHEDULER_TASK is None or SCHEDULER_TASK.done():
SCHEDULER_TASK = asyncio.create_task(_scheduler_loop())
async def stop_backup_scheduler() -> None:
global SCHEDULER_TASK
if SCHEDULER_TASK is not None:
SCHEDULER_TASK.cancel()
try:
await SCHEDULER_TASK
except asyncio.CancelledError:
pass
SCHEDULER_TASK = None
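# Runs the blocking backup in a worker thread and records success or failure in backup_log;
# the in-memory task and progress entries are always removed when the job finishes.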
async def _run_backup_job(log_id: int, dry_run: bool) -> None:
BACKUP_PROGRESS[log_id] = {"done": 0, "total": 0, "phase": "starting"}
try:
files_count, size_bytes = await asyncio.to_thread(
_run_backup_internal, dry_run=dry_run, progress_key=log_id
)
_finish_backup_log(
log_id,
status="success",
files_count=files_count,
size_bytes=size_bytes,
error_msg=None,
)
except Exception as e:
_finish_backup_log(
log_id,
status="error",
files_count=None,
size_bytes=None,
error_msg=str(e),
)
finally:
BACKUP_TASKS.pop(log_id, None)
BACKUP_PROGRESS.pop(log_id, None)
@router.post("/api/backup/oauth/prepare")
async def oauth_prepare(request: Request):
"""
Save the app key + secret and return the Dropbox authorization URL.
The user opens this URL in the browser and is shown a code.
Uses token_access_type=offline so a refresh token that does not expire is issued.
"""
body = {}
try:
body = await request.json()
except Exception:
pass
app_key = (body.get("app_key") or "").strip()
app_secret = (body.get("app_secret") or "").strip()
if not app_key or not app_secret:
return {"ok": False, "error": "app_key and app_secret are required."}
_save_dropbox_app_key(app_key)
_save_dropbox_app_secret(app_secret)
params = urlencode({
"client_id": app_key,
"response_type": "code",
"token_access_type": "offline",
})
auth_url = f"https://www.dropbox.com/oauth2/authorize?{params}"
return {"ok": True, "auth_url": auth_url}
@router.post("/api/backup/oauth/exchange")
async def oauth_exchange(request: Request):
"""
Exchange the authorization code entered by the user for a refresh token.
Stores the refresh token as the Dropbox token.
"""
body = {}
try:
body = await request.json()
except Exception:
pass
code = (body.get("code") or "").strip()
if not code:
return {"ok": False, "error": "Authorization code is required."}
app_key = _load_dropbox_app_key()
app_secret = _load_dropbox_app_secret()
if not app_key or not app_secret:
return {"ok": False, "error": "App key and secret not found. Run prepare step first."}
try:
async with httpx.AsyncClient(timeout=30) as client:
resp = await client.post(
"https://api.dropbox.com/oauth2/token",
data={
"code": code,
"grant_type": "authorization_code",
},
auth=(app_key, app_secret),
)
resp.raise_for_status()
data = resp.json()
except httpx.HTTPStatusError as e:
return {"ok": False, "error": f"Dropbox API error: {e.response.status_code} {e.response.text[:200]}"}
except Exception as e:
return {"ok": False, "error": str(e)}
refresh_token = data.get("refresh_token", "").strip()
if not refresh_token:
return {"ok": False, "error": "No refresh token in Dropbox response. Make sure token_access_type=offline was used."}
with get_db_conn() as conn:
with conn:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO credentials (site, username, password, updated_at)
VALUES ('dropbox', %s, %s, NOW())
ON CONFLICT (site) DO UPDATE
SET username = EXCLUDED.username,
password = EXCLUDED.password,
updated_at = NOW()
""",
(encrypt_value(""), encrypt_value(refresh_token)),
)
return {"ok": True, "message": "Refresh token saved. Dropbox is now connected."}
@router.get("/api/backup/progress")
async def backup_progress():
if not BACKUP_PROGRESS:
return {"running": False}
log_id = max(BACKUP_PROGRESS.keys())
p = BACKUP_PROGRESS[log_id]
return {
"running": True,
"log_id": log_id,
"done": p.get("done", 0),
"total": p.get("total", 0),
"phase": p.get("phase", ""),
}
@router.post("/api/backup/run")
async def run_backup(request: Request):
body = {}
try:
body = await request.json()
except Exception:
pass
dry_run = bool(body.get("dry_run", False))
if _has_running_backup():
return {
"ok": False,
"status": "running",
"error": "A backup is already running.",
"finished_at": _now_iso(),
}
log_id = _start_backup_task(dry_run=dry_run)
return {
"ok": True,
"backup_id": log_id,
"status": "running",
"dry_run": dry_run,
"message": "Backup started in background.",
"started_at": _now_iso(),
}
def _parse_snapshot_date(name: str) -> str:
"""Parse 'snapshot-20260329-123456.json''2026-03-29T12:34:56Z'."""
stem = Path(name).stem # snapshot-20260329-123456
parts = stem.split("-")
if len(parts) >= 3:
d, t = parts[1], parts[2]
if len(d) == 8 and len(t) == 6:
return f"{d[:4]}-{d[4:6]}-{d[6:]}T{t[:2]}:{t[2:4]}:{t[4:]}Z"
return ""
def _download_and_restore(client: dropbox.Dropbox, objects_root: str, rel: str, info: dict) -> None:
sha256 = str(info.get("sha256") or "")
if not sha256:
raise ValueError("No sha256 in snapshot entry")
obj_path = _object_path(objects_root, sha256)
_meta, res = client.files_download(obj_path)
data = res.content
dest = LIBRARY_DIR / rel
dest.parent.mkdir(parents=True, exist_ok=True)
dest.write_bytes(data)
meta = scan_media(dest)
tags = [(s, "subject") for s in meta.get("subjects", [])]
with get_db_conn() as conn:
with conn:
upsert_book(conn, rel, meta, tags)
@router.get("/api/backup/snapshots")
async def list_snapshots():
try:
client = await asyncio.to_thread(_dbx)
except Exception as e:
return {"ok": False, "error": str(e), "snapshots": []}
dropbox_root = _load_dropbox_root()
snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")
try:
paths = await asyncio.to_thread(_list_snapshot_paths, client, snapshots_root)
except Exception as e:
return {"ok": False, "error": str(e), "snapshots": []}
snapshots = [
{"name": Path(p).name, "created_at": _parse_snapshot_date(Path(p).name)}
for p in paths
]
return {"ok": True, "snapshots": snapshots}
@router.get("/api/backup/snapshots/{snapshot_name}/files")
async def snapshot_files(snapshot_name: str):
try:
client = await asyncio.to_thread(_dbx)
except Exception as e:
return {"ok": False, "error": str(e), "files": []}
dropbox_root = _load_dropbox_root()
snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")
snapshot_path = _dropbox_join(snapshots_root, snapshot_name)
try:
snap = await asyncio.to_thread(_load_snapshot_data, client, snapshot_path)
except Exception as e:
return {"ok": False, "error": str(e), "files": []}
files_data = snap.get("files", {})
result = [
{
"path": rel,
"size": info.get("size", 0),
"sha256": info.get("sha256", ""),
"exists_locally": (LIBRARY_DIR / rel).exists(),
}
for rel, info in sorted(files_data.items())
if isinstance(info, dict)
]
return {"ok": True, "snapshot": snapshot_name, "files": result}
@router.post("/api/backup/restore")
async def restore_files(request: Request):
body = {}
try:
body = await request.json()
except Exception:
pass
snapshot_name = (body.get("snapshot_name") or "").strip()
files_to_restore: list[str] = body.get("files", [])
if not snapshot_name:
return {"ok": False, "error": "snapshot_name is required"}
if not files_to_restore:
return {"ok": False, "error": "No files specified"}
try:
client = await asyncio.to_thread(_dbx)
except Exception as e:
return {"ok": False, "error": str(e)}
dropbox_root = _load_dropbox_root()
snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")
objects_root = _dropbox_join(dropbox_root, "library_objects")
snapshot_path = _dropbox_join(snapshots_root, snapshot_name)
try:
snap = await asyncio.to_thread(_load_snapshot_data, client, snapshot_path)
except Exception as e:
return {"ok": False, "error": f"Failed to load snapshot: {e}"}
files_data = snap.get("files", {})
results = []
for rel in files_to_restore:
if rel not in files_data:
results.append({"path": rel, "ok": False, "error": "Not found in snapshot"})
continue
try:
await asyncio.to_thread(_download_and_restore, client, objects_root, rel, files_data[rel])
results.append({"path": rel, "ok": True})
except Exception as e:
results.append({"path": rel, "ok": False, "error": str(e)})
ok_count = sum(1 for r in results if r["ok"])
return {"ok": True, "restored": ok_count, "total": len(results), "results": results}