# Source: novela/containers/novela/recover_decock049.py
# (repository viewer header: 257 lines, 8.2 KiB, Python)

"""
One-time recovery: retrieve "049 - De Cock en het lijk op drift.epub" from the
Dropbox backup, place it at the correct library path, and re-insert its DB row.

Run inside the novela container:

    python recover_decock049.py [--execute]

Without --execute the script performs a dry run only: it shows what would be
restored and changes nothing.
"""
import json
import os
import sys
from pathlib import Path
import dropbox
import psycopg2
from security import decrypt_value
# True unless "--execute" appears among the command-line arguments.
DRY_RUN = "--execute" not in sys.argv
# Relative path, so it resolves against the current working directory.
LIBRARY_DIR = Path("library")
# Path of the restored file relative to LIBRARY_DIR; main() also passes it to
# _insert_db_row as the new row's filename.
TARGET_REL = "epub/Unknown Publisher/A.C. Baantjer/Series/De Cock (Series)/049 - De Cock en het lijk op drift.epub"
# Lowercase substrings that must ALL occur in a (lowercased) snapshot path for
# that path to count as the wanted file.
SEARCH_KEYWORDS = ["de cock", "049", "lijk op drift"]
def _db_conn():
    """Open a psycopg2 connection to the ``novela-db`` PostgreSQL host.

    Database name, user and password are read from the ``POSTGRES_DB``,
    ``POSTGRES_USER`` and ``POSTGRES_PASSWORD`` environment variables.

    Raises:
        KeyError: if one of those environment variables is not set.
    """
    # Keyword arguments instead of a hand-built "key=value" string: a password
    # containing spaces, quotes or backslashes would otherwise be mis-parsed
    # when the connection string is split into parameters.
    return psycopg2.connect(
        host="novela-db",
        dbname=os.environ["POSTGRES_DB"],
        user=os.environ["POSTGRES_USER"],
        password=os.environ["POSTGRES_PASSWORD"],
    )
def _load_dropbox_token(conn) -> str:
    """Fetch and decrypt the Dropbox access token kept in ``credentials``.

    The row with ``site = 'dropbox'`` is read and both of its columns are
    decrypted; the ``password`` value is preferred and the ``username`` value
    is the fallback. Surrounding whitespace is stripped from the result.

    Raises:
        RuntimeError: if no such row exists, or the chosen value is blank.
    """
    query = "SELECT username, password FROM credentials WHERE site = 'dropbox' LIMIT 1"
    with conn.cursor() as cursor:
        cursor.execute(query)
        record = cursor.fetchone()

    if not record:
        raise RuntimeError("No Dropbox token in credentials table.")

    enc_user, enc_secret = record
    plain_user = decrypt_value(enc_user)
    plain_secret = decrypt_value(enc_secret)

    # First truthy candidate wins; fall back to an empty string.
    if plain_secret:
        chosen = plain_secret
    elif plain_user:
        chosen = plain_user
    else:
        chosen = ""

    token = chosen.strip()
    if token:
        return token
    raise RuntimeError("Dropbox token is empty.")
def _load_dropbox_root(conn) -> str:
    """Return the Dropbox folder that holds the backup, with a leading ``/``.

    Reads the ``credentials`` row with ``site = 'dropbox_backup_root'`` and
    decrypts its ``password`` column. Falls back to ``"/novela"`` when the row
    is missing or the decrypted value is empty/blank.
    """
    default_root = "/novela"
    with conn.cursor() as cur:
        cur.execute(
            "SELECT username, password FROM credentials WHERE site = 'dropbox_backup_root' LIMIT 1"
        )
        row = cur.fetchone()
    if not row:
        return default_root

    _, password_raw = row
    # Treat a None/empty decryption result as "not configured" instead of
    # crashing on .strip(); _load_dropbox_token guards the same call this way.
    root = (decrypt_value(password_raw) or "").strip() or default_root
    if not root.startswith("/"):
        root = "/" + root
    return root
def _dropbox_join(root: str, *parts: str) -> str:
segs = [p.strip("/") for p in parts if p and p.strip("/")]
base = root.rstrip("/")
return base + "/" + "/".join(segs) if segs else base
def _list_snapshots(client, snapshots_root: str) -> list[str]:
    """List the ``.json`` files directly inside *snapshots_root*.

    The folder is listed non-recursively and every result page is followed.
    Only file entries whose name ends with ``.json`` are kept.

    Returns:
        The entries' ``path_display`` values in descending string order.

    Raises:
        RuntimeError: if any listing request fails; the underlying error is
            chained as ``__cause__``.
    """
    paths: list[str] = []
    try:
        res = client.files_list_folder(snapshots_root, recursive=False)
        while True:
            paths.extend(
                entry.path_display
                for entry in res.entries
                if isinstance(entry, dropbox.files.FileMetadata)
                and entry.name.endswith(".json")
            )
            if not res.has_more:
                break
            # Continuation pages are inside the try as well, so a failure on a
            # later page is reported the same way as one on the first page.
            res = client.files_list_folder_continue(res.cursor)
    except Exception as e:
        raise RuntimeError(f"Cannot list snapshots folder '{snapshots_root}': {e}") from e
    # Newest first — presumably snapshot names embed a sortable timestamp;
    # verify against whatever writes the snapshots.
    return sorted(paths, reverse=True)
def _load_snapshot(client, path: str) -> dict:
    """Download the snapshot file at *path* and parse it as JSON.

    The payload is decoded as UTF-8; undecodable bytes are replaced instead of
    raising, so a decoding problem surfaces (if at all) as a JSON error.
    """
    _, response = client.files_download(path)
    payload = response.content
    text = payload.decode("utf-8", errors="replace")
    return json.loads(text)
def _find_file_in_snapshot(snap: dict) -> tuple[str, str] | None:
"""Return (rel_path, sha256) for De Cock 049, or None."""
files = snap.get("files", {})
for rel, info in files.items():
rel_lower = rel.lower()
if all(kw in rel_lower for kw in SEARCH_KEYWORDS):
sha256 = info.get("sha256", "")
return rel, sha256
return None
def _download_object(client, objects_root: str, sha256: str) -> bytes:
obj_path = _dropbox_join(objects_root, sha256[:2], sha256)
print(f" Downloading object: {obj_path}")
_meta, resp = client.files_download(obj_path)
return resp.content
def _insert_db_row(conn, filename: str, snap_entry: dict, orig_filename: str) -> None:
    """Insert a ``library`` row for *filename* without overwriting an existing one.

    If a row whose ``filename`` equals *orig_filename* exists, all of its
    columns are copied with only ``filename`` replaced. Otherwise a minimal,
    hard-coded row for "De Cock en het lijk op drift" is inserted. Both
    inserts use ``ON CONFLICT (filename) DO NOTHING``. Nothing is committed
    here; that is left to the caller.

    Args:
        conn: open psycopg2 connection.
        filename: value for the new row's ``filename`` column.
        snap_entry: unused; kept so existing calls stay valid.
        orig_filename: ``filename`` of the row to copy from, if present.
    """
    # Function-scope import: only this function composes SQL identifiers.
    from psycopg2 import sql

    with conn.cursor() as cur:
        cur.execute("SELECT * FROM library WHERE filename = %s LIMIT 1", (orig_filename,))
        orig = cur.fetchone()
        if orig is not None:
            # Column names are taken from this query's own result description,
            # so they line up with the fetched values by construction.
            cols = [col[0] for col in cur.description]
            data = dict(zip(cols, orig))
            data["filename"] = filename
            # NOTE(review): every column is copied verbatim, including any
            # surrogate key — confirm `library` has no unique column other
            # than filename, or the insert fails on that constraint.
            stmt = sql.SQL(
                "INSERT INTO library ({cols}) VALUES ({vals}) "
                "ON CONFLICT (filename) DO NOTHING"
            ).format(
                cols=sql.SQL(", ").join(sql.Identifier(c) for c in data),
                vals=sql.SQL(", ").join(sql.Placeholder() * len(data)),
            )
            cur.execute(stmt, list(data.values()))
            # rowcount is 0 when ON CONFLICT skipped the insert.
            if cur.rowcount:
                print(f" DB row copied from '{orig_filename}' -> '{filename}'")
            else:
                print(f" DB row for '{filename}' already exists; left unchanged.")
            return

        # No row to copy from: insert a minimal row.
        cur.execute(
            """
            INSERT INTO library (filename, title, author, publisher, series, series_index,
                                 media_type, has_cover, needs_review)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (filename) DO NOTHING
            """,
            (
                filename,
                "De Cock en het lijk op drift",
                "A.C. Baantjer",
                "Unknown Publisher",
                "De Cock (Series)",
                49,
                "epub",
                False,
                True,
            ),
        )
        if cur.rowcount:
            print(f" DB row inserted (minimal) for '{filename}'")
        else:
            print(f" DB row for '{filename}' already exists; left unchanged.")
def _search_snapshots(client, snapshots: list[str]) -> tuple[str, str] | None:
    """Return (rel_path, sha256) from the first snapshot with a usable entry."""
    for snap_path in snapshots:
        print(f"\nSearching snapshot: {snap_path}")
        result = _find_file_in_snapshot(_load_snapshot(client, snap_path))
        if result and result[1]:
            rel, sha256 = result
            print(f" FOUND: {rel}")
            print(f" sha256: {sha256}")
            return rel, sha256
        if result:
            # An entry without a hash cannot be downloaded; keep looking.
            print(" Found, but the entry has no sha256; trying the next snapshot.")
        else:
            print(" Not found in this snapshot.")
    return None


def _write_file_atomically(target_path: Path, data: bytes) -> None:
    """Write *data* to a sibling ``.part`` file, then rename it into place."""
    target_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = target_path.with_name(target_path.name + ".part")
    try:
        tmp_path.write_bytes(data)
        tmp_path.replace(target_path)
    finally:
        # After a successful rename the temp file is gone and this is a no-op;
        # after a failure it removes the partial file.
        tmp_path.unlink(missing_ok=True)


def _recover(conn) -> int:
    """Run the recovery using *conn*; return 0 on success, 1 on failure."""
    token = _load_dropbox_token(conn)
    dropbox_root = _load_dropbox_root(conn)
    print(f"Dropbox root: {dropbox_root}")

    client = dropbox.Dropbox(token, timeout=120)
    try:
        acct = client.users_get_current_account()
    except Exception as e:
        print(f"ERROR: Dropbox auth failed: {e}")
        return 1
    print(f"Dropbox account: {acct.email}")

    objects_root = _dropbox_join(dropbox_root, "library_objects")
    snapshots_root = _dropbox_join(dropbox_root, "library_snapshots")

    print(f"\nListing snapshots in: {snapshots_root}")
    snapshots = _list_snapshots(client, snapshots_root)
    print(f"Found {len(snapshots)} snapshots.")
    for s in snapshots:
        print(f" {s}")

    found = _search_snapshots(client, snapshots)
    if found is None:
        print("\nERROR: File not found in any snapshot. Cannot recover.")
        return 1
    found_rel, found_sha256 = found

    target_path = LIBRARY_DIR / TARGET_REL
    print(f"\nTarget path: {target_path}")
    if target_path.exists():
        print("File already exists at target path. Nothing to do.")
        return 0

    if DRY_RUN:
        print(f"\nDry run: would download sha256={found_sha256}")
        print(f" and write to: {target_path}")
        print("\nRun with --execute to apply.")
        return 0

    data = _download_object(client, objects_root, found_sha256)
    print(f" Downloaded {len(data):,} bytes.")

    _write_file_atomically(target_path, data)
    print(f" Written to: {target_path}")

    try:
        # `with conn` commits on success and rolls back on an exception.
        with conn:
            _insert_db_row(conn, TARGET_REL, {}, found_rel)
    except Exception:
        # Undo the file write: a rerun stops at "File already exists", so a
        # leftover file would prevent the DB row from ever being restored.
        target_path.unlink(missing_ok=True)
        print(" DB insert failed; removed the restored file so the script can be re-run.")
        raise

    print(f"\nDone. File recovered to: {target_path}")
    return 0


def main():
    """Recover the De Cock 049 epub from the Dropbox backup.

    Dry-run by default; pass ``--execute`` on the command line to download the
    file, write it under the library directory and insert its DB row.

    Returns:
        0 on success (including "nothing to do" and dry runs), 1 when Dropbox
        authentication fails or the file is in no snapshot.
    """
    print(f"Mode: {'DRY RUN' if DRY_RUN else '*** EXECUTE ***'}")
    print()
    conn = _db_conn()
    try:
        return _recover(conn)
    finally:
        # Close the connection on every path, including exceptions.
        conn.close()
if __name__ == "__main__":
    # Hand main()'s result to the interpreter as the exit status, so a failure
    # code can reach the shell. sys.exit(None) exits with status 0, so a
    # main() that returns nothing still counts as success.
    sys.exit(main())