#!/usr/bin/env python3
"""
SEC Stats — Upload Watcher
Watches the Google Drive folder where Form XML uploads land,
automatically ingests new files into the database, and exports JSON.

No Google API keys required. Uses a lookup JSON file synced via
Google Drive for Desktop to match filenames to Game IDs.

How it works:
  1. Apps Script writes sec_stats_lookup.json to My Drive/SEC Stats/
     every time a school submits a file via the Form
  2. Google Drive for Desktop syncs that file to your Mac automatically
  3. This watcher reads the local lookup file to match filename → Game ID
  4. Copies the XML into data/raw/baseball/2026/TEAM/
  5. Runs parse_games.py --game-id to ingest it
  6. Runs export_json.py to refresh all 12 JSON files

Usage:
  python3 watch_uploads.py              # scan once and exit
  python3 watch_uploads.py --daemon     # run continuously (every 30s)
  python3 watch_uploads.py --interval 60 --daemon
"""

import os
import sys
import time
import shutil
import logging
import argparse
import subprocess
import json
from pathlib import Path

# ---------------------------------------------------------------------------
# CONFIGURATION — edit these if your paths differ
# ---------------------------------------------------------------------------

# Google account whose Drive for Desktop mount contains the watched folders.
GOOGLE_ACCOUNT = "tbrasher@sec.org"

# Folder where Form uploads land (synced locally by Google Drive for Desktop).
# "\u2013" is an en dash — part of the actual Drive folder name.
WATCH_FOLDER = Path(
    f"~/Library/CloudStorage/GoogleDrive-{GOOGLE_ACCOUNT}"
    "/My Drive/SEC Baseball \u2013 Game File Upload (File responses)"
    "/Upload XML Game File (File responses)"
).expanduser()

# filename -> game-info mapping maintained by the Apps Script on each submit.
LOOKUP_FILE = Path(
    f"~/Library/CloudStorage/GoogleDrive-{GOOGLE_ACCOUNT}"
    "/My Drive/SEC Stats/sec_stats_lookup.json"
).expanduser()

# Local project root; expected to contain parse_games.py and export_json.py.
# NOTE(review): LOG_FILE below lives here too — logging.FileHandler raises at
# import time if this directory is missing; confirm it always exists first.
SEC_STATS_DIR = Path("~/Documents/sec_stats").expanduser()

SEASON = "2026"
SPORT  = "baseball"

RAW_DIR       = SEC_STATS_DIR / "data" / "raw" / SPORT / SEASON  # ingested XML copies
DB_PATH       = SEC_STATS_DIR / f"sec_stats_{SPORT}.db"  # SQLite database
PARSE_SCRIPT  = SEC_STATS_DIR / "parse_games.py"  # ingest script (run via subprocess)
EXPORT_SCRIPT = SEC_STATS_DIR / "export_json.py"  # JSON export script (run via subprocess)
JSON_OUT      = SEC_STATS_DIR / "json" / SPORT  # export destination directory
PROCESSED_LOG = SEC_STATS_DIR / "watch_processed.json"  # filenames already handled
LOG_FILE      = SEC_STATS_DIR / "watch_uploads.log"  # watcher's own log file

# Default polling interval in seconds for --daemon mode.
DEFAULT_INTERVAL = 30

# ---------------------------------------------------------------------------
# LOGGING
# ---------------------------------------------------------------------------

# Log to both the log file and stdout so daemon runs and interactive runs
# are equally diagnosable.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s  %(levelname)-8s  %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[
        logging.FileHandler(LOG_FILE),  # NOTE(review): raises if SEC_STATS_DIR is missing
        logging.StreamHandler(sys.stdout),
    ]
)
log = logging.getLogger("watcher")

# ---------------------------------------------------------------------------
# PROCESSED FILE TRACKING
# ---------------------------------------------------------------------------

def load_processed() -> set:
    """Return the set of filenames that have already been handled.

    Reads PROCESSED_LOG (a JSON list of filenames).  A missing, unreadable,
    or corrupt file yields an empty set instead of crashing the watcher —
    worst case, already-processed uploads are re-ingested once.  Mirrors
    the error-handling style of load_lookup().
    """
    if not PROCESSED_LOG.exists():
        return set()
    try:
        with open(PROCESSED_LOG) as f:
            return set(json.load(f))
    except (json.JSONDecodeError, OSError) as e:
        # e.g. a write truncated by a crash mid-save; don't wedge the watcher.
        log.warning(f"Could not read processed log ({e}); starting fresh")
        return set()

def save_processed(processed: set):
    """Persist the handled-filename set to PROCESSED_LOG as a sorted JSON list."""
    serialized = json.dumps(sorted(processed), indent=2)
    PROCESSED_LOG.write_text(serialized)

# ---------------------------------------------------------------------------
# LOOKUP FILE
# ---------------------------------------------------------------------------

def load_lookup() -> dict:
    """Read the Apps Script lookup table; return {} when missing or unreadable."""
    try:
        raw = LOOKUP_FILE.read_text()
    except FileNotFoundError:
        # Drive for Desktop may simply not have synced the file yet.
        log.debug(f"Lookup file not yet synced: {LOOKUP_FILE}")
        return {}
    except OSError as e:
        log.warning(f"Could not read lookup file: {e}")
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError as e:
        log.warning(f"Could not read lookup file: {e}")
        return {}

def find_game_info(filename: str, lookup: dict):
    """Match an uploaded filename against the Apps Script lookup table.

    Candidate keys are tried in order: the full name lower-cased, the full
    name as-is, the basename lower-cased, and the basename as-is.  (The
    original tried the full name as-is but never the basename as-is, so a
    full path could miss a mixed-case lookup key.)  Duplicate candidates —
    which occur whenever a bare filename is passed — are skipped.

    Args:
        filename: uploaded file's name (or full path).
        lookup: filename -> entry dict loaded from LOOKUP_FILE.

    Returns:
        (game_id, team_id, game_type, is_correction) on a match, or
        (None, None, None, False) when no lookup entry exists.
    """
    base = Path(filename).name
    seen = set()
    for key in (filename.lower(), filename, base.lower(), base):
        if key in seen:
            continue
        seen.add(key)
        entry = lookup.get(key)
        if entry:
            return (entry.get("game_id"), entry.get("team_id"),
                    entry.get("game_type", ""), entry.get("is_correction", False))
    return None, None, None, False

# ---------------------------------------------------------------------------
# FILE INGESTION
# ---------------------------------------------------------------------------

def ingest_file(xml_path: Path, game_id: str, team_id: str, game_type: str = "", is_correction: bool = False) -> bool:
    """Copy *xml_path* into the raw data tree, ingest it, and re-export JSON.

    Args:
        xml_path: synced upload sitting in WATCH_FOLDER.
        game_id: Game ID resolved from the lookup file.
        team_id: team directory under RAW_DIR to copy the file into.
        game_type: Registry game type forwarded to parse_games.py
            (empty string = unknown; the flag is omitted).
        is_correction: when True, parse_games.py runs with --replace so the
            resubmitted file overwrites previously-ingested data.

    Returns:
        True when both the parse step and the JSON export succeed.
    """
    dest_dir = RAW_DIR / team_id
    dest_dir.mkdir(parents=True, exist_ok=True)
    dest_path = dest_dir / xml_path.name

    shutil.copy2(xml_path, dest_path)
    log.info(f"Copied → {dest_path.relative_to(SEC_STATS_DIR)}")

    # Parse — pass game_type from Registry so DB has correct conference flag
    parse_cmd = [
        sys.executable, str(PARSE_SCRIPT),
        "--game-id",   game_id,
        "--db",        str(DB_PATH),
        str(dest_path),
    ]
    if game_type:
        parse_cmd += ["--game-type", game_type]
    if is_correction:
        parse_cmd += ["--replace"]
        log.info(f"Game type: {game_type or 'unknown'} | CORRECTION — will replace existing data")
    else:
        log.info(f"Game type: {game_type or 'unknown'}")
    log.info(f"Ingesting {game_id}...")
    if not _run_step(parse_cmd, f"parse_games.py failed for {xml_path.name}"):
        return False

    # Export — refresh the JSON files so they reflect the newly-ingested game
    export_cmd = [
        sys.executable, str(EXPORT_SCRIPT),
        "--db",  str(DB_PATH),
        "--out", str(JSON_OUT),
    ]
    log.info("Exporting JSON...")
    if not _run_step(export_cmd, "export_json.py failed"):
        return False

    log.info(f"✓ {game_id} ingested and JSON refreshed")
    return True


def _run_step(cmd: list, error_msg: str) -> bool:
    """Run one subprocess step from SEC_STATS_DIR, mirroring its output to the log.

    stdout lines are logged at INFO and non-empty stderr at WARNING.
    (Previously the export step discarded stderr entirely, hiding failure
    details; both steps now share this one code path.)  On a non-zero exit,
    logs *error_msg* and returns False.
    """
    result = subprocess.run(cmd, capture_output=True, text=True,
                            cwd=str(SEC_STATS_DIR))
    for line in (result.stdout or "").strip().splitlines():
        log.info(f"  {line}")
    if (result.stderr or "").strip():
        log.warning(result.stderr.strip())
    if result.returncode != 0:
        log.error(error_msg)
        return False
    return True

# ---------------------------------------------------------------------------
# SCAN
# ---------------------------------------------------------------------------

def scan_once(processed: set) -> int:
    """Scan WATCH_FOLDER once and ingest any new XML uploads.

    Files without a lookup entry are NOT marked processed, so they are
    retried on the next scan (the Apps Script lookup file may lag behind
    the upload sync).  Files that do match are marked processed whether or
    not ingestion succeeds, so a corrupt file cannot wedge the watcher in
    a retry loop.

    Args:
        processed: mutable set of already-handled filenames; updated in place.

    Returns:
        Number of files successfully ingested during this scan.
    """
    if not WATCH_FOLDER.exists():
        log.error(f"Watch folder not found: {WATCH_FOLDER}")
        log.error("Make sure Google Drive for Desktop is running.")
        return 0

    lookup = load_lookup()

    try:
        # suffix.upper() makes the extension match case-insensitive (.xml/.XML/.Xml)
        xml_files = sorted(
            f for f in WATCH_FOLDER.iterdir()
            if f.is_file() and f.suffix.upper() == ".XML"
            and f.name not in processed
        )
    except OSError as e:
        log.error(f"Could not read watch folder: {e}")
        return 0

    if not xml_files:
        log.debug("No new XML files.")
        return 0

    log.info(f"Found {len(xml_files)} new XML file(s)")
    ingested = 0

    for xml_path in xml_files:
        log.info(f"─── {xml_path.name}")

        game_id, team_id, game_type, is_correction = find_game_info(xml_path.name, lookup)

        if not game_id or not team_id:
            log.warning(
                f"No lookup entry for '{xml_path.name}' — will retry next scan.\n"
                f"  (Apps Script lookup file may not have synced yet.)\n"
                f"  To ingest manually:\n"
                f"  python3 parse_games.py --game-id GAME_ID '{xml_path}' --db {DB_PATH}"
            )
            continue  # don't mark processed — retry next scan

        corr_label = " [CORRECTION]" if is_correction else ""
        log.info(f"Matched: {game_id} (team: {team_id}, type: {game_type or 'unknown'}){corr_label}")
        success = ingest_file(xml_path, game_id, team_id, game_type, is_correction)
        processed.add(xml_path.name)  # always mark so corrupt files don't loop
        if success:
            ingested += 1
        else:
            log.error(
                f"Failed for {xml_path.name}. Check log for details.\n"
                f"  To retry: remove '{xml_path.name}' from {PROCESSED_LOG}"
            )

    return ingested

# ---------------------------------------------------------------------------
# RUN
# ---------------------------------------------------------------------------

def run(interval: int = DEFAULT_INTERVAL, daemon: bool = False):
    """Start the watcher: a single scan, or a polling loop when *daemon* is True.

    Args:
        interval: seconds to sleep between scans in daemon mode.
        daemon: when True, loop until interrupted with Ctrl+C.
    """
    banner = "=" * 60
    mode = 'daemon every ' + str(interval) + 's' if daemon else 'single scan'
    log.info(banner)
    log.info("SEC Stats Upload Watcher")
    log.info(f"Watch:    {WATCH_FOLDER}")
    log.info(f"Lookup:   {LOOKUP_FILE}")
    log.info(f"Database: {DB_PATH}")
    log.info(f"Mode:     {mode}")
    log.info(banner)

    processed = load_processed()
    log.info(f"Previously processed: {len(processed)} file(s)")

    # Single-scan mode: one pass, persist, report, done.
    if not daemon:
        count = scan_once(processed)
        save_processed(processed)
        log.info(f"Done — ingested {count} file(s).")
        return

    # Daemon mode: poll forever; persist progress after every scan so a
    # crash or Ctrl+C never forgets what was already ingested.
    log.info("Running. Ctrl+C to stop.\n")
    try:
        while True:
            count = scan_once(processed)
            save_processed(processed)
            if count:
                log.info(f"Ingested {count} file(s) this scan.\n")
            time.sleep(interval)
    except KeyboardInterrupt:
        log.info("Watcher stopped.")
        save_processed(processed)

# ---------------------------------------------------------------------------
# ENTRY POINT
# ---------------------------------------------------------------------------

def main():
    """Parse command-line arguments and launch the watcher."""
    parser = argparse.ArgumentParser(
        description="Watch Google Drive uploads and auto-ingest SEC Stats XML files"
    )
    parser.add_argument("--daemon", action="store_true",
                        help="Run continuously polling every --interval seconds")
    parser.add_argument("--interval", type=int, default=DEFAULT_INTERVAL,
                        help=f"Poll interval in seconds (default: {DEFAULT_INTERVAL})")
    parser.add_argument("--verbose", action="store_true",
                        help="Show debug log messages")
    opts = parser.parse_args()

    # --verbose lowers the ROOT logger threshold so DEBUG records pass through.
    if opts.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    run(interval=opts.interval, daemon=opts.daemon)


if __name__ == "__main__":
    main()
