#!/usr/bin/env python3
"""
SEC Stats — Schedule Importer (SECSports.com API)
Pulls the complete SEC baseball/softball schedule and exports a CSV
ready to import into the Game Registry Google Sheet.

Usage:
    python3 import_schedule.py                        # 2026 baseball
    python3 import_schedule.py --sport softball       # 2026 softball
    python3 import_schedule.py --year 2027            # different year
    python3 import_schedule.py --season-id 88         # override season ID
    python3 import_schedule.py --discover             # probe for season IDs

Output:
    schedule_baseball_2026.csv       → import into Game Registry sheet
    schedule_baseball_2026_raw.json  → raw API data for debugging

Requirements:
    pip3 install requests
"""

import requests
import json
import csv
import time
import argparse
import os
from datetime import datetime, timedelta

# ---------------------------------------------------------------------------
# API config
# ---------------------------------------------------------------------------

BASE_URL = "https://www.secsports.com/api"

# Sent with every request. NOTE(review): the browser-like UA and Referer are
# presumably what lets the public endpoint answer JSON — confirm before changing.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; SEC-Schedule-Importer/1.0)",
    "Accept":     "application/json",
    "Referer":    "https://www.secsports.com/",
}

# Known IDs — add more as discovered via --discover
SPORT_IDS = {
    "baseball": 1,
    "softball": 2,  # verify with --discover if unsure
}

# API season IDs keyed by (sport, calendar year); overridable via --season-id.
SEASON_IDS = {
    ("baseball", 2026): 87,
    # ("softball", 2026): ???  — run: python3 import_schedule.py --discover --sport softball
}

# Jan 1 – Jul 31 UTC window per year; years not listed here get the same
# shape of window built on the fly in main().
SEASON_DATE_RANGES = {
    2026: ("2026-01-01T00:00:00.000Z", "2026-07-31T23:59:59.999Z"),
    2027: ("2027-01-01T00:00:00.000Z", "2027-07-31T23:59:59.999Z"),
}

# Full include string from the original working URL.
# Requests nested opponent/school/broadcast objects alongside each event.
API_INCLUDE = (
    "firstOpponent.customLogo,firstOpponent.officialLogo,"
    "firstOpponent.school.logo,firstOpponent.school.schoolSports,"
    "firstOpponent.opponentSports,firstOpponent.opponentAlternativeNames,"
    "firstOpponentLogo,secondOpponent.customLogo,secondOpponent.officialLogo,"
    "secondOpponent.school.logo,secondOpponent.school.schoolSports,"
    "secondOpponent.opponentSports,secondOpponent.opponentAlternativeNames,"
    "secondOpponentLogo,scheduleEventLinks,preEventArticle,postEventArticle,"
    "schedule.sport,scheduleEventBroadcasts"
)

# ---------------------------------------------------------------------------
# Team lookup
# ---------------------------------------------------------------------------

# SEC code → display name. Used for provider-team names and summary output.
SEC_TEAMS = {
    "ALA":   "Alabama",             "ARK":   "Arkansas",
    "AUB":   "Auburn",              "FLA":   "Florida",
    "UGA":   "Georgia",             "UK":    "Kentucky",
    "LSU":   "LSU",                 "MISS":  "Ole Miss",
    "MSU":   "Mississippi State",   "MIZ":   "Missouri",
    "OKLA":  "Oklahoma",            "SC":    "South Carolina",
    "TENN":  "Tennessee",           "TEX":   "Texas",
    "TAMU":  "Texas A&M",           "VANDY": "Vanderbilt",
}

# Exact-match lookup ONLY — no substring matching.
# Every entry here must be the complete string as it appears in the API.
# This prevents "Texas Tech" → TEX, "Oklahoma State" → OKLA, etc.
# Keys are lowercase because resolve_code() lowercases before lookup.
NAME_TO_CODE = {
    # Primary names exactly as they appear in SECSports API
    "alabama":             "ALA",
    "arkansas":            "ARK",
    "auburn":              "AUB",
    "florida":             "FLA",
    "georgia":             "UGA",
    "kentucky":            "UK",
    "lsu":                 "LSU",
    "ole miss":            "MISS",
    "mississippi state":   "MSU",
    "missouri":            "MIZ",
    "oklahoma":            "OKLA",
    "south carolina":      "SC",
    "tennessee":           "TENN",
    "texas":               "TEX",
    "texas a&m":           "TAMU",
    "vanderbilt":          "VANDY",
    # Common alternates the API might use — all exact strings
    "mississippi":         "MISS",   # rare alternate for Ole Miss
    "texas a&m aggies":    "TAMU",
    "oklahoma sooners":    "OKLA",
    "alabama crimson tide":"ALA",
    "arkansas razorbacks": "ARK",
    "auburn tigers":       "AUB",
    "florida gators":      "FLA",
    "georgia bulldogs":    "UGA",
    "kentucky wildcats":   "UK",
    "lsu tigers":          "LSU",
    "missouri tigers":     "MIZ",
    "tennessee volunteers":"TENN",
    "texas longhorns":     "TEX",
    "south carolina gamecocks": "SC",
    "vanderbilt commodores":    "VANDY",
    "miss. state":         "MSU",
}

# Explicit blocklist — names that contain SEC keywords but are NOT SEC teams.
# Any name in this list will never resolve to an SEC code.
# Checked before NAME_TO_CODE in resolve_code(), so it always wins.
NOT_SEC = {
    "alabama state", "alabama a&m", "north alabama", "south alabama",
    "arkansas state", "arkansas-pine bluff", "central arkansas",
    "little rock", "ualr",
    "auburn montgomery",
    "florida state", "florida atlantic", "florida international",
    "florida a&m", "north florida", "south florida", "ucf",
    "georgia state", "georgia tech", "georgia southern", "west georgia",
    "georgia gwinnett", "kennesaw state",
    "eastern kentucky", "northern kentucky", "western kentucky",
    "middle tennessee", "tennessee tech", "tennessee state",
    "east tennessee state", "etsu",
    "texas tech", "texas state", "texas southern", "texas-arlington",
    "texas a&m-corpus christi", "texas a&m-commerce",
    "tarleton state", "lamar", "sam houston",
    "oklahoma state", "oral roberts",
    "south carolina upstate", "coastal carolina", "winthrop",
    "southeast missouri state", "semo", "missouri state",
    "mississippi valley state",
}

def resolve_code(name):
    """
    Map a raw team name to its SEC code via exact, case-insensitive match.

    Returns None for empty input, for names on the NOT_SEC blocklist,
    and for any name not present in NAME_TO_CODE.
    """
    if not name:
        return None

    normalized = name.strip().lower()

    # Blocklisted names ("texas tech", "oklahoma state", ...) must never
    # resolve to an SEC code, so the blocklist is consulted first.
    return None if normalized in NOT_SEC else NAME_TO_CODE.get(normalized)


# ---------------------------------------------------------------------------
# API fetch
# ---------------------------------------------------------------------------

def fetch_page(sport_id, season_id, date_from, date_to, page=1, per_page=100):
    """
    Fetch one page of schedule events from the API.

    Returns a 3-tuple (events, last_page, total). On any request or parse
    error it prints a diagnostic and returns ([], 0, 0) — last_page == 0 is
    how callers distinguish "error" from a legitimately empty page.
    """
    params = {
        "per_page":                   per_page,
        "sort":                       "datetime",
        "filter[datetime_gte]":       date_from,
        "filter[datetime_lte]":       date_to,
        "filter[schedule.sport_id]":  sport_id,
        "filter[schedule.season_id]": season_id,
        "include":                    API_INCLUDE,
        "page":                       page,
    }
    try:
        r = requests.get(
            f"{BASE_URL}/schedule-events",
            params=params, headers=HEADERS, timeout=20
        )
        r.raise_for_status()
        data      = r.json()
        events    = data.get("data", [])
        meta      = data.get("meta", {})
        # Pagination info lives in "meta" when present; default to one page.
        last_page = meta.get("last_page", 1) if meta else 1
        total     = meta.get("total", len(events)) if meta else len(events)

        # If no meta, check links for pagination
        if not meta:
            links = data.get("links", {})
            if links and not links.get("next"):
                last_page = page  # no next page means this is the last

        return events, last_page, total

    except requests.exceptions.HTTPError as e:
        code = e.response.status_code
        print(f"  HTTP {code}: {e}")
        if code == 401:
            print("  → API requires authentication")
        elif code == 404:
            print("  → Wrong sport_id or season_id. Run --discover")
        return [], 0, 0
    except Exception as e:
        # Network failures, timeouts, malformed JSON, etc.
        print(f"  Error: {e}")
        return [], 0, 0


def fetch_all(sport_id, season_id, date_from, date_to, max_pages=50):
    """
    Fetch every page of events (up to max_pages) and return one flat list.

    Stops early if a page comes back empty; returns [] when page 1 fails
    outright (fetch_page signals errors with last_page == 0).
    """
    first_batch, last_page, total = fetch_page(
        sport_id, season_id, date_from, date_to, page=1
    )
    if not first_batch and last_page == 0:
        return []

    collected = list(first_batch)
    print(f"  Page 1/{last_page}: {len(first_batch)} events  (total: {total})")

    # Walk the remaining pages sequentially, capped at max_pages.
    page = 2
    final_page = min(last_page, max_pages)
    while page <= final_page:
        batch, _, _ = fetch_page(
            sport_id, season_id, date_from, date_to, page=page
        )
        if not batch:
            print(f"  Page {page}: empty — stopping")
            break
        collected.extend(batch)
        print(f"  Page {page}/{last_page}: {len(batch)} events")
        time.sleep(0.3)  # be polite to the API
        page += 1

    return collected


# ---------------------------------------------------------------------------
# Event parser — flat structure (confirmed from raw JSON)
# ---------------------------------------------------------------------------

def parse_event(event):
    """
    Parse one flat API event dict into a normalized game record.

    The SECSports API returns flat objects — no JSON:API nesting.
    Returns a dict of normalized string fields (date, time, teams, venue,
    provider, TV), or None when neither opponent resolves to an SEC team.
    """
    event_id = str(event.get("id", ""))

    # Top-level name fields are sometimes null even when the nested opponent
    # object has the name. Fall back to nested object name when null.
    def get_opponent_name(top_key, obj_key):
        name = event.get(top_key)
        if name:
            return name
        obj = event.get(obj_key) or {}
        return obj.get("name") or obj.get("long_name") or ""

    first_name  = get_opponent_name("first_opponent_name",  "first_opponent")
    second_name = get_opponent_name("second_opponent_name", "second_opponent")

    # Home/away: first_opponent_home_team is a direct boolean (assume home
    # when the field is missing).
    first_is_home = bool(event.get("first_opponent_home_team", True))

    if first_is_home:
        home_name, away_name = first_name, second_name
    else:
        home_name, away_name = second_name, first_name

    home_code = resolve_code(home_name)
    away_code = resolve_code(away_name)

    # Skip if neither team is SEC
    if not home_code and not away_code:
        return None

    # Game type
    # NOTE: The API's is_conference flag is unreliable — some genuine SEC
    # conference games have is_conference=False. Override: if both teams
    # resolved to SEC codes, it must be a conference game.
    neutral = bool(event.get("neutral", False))
    is_conf = bool(event.get("is_conference", False))

    if neutral:
        game_type = "Neutral Site"
    elif is_conf or (home_code and away_code):
        game_type = "Conference"
    else:
        game_type = "Non-Conference"

    # Provider team — the SEC side expected to supply the stats file.
    # Prefer the home slot, fall back to away. (Fix: the Conference branch
    # previously used home_code alone, which left the provider empty whenever
    # the API flagged is_conference=True but the home name failed to resolve.)
    provider_code = home_code or away_code
    provider_name = SEC_TEAMS.get(provider_code, "")

    # Date and time — API datetime is UTC; shift by a fixed -4 h to
    # approximate Eastern Time.
    # NOTE(review): EDT only runs mid-March–early-November, so games before
    # the DST switch will print one hour late — confirm this is acceptable.
    dt_raw   = event.get("datetime", "")
    date_str = ""
    time_str = ""
    if dt_raw:
        try:
            dt_utc = datetime.fromisoformat(dt_raw.replace("Z", "+00:00"))
            dt_et  = dt_utc - timedelta(hours=4)  # EDT (Apr–Oct) approximation
            date_str = f"{dt_et.month}/{dt_et.day}/{dt_et.year}"
            h, m = dt_et.hour, dt_et.minute
            ampm = "AM" if h < 12 else "PM"
            h12  = h % 12 or 12
            time_str = f"{h12}:{m:02d} {ampm} ET"
        except (ValueError, AttributeError):
            # Unparseable timestamp: keep the raw date portion, drop the time.
            date_str = dt_raw[:10]

    # Venue
    venue    = event.get("venue", "") or ""
    location = event.get("location", "") or ""

    # TV broadcasts — flat list of dicts on the event
    tv_list = []
    for br in event.get("schedule_event_broadcasts", []) or []:
        label = br.get("media_label") or br.get("source_label") or ""
        if label:
            tv_list.append(label)

    return {
        "api_event_id":  event_id,
        "date":          date_str,
        "time":          time_str,
        "game_type":     game_type,
        "vis_id":        away_code  or "",
        "vis_name":      away_name,
        "home_id":       home_code  or "",
        "home_name":     home_name,
        "location":      location,
        "stadium":       venue,
        "provider_id":   provider_code or "",
        "provider_name": provider_name,
        "tv":            ", ".join(tv_list),
    }


# ---------------------------------------------------------------------------
# Discover season IDs
# ---------------------------------------------------------------------------

def discover_ids(sport):
    """Probe candidate season IDs (75–99) for *sport* and report which
    ones return events, so new seasons can be added to SEASON_IDS."""
    sport_id = SPORT_IDS.get(sport)
    print(f"\nProbing season IDs for {sport} (sport_id={sport_id})...\n")

    for candidate in range(75, 100):
        query = {
            "per_page": 1,
            "filter[schedule.sport_id]":  sport_id,
            "filter[schedule.season_id]": candidate,
        }
        try:
            resp = requests.get(
                f"{BASE_URL}/schedule-events",
                params=query, headers=HEADERS, timeout=10
            )
            if not resp.ok:
                print(f"  season_id {candidate}: HTTP {resp.status_code}")
            else:
                hits = len(resp.json().get("data", []))
                if hits > 0:
                    print(f"  season_id {candidate}: {hits} event(s) ← FOUND")
                else:
                    print(f"  season_id {candidate}: 0 events")
        except Exception as e:
            print(f"  season_id {candidate}: {e}")
        time.sleep(0.2)  # gentle rate limit between probes


# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------

def determine_status(game):
    """Return "Expected" for games dated before today (file should exist),
    otherwise "Pending" — including empty or unparseable dates."""
    date_text = game["date"]
    if not date_text:
        return "Pending"
    try:
        game_day = datetime.strptime(date_text, "%m/%d/%Y").date()
    except ValueError:
        return "Pending"
    return "Expected" if game_day < datetime.now().date() else "Pending"


def write_csv(games, sport, year, out_dir):
    """
    Export *games* as a CSV whose columns match the Game Registry sheet.

    Registry-only columns (Filename, uploads, etc.) are left blank for the
    sheet to fill in later. Returns the path of the file written.
    """
    out_path = os.path.join(out_dir, f"schedule_{sport}_{year}.csv")
    fields = [
        "Game ID", "Season", "Date", "Time", "Game Type",
        "Visitor ID", "Visitor", "Home ID", "Home",
        "Location", "Stadium", "Provider Team ID", "Provider Team",
        "File Status", "Filename", "First Upload", "Latest Upload",
        "Ingested At", "DB Game ID", "Submissions", "Notes",
    ]
    with open(out_path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=fields)
        writer.writeheader()
        for game in games:
            # Keep the API event ID (and TV info when present) in Notes
            # so each row can be traced back to its source event.
            note_parts = [f"API ID: {game['api_event_id']}"]
            if game.get("tv"):
                note_parts.append(f"TV: {game['tv']}")
            writer.writerow({
                "Game ID":          "",
                "Season":           year,
                "Date":             game["date"],
                "Time":             game["time"],
                "Game Type":        game["game_type"],
                "Visitor ID":       game["vis_id"],
                "Visitor":          game["vis_name"],
                "Home ID":          game["home_id"],
                "Home":             game["home_name"],
                "Location":         game["location"],
                "Stadium":          game["stadium"],
                "Provider Team ID": game["provider_id"],
                "Provider Team":    game["provider_name"],
                "File Status":      determine_status(game),
                "Filename":         "",
                "First Upload":     "",
                "Latest Upload":    "",
                "Ingested At":      "",
                "DB Game ID":       "",
                "Submissions":      0,
                "Notes":            " | ".join(note_parts),
            })
    return out_path


def write_raw_json(events, sport, year, out_dir):
    """Dump the raw API events to a pretty-printed JSON file for debugging.
    Returns the path of the file written."""
    out_path = os.path.join(out_dir, f"schedule_{sport}_{year}_raw.json")
    payload = {"data": events}
    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, indent=2)
    return out_path


def print_summary(games, sport, year):
    """Print a console summary: totals, per-type and per-status counts,
    expected files per provider team, and any unresolved team names."""
    from collections import Counter

    type_counts   = Counter(g["game_type"] for g in games)
    status_counts = Counter(determine_status(g) for g in games)
    unresolved    = [g for g in games if not g["vis_id"] or not g["home_id"]]
    bar = "═" * 54

    print(f"\n{bar}")
    print(f"  {year} SEC {sport.title()} — Schedule Summary")
    print(f"{bar}")
    print(f"  Total games:         {len(games)}")
    print(f"  Conference:          {type_counts['Conference']}")
    print(f"  Non-Conference:      {type_counts['Non-Conference']}")
    print(f"  Neutral Site:        {type_counts['Neutral Site']}")
    print(f"  Status → Pending:    {status_counts['Pending']}  (future games)")
    print(f"  Status → Expected:   {status_counts['Expected']}  (past games, no file yet)")

    providers = Counter(g["provider_id"] for g in games if g["provider_id"])
    if providers:
        print(f"\n  Files expected per team:")
        for code, count in sorted(providers.items()):
            name = SEC_TEAMS.get(code, code)
            print(f"    {code:<6} {name:<22} {count} files")

    if unresolved:
        # These rows imported with blank team IDs — flag them for a fix.
        print(f"\n  ⚠ {len(unresolved)} games with unresolved team name(s):")
        for g in unresolved[:8]:
            print(f"    {g['date']}  '{g['vis_name']}' @ '{g['home_name']}'")
        if len(unresolved) > 8:
            print(f"    ... and {len(unresolved)-8} more")
        print(f"\n  To fix: add raw API name to NAME_TO_CODE dict, then re-run.")
    print(f"{bar}")


# ---------------------------------------------------------------------------
# Main — can also parse an existing raw JSON file without hitting the API
# ---------------------------------------------------------------------------

def main():
    """CLI entry point: fetch (or load) the schedule, parse it into games,
    print a summary, and write the CSV (plus raw JSON when fetched live)."""
    ap = argparse.ArgumentParser(
        description="Pull SEC schedule from SECSports.com API"
    )
    ap.add_argument("--sport",      choices=["baseball","softball"], default="baseball")
    ap.add_argument("--year",       type=int,  default=2026)
    ap.add_argument("--out",        default=".", help="Output directory")
    ap.add_argument("--pages",      type=int,  default=50)
    ap.add_argument("--sport-id",   type=int,  default=None)
    ap.add_argument("--season-id",  type=int,  default=None)
    ap.add_argument("--from-file",  default=None,
                    help="Parse an existing raw JSON file instead of hitting API")
    ap.add_argument("--discover",   action="store_true",
                    help="Probe for season IDs and exit")
    args = ap.parse_args()

    if args.discover:
        discover_ids(args.sport)
        return

    # ── Load events ──────────────────────────────────────────────────────────
    if args.from_file:
        print(f"Loading from file: {args.from_file}")
        with open(args.from_file) as f:
            raw = json.load(f)
        # Accept either our own {"data": [...]} wrapper or a bare list.
        raw_events = raw.get("data", raw) if isinstance(raw, dict) else raw
    else:
        sport_id  = args.sport_id  or SPORT_IDS.get(args.sport)
        season_id = args.season_id or SEASON_IDS.get((args.sport, args.year))

        if not sport_id:
            print(f"Unknown sport ID for '{args.sport}'. Run --discover.")
            return
        if not season_id:
            print(f"Unknown season ID for {args.sport} {args.year}.")
            print(f"Run: python3 import_schedule.py --discover --sport {args.sport}")
            return

        # Years missing from SEASON_DATE_RANGES get a Jan–Jul window built here.
        date_range = SEASON_DATE_RANGES.get(
            args.year,
            (f"{args.year}-01-01T00:00:00.000Z", f"{args.year}-07-31T23:59:59.999Z")
        )

        print(f"\nSEC {args.sport.title()} {args.year} — Fetching from SECSports.com")
        print(f"Sport ID: {sport_id}  |  Season ID: {season_id}")
        raw_events = fetch_all(
            sport_id, season_id, date_range[0], date_range[1],
            max_pages=args.pages
        )

        if not raw_events:
            print("\nNo events returned.")
            print("Try: python3 import_schedule.py --discover")
            return

    # ── Parse ─────────────────────────────────────────────────────────────────
    print(f"\nParsing {len(raw_events)} events...")
    games = []
    for event in raw_events:
        parsed = parse_event(event)
        if parsed:
            games.append(parsed)

    # Sort chronologically. Dates are "M/D/YYYY" strings, so plain string
    # comparison is wrong (e.g. "10/5/2026" < "3/14/2026" lexicographically);
    # parse the date for the key and push missing/bad dates to the end.
    def chrono_key(g):
        try:
            return (datetime.strptime(g["date"], "%m/%d/%Y"), g["vis_name"])
        except ValueError:
            return (datetime.max, g["vis_name"])

    games.sort(key=chrono_key)
    print_summary(games, args.sport, args.year)

    # ── Write output ──────────────────────────────────────────────────────────
    os.makedirs(args.out, exist_ok=True)
    csv_file = write_csv(games, args.sport, args.year, args.out)

    # Only write raw JSON if we fetched from API (not --from-file)
    if not args.from_file:
        write_raw_json(raw_events, args.sport, args.year, args.out)

    print(f"\nCSV: {csv_file}")
    print(f"""
Next steps:
  1. Open your Game Registry Google Sheet
  2. File → Import → Upload {os.path.basename(csv_file)}
     → "Append to current sheet"
  3. ⚾ SEC Stats → Auto-fill Game IDs
  4. ⚾ SEC Stats → Refresh Form Game Dropdown
""")


if __name__ == "__main__":  # script entry point; no side effects on import
    main()
