#!/usr/bin/env python3
"""
SEC Baseball/Softball StatCrew XML Parser — v3
Folder-aware, replace-mode, season-scoped.

Expected folder structure:
  data/raw/2026/UA/GAME01.XML
  data/raw/2026/UT/GAME05.XML
  ...

Game IDs are derived from season + team folder + filename,
guaranteeing uniqueness even when multiple teams submit identically-named files.

Usage:
  python3 parse_games.py data/raw/2026/UA/GAME01.XML          # single file
  python3 parse_games.py data/raw/2026/UA/*.XML               # one team
  python3 parse_games.py data/raw/2026/**/*.XML               # full season
  python3 parse_games.py --replace data/raw/2026/UA/GAME01.XML  # re-ingest correction
  python3 parse_games.py --scan data/raw/2026               # auto-scan folder tree
"""

import xml.etree.ElementTree as ET
import sqlite3
import sys
import os
import glob
import argparse
from datetime import datetime, timezone


DB_PATH = "sec_stats.db"

SEC_TEAMS = {
    "ALA":   "Alabama",             "ARK":   "Arkansas",
    "AUB":   "Auburn",              "FLA":   "Florida",
    "UGA":   "Georgia",             "UK":    "Kentucky",
    "LSU":   "LSU",                 "MISS":  "Ole Miss",
    "MSU":   "Mississippi State",   "MIZ":   "Missouri",
    "OKLA":  "Oklahoma",            "SC":    "South Carolina",
    "TENN":  "Tennessee",           "TEX":   "Texas",
    "TAMU":  "Texas A&M",           "VANDY": "Vanderbilt",
}

# Reverse lookup: team display name → roster team_code
SEC_NAME_TO_CODE = {v: k for k, v in SEC_TEAMS.items()}

# Normalize SEC team names from XML — handles abbreviations, uppercase, alternate IDs
# Maps any known variant → canonical display name (e.g. "KENTUCKY" → "Kentucky", "UK" → "Kentucky")
_SEC_NAME_VARIANTS = {}
for code, name in SEC_TEAMS.items():
    _SEC_NAME_VARIANTS[name] = name
    _SEC_NAME_VARIANTS[name.upper()] = name
    _SEC_NAME_VARIANTS[name.lower()] = name
    _SEC_NAME_VARIANTS[code] = name
    _SEC_NAME_VARIANTS[code.lower()] = name
# Manual additions for common XML IDs that don't match SEC_TEAMS codes
for alias, name in {
    "UA": "Alabama", "BAMA": "Alabama",
    "AU": "Auburn", "AUBURN": "Auburn",
    "UF": "Florida", "GATORS": "Florida",
    "UGA": "Georgia", "GA": "Georgia",
    "UM": "Missouri",
    "OU": "Oklahoma",
    "MSST": "Mississippi State",
    "OLEMISS": "Ole Miss", "OLE MISS": "Ole Miss",
    "SCAR": "South Carolina",
    "VU": "Vanderbilt", "VANDY": "Vanderbilt",
    "TX": "Texas", "UT": "Tennessee",
}.items():
    _SEC_NAME_VARIANTS[alias] = name
    _SEC_NAME_VARIANTS[alias.lower()] = name


def normalize_team_name(raw_name):
    """Return canonical SEC team name if recognized, otherwise return as-is."""
    return _SEC_NAME_VARIANTS.get(raw_name, _SEC_NAME_VARIANTS.get(raw_name.strip(), raw_name))


# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------

SCHEMA = """
PRAGMA journal_mode=WAL;
PRAGMA foreign_keys=ON;

CREATE TABLE IF NOT EXISTS games (
    game_id         TEXT PRIMARY KEY,
    season          TEXT,
    sport           TEXT,
    provider_team   TEXT,
    source_file     TEXT,
    source_path     TEXT,
    date            TEXT,
    location        TEXT,
    stadium         TEXT,
    vis_id          TEXT,
    vis_name        TEXT,
    vis_record      TEXT,
    vis_rank        INTEGER,
    home_id         TEXT,
    home_name       TEXT,
    home_record     TEXT,
    home_rank       INTEGER,
    duration        TEXT,
    attend          INTEGER,
    neutral         INTEGER,
    night_game      INTEGER,
    weather         TEXT,
    start_time      TEXT,
    sched_inn       INTEGER,
    complete        INTEGER DEFAULT 1,
    game_type       TEXT DEFAULT "",
    ingested_at     TEXT,
    file_modified_at TEXT
);

CREATE TABLE IF NOT EXISTS linescore (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id         TEXT,
    team_id         TEXT,
    team_name       TEXT,
    vh              TEXT,
    runs            INTEGER,
    hits            INTEGER,
    errors          INTEGER,
    lob             INTEGER,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS linescore_innings (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id         TEXT,
    team_id         TEXT,
    vh              TEXT,
    inning          INTEGER,
    score           INTEGER,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS team_hitting (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id TEXT, team_id TEXT, team_name TEXT, vh TEXT,
    ab INTEGER DEFAULT 0, r INTEGER DEFAULT 0, h INTEGER DEFAULT 0,
    rbi INTEGER DEFAULT 0, double_ INTEGER DEFAULT 0, triple INTEGER DEFAULT 0,
    hr INTEGER DEFAULT 0, bb INTEGER DEFAULT 0, so INTEGER DEFAULT 0,
    kl INTEGER DEFAULT 0, hbp INTEGER DEFAULT 0, sb INTEGER DEFAULT 0,
    cs INTEGER DEFAULT 0, sh INTEGER DEFAULT 0, sf INTEGER DEFAULT 0,
    gdp INTEGER DEFAULT 0, ground INTEGER DEFAULT 0, fly INTEGER DEFAULT 0,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS team_pitching (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id TEXT, team_id TEXT, team_name TEXT, vh TEXT,
    ip_thirds INTEGER DEFAULT 0, h INTEGER DEFAULT 0, r INTEGER DEFAULT 0,
    er INTEGER DEFAULT 0, bb INTEGER DEFAULT 0, so INTEGER DEFAULT 0,
    hr INTEGER DEFAULT 0, hbp INTEGER DEFAULT 0, wp INTEGER DEFAULT 0,
    kl INTEGER DEFAULT 0, bf INTEGER DEFAULT 0, double_ INTEGER DEFAULT 0,
    triple INTEGER DEFAULT 0, gdp INTEGER DEFAULT 0, fly INTEGER DEFAULT 0,
    ground INTEGER DEFAULT 0, win TEXT, loss TEXT, save_ TEXT,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS team_fielding (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id TEXT, team_id TEXT, team_name TEXT, vh TEXT,
    po INTEGER DEFAULT 0, a INTEGER DEFAULT 0, e INTEGER DEFAULT 0,
    dp INTEGER DEFAULT 0, sba INTEGER DEFAULT 0, csb INTEGER DEFAULT 0,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS player_hitting (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id TEXT, team_id TEXT, team_name TEXT, vh TEXT,
    player_name TEXT, short_name TEXT, uni TEXT,
    gp INTEGER DEFAULT 0, gs INTEGER DEFAULT 0, spot INTEGER DEFAULT 0,
    pos TEXT, bats TEXT, throws TEXT, class_ TEXT,
    ab INTEGER DEFAULT 0, r INTEGER DEFAULT 0, h INTEGER DEFAULT 0,
    rbi INTEGER DEFAULT 0, double_ INTEGER DEFAULT 0, triple INTEGER DEFAULT 0,
    hr INTEGER DEFAULT 0, bb INTEGER DEFAULT 0, so INTEGER DEFAULT 0,
    kl INTEGER DEFAULT 0, hbp INTEGER DEFAULT 0, sb INTEGER DEFAULT 0,
    cs INTEGER DEFAULT 0, sh INTEGER DEFAULT 0, sf INTEGER DEFAULT 0,
    gdp INTEGER DEFAULT 0, ground INTEGER DEFAULT 0, fly INTEGER DEFAULT 0,
    season_ab INTEGER DEFAULT 0, season_r INTEGER DEFAULT 0,
    season_h INTEGER DEFAULT 0, season_rbi INTEGER DEFAULT 0,
    season_so INTEGER DEFAULT 0, season_bb INTEGER DEFAULT 0,
    season_hbp INTEGER DEFAULT 0, season_double INTEGER DEFAULT 0,
    season_triple INTEGER DEFAULT 0, season_hr INTEGER DEFAULT 0,
    season_sb INTEGER DEFAULT 0, season_cs INTEGER DEFAULT 0,
    season_sh INTEGER DEFAULT 0, season_sf INTEGER DEFAULT 0,
    season_avg TEXT, season_obp TEXT, season_slg TEXT,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS player_pitching (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id TEXT, team_id TEXT, team_name TEXT, vh TEXT,
    player_name TEXT, short_name TEXT, uni TEXT,
    gp INTEGER DEFAULT 0, gs INTEGER DEFAULT 0,
    pos TEXT, bats TEXT, throws TEXT, class_ TEXT,
    appear INTEGER DEFAULT 0, ip_thirds INTEGER DEFAULT 0,
    h INTEGER DEFAULT 0, r INTEGER DEFAULT 0, er INTEGER DEFAULT 0,
    bb INTEGER DEFAULT 0, so INTEGER DEFAULT 0, hr INTEGER DEFAULT 0,
    hbp INTEGER DEFAULT 0, wp INTEGER DEFAULT 0, kl INTEGER DEFAULT 0,
    bf INTEGER DEFAULT 0, double_ INTEGER DEFAULT 0, triple INTEGER DEFAULT 0,
    gdp INTEGER DEFAULT 0, fly INTEGER DEFAULT 0, ground INTEGER DEFAULT 0,
    pitches INTEGER DEFAULT 0, strikes INTEGER DEFAULT 0,
    inherit_r INTEGER DEFAULT 0, inherit_s INTEGER DEFAULT 0,
    win TEXT, loss TEXT, save_ TEXT,
    season_era TEXT, season_ip TEXT,
    season_w INTEGER DEFAULT 0, season_l INTEGER DEFAULT 0, season_sv INTEGER DEFAULT 0,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS player_fielding (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id TEXT, team_id TEXT, team_name TEXT, vh TEXT,
    player_name TEXT, uni TEXT, pos TEXT,
    po INTEGER DEFAULT 0, a INTEGER DEFAULT 0, e INTEGER DEFAULT 0,
    dp INTEGER DEFAULT 0, sba INTEGER DEFAULT 0, csb INTEGER DEFAULT 0,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS game_results (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id TEXT, date TEXT,
    vis_id TEXT, vis_name TEXT, vis_score INTEGER,
    home_id TEXT, home_name TEXT, home_score INTEGER,
    winner_id TEXT, winner_name TEXT, loser_id TEXT, loser_name TEXT,
    location TEXT, stadium TEXT, duration TEXT, attend INTEGER,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE TABLE IF NOT EXISTS play_by_play (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    game_id     TEXT,
    inning      INTEGER,
    vh          TEXT,
    seq         INTEGER,
    narrative   TEXT,
    FOREIGN KEY(game_id) REFERENCES games(game_id)
);

CREATE INDEX IF NOT EXISTS idx_ph_team    ON player_hitting(team_id);
CREATE INDEX IF NOT EXISTS idx_ph_player  ON player_hitting(player_name);
CREATE INDEX IF NOT EXISTS idx_ph_game    ON player_hitting(game_id);
CREATE INDEX IF NOT EXISTS idx_pp_team    ON player_pitching(team_id);
CREATE INDEX IF NOT EXISTS idx_pp_player  ON player_pitching(player_name);
CREATE INDEX IF NOT EXISTS idx_gr_date    ON game_results(date);
CREATE INDEX IF NOT EXISTS idx_games_date ON games(date);
CREATE INDEX IF NOT EXISTS idx_games_season ON games(season);
CREATE INDEX IF NOT EXISTS idx_pbp_game   ON play_by_play(game_id, inning);
"""


def get_db(path=DB_PATH):
    conn = sqlite3.connect(path)
    conn.row_factory = sqlite3.Row
    conn.executescript(SCHEMA)
    # Migrate existing databases — add new columns if missing
    for col_def in [
        "game_type TEXT DEFAULT ''",
        "vis_rank  INTEGER",
        "home_rank INTEGER",
    ]:
        col_name = col_def.split()[0]
        try:
            conn.execute(f"ALTER TABLE games ADD COLUMN {col_def}")
            conn.commit()
        except Exception:
            pass  # column already exists
    return conn


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def now_utc():
    return datetime.now(timezone.utc).isoformat()

def ival(el, attr, default=0):
    if el is None: return default
    v = el.get(attr)
    if v is None: return default
    try: return int(v)
    except (ValueError, TypeError): return default

def sval(el, attr, default=""):
    if el is None: return default
    return el.get(attr, default) or default

def ip_to_thirds(ip_str):
    if not ip_str: return 0
    try:
        parts = str(ip_str).split(".")
        return int(parts[0]) * 3 + (int(parts[1]) if len(parts) > 1 else 0)
    except (ValueError, IndexError): return 0

def thirds_to_ip(thirds):
    return f"{thirds // 3}.{thirds % 3}"


def _has_roster_table(conn):
    """Check if the roster/alias tables exist in this database."""
    row = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='roster_aliases'"
    ).fetchone()
    return row is not None


def _flip_last_first(name):
    """Convert 'Last, First' → 'First Last'. Returns original if no comma."""
    if "," not in name:
        return name.strip()
    parts = name.split(",", 1)
    return f"{parts[1].strip()} {parts[0].strip()}"


def resolve_player_name(raw_name, team_name, jersey, conn):
    """
    Resolve a raw XML player name to a canonical 'First Last' display name.

    Resolution order:
    1. Direct alias lookup in roster_aliases (handles last-name-only, various formats)
    2. Flip 'Last, First' → 'First Last' and retry alias lookup
    3. Jersey number fallback (for ambiguous last-name matches)
    4. Basic format normalization (non-SEC or no roster data)
    """
    raw = raw_name.strip()
    if not raw:
        return raw

    team_code = SEC_NAME_TO_CODE.get(team_name)

    # If we have roster data for this team, try alias resolution
    if team_code and _has_roster_table(conn):
        # Step 1: direct alias lookup
        row = conn.execute("""
            SELECT r.display_name FROM roster r
            JOIN roster_aliases a ON a.roster_id = r.id
            WHERE a.alias = ? AND r.team_code = ?
            LIMIT 1
        """, (raw, team_code)).fetchone()
        if row:
            return row[0]

        # Step 2: flip Last, First → First Last and retry
        if "," in raw:
            flipped = _flip_last_first(raw)
            row = conn.execute("""
                SELECT r.display_name FROM roster r
                JOIN roster_aliases a ON a.roster_id = r.id
                WHERE a.alias = ? AND r.team_code = ?
                LIMIT 1
            """, (flipped, team_code)).fetchone()
            if row:
                return row[0]

        # Step 3: jersey number fallback
        if jersey:
            row = conn.execute("""
                SELECT display_name FROM roster
                WHERE team_code = ? AND jersey = ?
                LIMIT 1
            """, (team_code, str(jersey).strip())).fetchone()
            if row:
                return row[0]

    # Step 4: no roster match — basic format normalization
    if "," in raw:
        return _flip_last_first(raw)
    return raw


def extract_path_context(xml_path):
    """
    Extract season, sport, and provider_team from file path.
    Expects: .../data/raw/SPORT/SEASON/TEAM_ID/filename.XML
    Falls back gracefully if path doesn't match.
    Returns (season, sport_code, team_id, clean_filename)
    """
    parts = os.path.normpath(xml_path).split(os.sep)
    filename = parts[-1]
    stem = os.path.splitext(filename)[0].upper()

    team_id    = None
    season     = None
    sport_code = None

    for i in range(len(parts) - 2, -1, -1):
        p = parts[i].upper()
        pl = parts[i].lower()
        if p in SEC_TEAMS and team_id is None:
            team_id = p
        elif p.isdigit() and len(p) == 4 and season is None:
            season = p
        elif pl in ("baseball", "softball") and sport_code is None:
            sport_code = "BSB" if pl == "baseball" else "SB"

    # Fallbacks
    if not team_id:    team_id = "UNK"
    if not season:     season = str(datetime.now().year)
    if not sport_code: sport_code = "BSB"  # default to baseball

    return season, sport_code, team_id, stem


def make_game_id(venue_el, season, sport_code, team_id, file_stem):
    """
    Build a stable, unique game ID.
    Format: SEASON_SPORTCODE_TEAMID_FILESTEM
    Examples:
      2026_BSB_UA_GAME01   (baseball)
      2026_SB_UA_GAME01    (softball)
    Guarantees uniqueness across sports AND teams with identical filenames.
    The XML's internal gameid is ignored (too unreliable) but stored for reference.
    """
    stem = file_stem.upper().replace(" ", "_")
    return f"{season}_{sport_code}_{team_id}_{stem}"


# ---------------------------------------------------------------------------
# Delete all rows for a game (for replace mode)
# ---------------------------------------------------------------------------

GAME_TABLES = [
    "play_by_play", "game_results", "player_fielding",
    "player_pitching", "player_hitting", "team_fielding",
    "team_pitching", "team_hitting", "linescore_innings",
    "linescore", "games"
]

def delete_game(conn, game_id):
    for table in GAME_TABLES:
        conn.execute(f"DELETE FROM {table} WHERE game_id=?", (game_id,))
    conn.commit()
    print(f"  REPLACED: Deleted existing data for {game_id}")


# ---------------------------------------------------------------------------
# Core parser
# ---------------------------------------------------------------------------

def parse_game(xml_path, conn, replace=False, override_game_id=None, game_type=None):
    try:
        tree = ET.parse(xml_path)
    except ET.ParseError as e:
        print(f"  ERROR parsing {xml_path}: {e}")
        return "fail"

    root   = tree.getroot()
    venue  = root.find("venue")
    if venue is None:
        print(f"  ERROR: No <venue> in {xml_path}")
        return "fail"

    season, sport_code, team_id, file_stem = extract_path_context(xml_path)

    # Use override Game ID if provided (from --game-id flag),
    # otherwise derive it from the file path as before.
    if override_game_id:
        game_id = override_game_id.strip()
        print(f"  Using override Game ID: {game_id}")
    else:
        game_id = make_game_id(venue, season, sport_code, team_id, file_stem)

    # Get file modification time for change detection
    try:
        file_mtime = datetime.fromtimestamp(
            os.path.getmtime(xml_path), tz=timezone.utc
        ).isoformat()
    except OSError:
        file_mtime = now_utc()

    # Handle existing game
    existing = conn.execute(
        "SELECT game_id, ingested_at FROM games WHERE game_id=?", (game_id,)
    ).fetchone()

    if existing:
        if replace:
            delete_game(conn, game_id)
        else:
            print(f"  SKIP (already ingested): {game_id}")
            return "skip"

    print(f"  Importing: {game_id}  [{team_id} / {os.path.basename(xml_path)}]")

    status_el = root.find("status")
    complete  = 1 if (status_el is not None and
                      sval(status_el, "complete", "N").upper() == "Y") else 0

    game_row = {
        "game_id":        game_id,
        "season":         season,
        "sport_code":     sport_code,
        "provider_team":  team_id,
        "source_file":    os.path.basename(xml_path),
        "source_path":    os.path.abspath(xml_path),
        "date":           sval(venue, "date"),
        "location":       sval(venue, "location"),
        "stadium":        sval(venue, "stadium"),
        "vis_id":         SEC_NAME_TO_CODE.get(normalize_team_name(sval(venue, "visname")), sval(venue, "visid")),
        "vis_name":       normalize_team_name(sval(venue, "visname")),
        "vis_record":     "",
        "vis_rank":       None,
        "home_id":        SEC_NAME_TO_CODE.get(normalize_team_name(sval(venue, "homename")), sval(venue, "homeid")),
        "home_name":      normalize_team_name(sval(venue, "homename")),
        "home_record":    "",
        "home_rank":      None,
        "duration":       sval(venue, "duration"),
        "attend":         ival(venue, "attend"),
        "neutral":        1 if sval(venue, "neutralgame").upper() == "Y" else 0,
        "night_game":     1 if sval(venue, "nitegame", "N").upper() == "Y" else 0,
        "weather":        sval(venue, "weather"),
        "start_time":     sval(venue, "start"),
        "sched_inn":      ival(venue, "schedinn", 9),
        "complete":       complete,
        "game_type":      game_type or "",
        "ingested_at":    now_utc(),
        "file_modified_at": file_mtime,
    }

    # Insert game row first (FK anchor)
    conn.execute("""
        INSERT INTO games
            (game_id, season, sport, provider_team, source_file, source_path,
             date, location, stadium,
             vis_id, vis_name, vis_record, vis_rank,
             home_id, home_name, home_record, home_rank,
             duration, attend, neutral, night_game, weather,
             start_time, sched_inn, complete, game_type, ingested_at, file_modified_at)
        VALUES (
            :game_id, :season, :sport_code, :provider_team, :source_file, :source_path,
            :date, :location, :stadium,
            :vis_id, :vis_name, :vis_record, :vis_rank,
            :home_id, :home_name, :home_record, :home_rank,
            :duration, :attend, :neutral, :night_game, :weather,
            :start_time, :sched_inn, :complete, :game_type, :ingested_at, :file_modified_at
        )
    """, game_row)

    vis_score = 0
    home_score = 0

    for team_el in root.findall("team"):
        vh        = sval(team_el, "vh")
        team_id_t = sval(team_el, "id")
        team_name = normalize_team_name(sval(team_el, "name"))
        record    = sval(team_el, "record")

        # Normalize SEC team_id — XML files are inconsistent (e.g. AU vs AUBURN, UM vs MIZ)
        canonical_code = SEC_NAME_TO_CODE.get(team_name)
        if canonical_code:
            team_id_t = canonical_code

        rank_val = team_el.get("rank", "").strip()
        rank_int = int(rank_val) if rank_val.isdigit() else None

        if vh == "V":
            conn.execute("UPDATE games SET vis_record=?, vis_rank=? WHERE game_id=?",
                         (record, rank_int, game_id))
        else:
            conn.execute("UPDATE games SET home_record=?, home_rank=? WHERE game_id=?",
                         (record, rank_int, game_id))

        ls = team_el.find("linescore")
        if ls is not None:
            runs = ival(ls, "runs"); hits = ival(ls, "hits")
            errs = ival(ls, "errs"); lob  = ival(ls, "lob")
            if vh == "V": vis_score = runs
            else:         home_score = runs
            conn.execute("""
                INSERT INTO linescore (game_id, team_id, team_name, vh, runs, hits, errors, lob)
                VALUES (?,?,?,?,?,?,?,?)
            """, (game_id, team_id_t, team_name, vh, runs, hits, errs, lob))
            for inn_el in ls.findall("lineinn"):
                conn.execute("""
                    INSERT INTO linescore_innings (game_id, team_id, vh, inning, score)
                    VALUES (?,?,?,?,?)
                """, (game_id, team_id_t, vh, ival(inn_el,"inn"), ival(inn_el,"score")))

        totals = team_el.find("totals")
        if totals is not None:
            th = totals.find("hitting")
            if th is not None:
                conn.execute("""
                    INSERT INTO team_hitting
                    (game_id,team_id,team_name,vh,ab,r,h,rbi,double_,triple,hr,
                     bb,so,kl,hbp,sb,cs,sh,sf,gdp,ground,fly)
                    VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """, (game_id,team_id_t,team_name,vh,
                      ival(th,"ab"),ival(th,"r"),ival(th,"h"),ival(th,"rbi"),
                      ival(th,"double"),ival(th,"triple"),ival(th,"hr"),
                      ival(th,"bb"),ival(th,"so"),ival(th,"kl"),ival(th,"hbp"),
                      ival(th,"sb"),ival(th,"cs"),ival(th,"sh"),ival(th,"sf"),
                      ival(th,"gdp"),ival(th,"ground"),ival(th,"fly")))

            tp = totals.find("pitching")
            if tp is not None:
                conn.execute("""
                    INSERT INTO team_pitching
                    (game_id,team_id,team_name,vh,ip_thirds,h,r,er,bb,so,
                     hr,hbp,wp,kl,bf,double_,triple,gdp,fly,ground,win,loss,save_)
                    VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """, (game_id,team_id_t,team_name,vh,
                      ip_to_thirds(sval(tp,"ip")),
                      ival(tp,"h"),ival(tp,"r"),ival(tp,"er"),
                      ival(tp,"bb"),ival(tp,"so"),ival(tp,"hr"),
                      ival(tp,"hbp"),ival(tp,"wp"),ival(tp,"kl"),ival(tp,"bf"),
                      ival(tp,"double"),ival(tp,"triple"),
                      ival(tp,"gdp"),ival(tp,"fly"),ival(tp,"ground"),
                      sval(tp,"win"),sval(tp,"loss"),sval(tp,"save")))

            tf = totals.find("fielding")
            if tf is not None:
                conn.execute("""
                    INSERT INTO team_fielding
                    (game_id,team_id,team_name,vh,po,a,e,dp,sba,csb)
                    VALUES (?,?,?,?,?,?,?,?,?,?)
                """, (game_id,team_id_t,team_name,vh,
                      ival(tf,"po"),ival(tf,"a"),ival(tf,"e"),
                      ival(tf,"indp"),ival(tf,"sba"),ival(tf,"csb")))

        for p in team_el.findall("player"):
            pname     = sval(p,"name"); shortname = sval(p,"shortname")
            uni       = sval(p,"uni");  gp        = ival(p,"gp")
            pname     = resolve_player_name(pname, team_name, uni, conn)
            gs        = ival(p,"gs");   spot      = ival(p,"spot")
            pos       = sval(p,"pos");  bats      = sval(p,"bats")
            throws    = sval(p,"throws"); cls     = sval(p,"class")

            ph = p.find("hitting")
            if ph is not None and gp > 0:
                hs = p.find("hitseason")
                conn.execute("""
                    INSERT INTO player_hitting
                    (game_id,team_id,team_name,vh,player_name,short_name,uni,
                     gp,gs,spot,pos,bats,throws,class_,
                     ab,r,h,rbi,double_,triple,hr,bb,so,kl,hbp,
                     sb,cs,sh,sf,gdp,ground,fly,
                     season_ab,season_r,season_h,season_rbi,season_so,
                     season_bb,season_hbp,season_double,season_triple,season_hr,
                     season_sb,season_cs,season_sh,season_sf,season_avg)
                    VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """, (game_id,team_id_t,team_name,vh,pname,shortname,uni,
                      gp,gs,spot,pos,bats,throws,cls,
                      ival(ph,"ab"),ival(ph,"r"),ival(ph,"h"),ival(ph,"rbi"),
                      ival(ph,"double"),ival(ph,"triple"),ival(ph,"hr"),
                      ival(ph,"bb"),ival(ph,"so"),ival(ph,"kl"),ival(ph,"hbp"),
                      ival(ph,"sb"),ival(ph,"cs"),ival(ph,"sh"),ival(ph,"sf"),
                      ival(ph,"gdp"),ival(ph,"ground"),ival(ph,"fly"),
                      ival(hs,"ab") if hs is not None else 0,
                      ival(hs,"r")  if hs is not None else 0,
                      ival(hs,"h")  if hs is not None else 0,
                      ival(hs,"rbi") if hs is not None else 0,
                      ival(hs,"so") if hs is not None else 0,
                      ival(hs,"bb") if hs is not None else 0,
                      ival(hs,"hbp") if hs is not None else 0,
                      ival(hs,"double") if hs is not None else 0,
                      ival(hs,"triple") if hs is not None else 0,
                      ival(hs,"hr") if hs is not None else 0,
                      ival(hs,"sb") if hs is not None else 0,
                      ival(hs,"cs") if hs is not None else 0,
                      ival(hs,"sh") if hs is not None else 0,
                      ival(hs,"sf") if hs is not None else 0,
                      sval(hs,"avg") if hs is not None else ""))

            pp = p.find("pitching")
            if pp is not None and gp > 0:
                ps_el  = p.find("pchseason")
                psumm  = p.find("psitsummary")
                pitches = ival(psumm,"pitches") if psumm is not None else 0
                strikes = ival(psumm,"strikes") if psumm is not None else 0
                conn.execute("""
                    INSERT INTO player_pitching
                    (game_id,team_id,team_name,vh,player_name,short_name,uni,
                     gp,gs,pos,bats,throws,class_,
                     appear,ip_thirds,h,r,er,bb,so,hr,hbp,wp,kl,bf,
                     double_,triple,gdp,fly,ground,pitches,strikes,
                     inherit_r,inherit_s,win,loss,save_,
                     season_era,season_ip,season_w,season_l,season_sv)
                    VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """, (game_id,team_id_t,team_name,vh,pname,shortname,uni,
                      gp,gs,pos,bats,throws,cls,
                      ival(pp,"appear"),ip_to_thirds(sval(pp,"ip")),
                      ival(pp,"h"),ival(pp,"r"),ival(pp,"er"),
                      ival(pp,"bb"),ival(pp,"so"),ival(pp,"hr"),
                      ival(pp,"hbp"),ival(pp,"wp"),ival(pp,"kl"),ival(pp,"bf"),
                      ival(pp,"double"),ival(pp,"triple"),
                      ival(pp,"gdp"),ival(pp,"fly"),ival(pp,"ground"),
                      pitches,strikes,
                      ival(pp,"inheritr"),ival(pp,"inherits"),
                      sval(pp,"win"),sval(pp,"loss"),sval(pp,"save"),
                      sval(ps_el,"era") if ps_el is not None else "",
                      sval(ps_el,"ip")  if ps_el is not None else "",
                      ival(ps_el,"win") if ps_el is not None else 0,
                      ival(ps_el,"loss") if ps_el is not None else 0,
                      ival(ps_el,"save") if ps_el is not None else 0))

            pf = p.find("fielding")
            if pf is not None and gp > 0:
                conn.execute("""
                    INSERT INTO player_fielding
                    (game_id,team_id,team_name,vh,player_name,uni,pos,po,a,e,dp,sba,csb)
                    VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)
                """, (game_id,team_id_t,team_name,vh,pname,uni,pos,
                      ival(pf,"po"),ival(pf,"a"),ival(pf,"e"),
                      ival(pf,"indp"),ival(pf,"sba"),ival(pf,"csb")))

    # Play-by-play
    plays_el = root.find("plays")
    if plays_el is not None:
        for inn_el in plays_el.findall("inning"):
            inn_num = ival(inn_el, "number")
            for batting_el in inn_el.findall("batting"):
                batting_vh = sval(batting_el, "vh")
                for play_el in batting_el.findall("play"):
                    narr_el = play_el.find("narrative")
                    if narr_el is not None:
                        narrative = sval(narr_el, "text")
                        if narrative:
                            conn.execute("""
                                INSERT INTO play_by_play (game_id,inning,vh,seq,narrative)
                                VALUES (?,?,?,?,?)
                            """, (game_id, inn_num, batting_vh,
                                  ival(play_el,"seq"), narrative))

    # Game result
    vis_name  = game_row["vis_name"];  home_name = game_row["home_name"]
    vis_id_g  = game_row["vis_id"];    home_id_g = game_row["home_id"]
    winner_id   = vis_id_g   if vis_score > home_score else home_id_g
    winner_name = vis_name   if vis_score > home_score else home_name
    loser_id    = home_id_g  if vis_score > home_score else vis_id_g
    loser_name  = home_name  if vis_score > home_score else vis_name
    conn.execute("""
        INSERT INTO game_results
        (game_id,date,vis_id,vis_name,vis_score,home_id,home_name,home_score,
         winner_id,winner_name,loser_id,loser_name,location,stadium,duration,attend)
        VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
    """, (game_id,game_row["date"],vis_id_g,vis_name,vis_score,
          home_id_g,home_name,home_score,
          winner_id,winner_name,loser_id,loser_name,
          game_row["location"],game_row["stadium"],
          game_row["duration"],game_row["attend"]))

    conn.commit()
    return "ok"


# ---------------------------------------------------------------------------
# Folder scanner — finds all XML files under a root directory
# ---------------------------------------------------------------------------

def scan_folder(root_dir):
    """Recursively find all .XML / .xml files under root_dir."""
    found = []
    for dirpath, _, filenames in os.walk(root_dir):
        for fname in filenames:
            if fname.upper().endswith(".XML"):
                found.append(os.path.join(dirpath, fname))
    return sorted(found)


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(
        description="SEC Stats XML Parser v3",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 parse_games.py data/raw/2026/LSU/whatever-lsu-named-it.xml
  python3 parse_games.py --game-id 2026_BSB_LSU_0006 data/raw/2026/LSU/lsu-milw-g3.xml
  python3 parse_games.py --scan data/raw/2026
  python3 parse_games.py --replace data/raw/2026/LSU/game.xml
  python3 parse_games.py --replace --game-id 2026_BSB_LSU_0006 data/raw/2026/LSU/corrected.xml
        """
    )
    parser.add_argument("files", nargs="*", help="XML file(s) to ingest")
    parser.add_argument("--scan", metavar="DIR",
                        help="Recursively scan a directory for XML files")
    parser.add_argument("--replace", action="store_true",
                        help="Re-ingest files even if already in database (for corrections)")
    parser.add_argument("--db", default=DB_PATH, help="Path to SQLite database")
    parser.add_argument("--game-id", default=None, metavar="GAME_ID",
                        help="Override the auto-generated Game ID (use with single-file ingestion). "
                             "Matches the ID pre-assigned in the Game Registry. "
                             "Example: 2026_BSB_LSU_0006")
    parser.add_argument("--game-type", default=None, metavar="GAME_TYPE",
                        help="Game type from the Registry: Conference, Non-Conference, or Neutral Site. "
                             "Passed automatically by the watcher from the lookup file.")
    args = parser.parse_args()

    # --game-id only makes sense for a single file
    if args.game_id and args.scan:
        parser.error("--game-id cannot be used with --scan (scan processes multiple files)")
    if args.game_id and len(args.files) > 1:
        parser.error("--game-id can only be used when ingesting a single file")

    # Collect paths
    paths = []
    if args.scan:
        paths = scan_folder(args.scan)
        print(f"Scan found {len(paths)} XML file(s) under {args.scan}")
    for pattern in args.files:
        expanded = glob.glob(pattern, recursive=True)
        paths.extend(expanded if expanded else [pattern])

    if not paths:
        parser.print_help()
        sys.exit(1)

    conn = get_db(args.db)
    ok = skip = fail = 0

    for path in paths:
        if not os.path.exists(path):
            print(f"  NOT FOUND: {path}")
            fail += 1
            continue
        result = parse_game(path, conn, replace=args.replace,
                            override_game_id=args.game_id,
                            game_type=args.game_type)
        if result == "ok":     ok   += 1
        elif result == "skip": skip += 1
        else:                  fail += 1

    conn.close()
    print(f"\nDone.  Imported: {ok}  Skipped: {skip}  Failed: {fail}")
    print(f"Database: {os.path.abspath(args.db)}")


if __name__ == "__main__":
    main()
