# /home/agent-jay/claudeCode/jarvis/finance/scripts/berg_gate_g2.py
# berg_gate_g2.py — Gate G2 backtest harness

"""Berg combo Gate G2 — backtest combos against Berg's named precedent dates.

Per plans/berg-atomic-indicators.md:
  G2: combo logic must re-fire on >=80% of Berg's named historical dates
      (within +/- 2 trading days). If not, atoms are misdefined and we stop.

This script:
  1. Loads OHLCV for the SPX/NDX/VIX/RUT/SOX index tickers listed in
     PROXY_MAP from finance/research.db. Builds a panel keyed by
     berg_combos.INSTR_* keys.
  2. Runs every live combo from berg_combos.BERG_COMBO_REGISTRY.
  3. For each precedent date, checks if the combo fired within +/- 2 days.
  4. Prints a per-combo report and an overall G2 verdict.

Exit codes:
  0  : script ran cleanly (verdict PASS, FAIL, or INCONCLUSIVE).
  2  : real failure — DB unreadable or no instrument data loaded.
  1  : only with --exit-on-verdict, when the verdict is FAIL/INCONCLUSIVE.

Default exit semantics are cron-safe: a clean run always returns 0 so the
verdict can be parsed from stdout without colliding with the standard
"non-zero = script crashed" convention. Use --exit-on-verdict when an
upstream caller (rare) wants the verdict promoted to process exit.

Usage:
    finance/venv/bin/python -m finance.scripts.berg_gate_g2
    finance/venv/bin/python -m finance.scripts.berg_gate_g2 --exit-on-verdict
"""
from __future__ import annotations

import argparse
import sqlite3
import sys
from pathlib import Path

import pandas as pd

from finance.indicators import berg_combos as bc

# SQLite database that load_panel() reads the prices_daily table from.
DB_PATH = Path("/home/agent-jay/claudeCode/jarvis/finance/research.db")
# Minimum overall ratio of matched to in-range precedents for a G2 PASS verdict.
GATE_THRESHOLD = 0.80

# Instrument mapping: Berg instrument id (berg_combos.INSTR_*) -> ticker stored
# in research.db's prices_daily table.
# After Phase 0 backfill (2026-05-25), we use the actual indices (SPX/NDX/RUT/SOX/VIX)
# rather than ETF proxies. Indices have yfinance-reported volume back to 1957/1985/1987,
# unlocking pre-2003 Berg precedents. Atoms are scale-invariant (rolling rank / relative
# change) so swapping ETF -> index does not bias the signal logic.
# NOTE(review): the name PROXY_MAP presumably predates the switch away from
# ETF proxies; the values below are index tickers, not proxies.
PROXY_MAP = {
    bc.INSTR_SPX: "SPX",   # ^SPX from 1957
    bc.INSTR_NDX: "NDX",   # ^NDX from 1985
    bc.INSTR_VIX: "VIX",   # ^VIX from 1990
    bc.INSTR_RUT: "RUT",   # ^RUT from 1987
    bc.INSTR_SOX: "SOX",   # ^SOX from 1994
}


def load_panel() -> dict[str, pd.DataFrame]:
    """Load daily OHLCV from ``prices_daily`` and key it by Berg instrument id.

    For each ``instrument -> ticker`` pair in ``PROXY_MAP`` the ticker's rows
    are read from the SQLite database at ``DB_PATH`` in ascending ``ts`` order.
    ``ts`` is interpreted as epoch seconds, converted to a ``date`` index and
    dropped as a column. A ``[load]`` or ``[skip]`` line is printed per ticker
    that has at least one row; tickers with no rows are skipped silently.

    Returns:
        Mapping of instrument id to a DataFrame indexed by ``date`` with
        columns ``open, high, low, close, volume``. Empty when no ticker had
        enough history.

    Raises:
        Any error raised by ``sqlite3`` or ``pandas`` while opening or
        querying the database. The connection is closed before the error
        propagates.
    """
    con = sqlite3.connect(DB_PATH)
    panel: dict[str, pd.DataFrame] = {}
    # try/finally so a failing query (e.g. missing prices_daily table) does
    # not leak the open connection.
    try:
        for instr, ticker in PROXY_MAP.items():
            df = pd.read_sql_query(
                "SELECT ts, open, high, low, close, volume FROM prices_daily "
                "WHERE ticker = ? ORDER BY ts",
                con,
                params=(ticker,),
            )
            if df.empty:
                continue
            df["date"] = pd.to_datetime(df["ts"], unit="s")
            df = df.set_index("date").drop(columns=["ts"])
            # Only keep symbols with enough history to run 252d-warmup atoms.
            if len(df) < 260:
                print(f"  [skip] {instr}<-{ticker}: only {len(df)} rows (need >=260)")
                continue
            panel[instr] = df
            print(f"  [load] {instr}<-{ticker}: {len(df):>5} rows "
                  f"{df.index.min().date()} -> {df.index.max().date()}")
    finally:
        con.close()
    return panel


def main() -> int:
    """Run the Gate G2 backtest, print the report, and return the exit code.

    Steps: parse CLI flags, load the price panel, print a one-line summary per
    combo, print per-precedent detail for non-blocked combos, then print the
    overall verdict against ``GATE_THRESHOLD``.

    Returns:
        0 when the run completes (PASS always; FAIL/INCONCLUSIVE unless
        ``--exit-on-verdict`` is set), 1 for FAIL/INCONCLUSIVE when
        ``--exit-on-verdict`` is set, 2 when the database cannot be read or no
        instrument data was loaded.
    """
    ap = argparse.ArgumentParser(description="Berg Combo Gate G2 backtest")
    ap.add_argument(
        "--exit-on-verdict",
        action="store_true",
        help="Promote verdict to process exit code (FAIL/INCONCLUSIVE → 1). "
             "Default off: clean runs always exit 0 so cron can distinguish "
             "verdict outcomes (in stdout) from real script crashes.",
    )
    args = ap.parse_args()

    print("=" * 72)
    print("Berg Combo Gate G2 backtest")
    print(f"  threshold: >={GATE_THRESHOLD*100:.0f}% match within +/- 2 trading days")
    print(f"  db: {DB_PATH}")
    print("=" * 72)
    print("\n[1] Loading panel from research.db ...")
    try:
        panel = load_panel()
    except (sqlite3.Error, pd.errors.DatabaseError) as e:
        # An uncaught exception would exit 1, colliding with the
        # --exit-on-verdict code; data-layer failures are documented as 2.
        print(f"\nFAIL: could not read research.db ({e})")
        return 2
    if not panel:
        print("\nFAIL: no instruments loaded from research.db")
        return 2  # real failure (DB / data layer); exit non-zero regardless of flag

    print(f"\n[2] Running {len(bc.BERG_COMBO_REGISTRY)} combos ...\n")
    summary = bc.precedent_summary(panel, tolerance_days=2)

    # Format and print
    print(f"{'combo':<6}  {'name':<48}  {'matches':>10}  {'status'}")
    print("-" * 100)
    for _, row in summary.iterrows():
        spec = bc.BERG_COMBO_REGISTRY[row["combo_id"]]
        name = spec.name[:46]
        if row["status"].startswith("BLOCKED"):
            match_str = "  -    "
            stat = "BLOCKED"
        elif row["status"].startswith("ERROR"):
            match_str = "  err  "
            stat = row["status"][:50]
        else:
            match_str = f"{int(row['matches'])}/{int(row['in_range'])}"
            stat = "LIVE"
        print(f"{row['combo_id']:<6}  {name:<48}  {match_str:>10}  {stat}")

    # Detailed per-combo precedent table (only for LIVE combos)
    print("\n[3] Per-precedent detail (LIVE combos only) ...\n")
    # Fixed-width marker per precedent status; built once outside the loops.
    status_marks = {"MATCH": "OK ", "MISS": "x  ", "OUT_OF_RANGE": "-- ", "NO_DATA": "?  "}
    for combo_id, spec in bc.BERG_COMBO_REGISTRY.items():
        if spec.blocked:
            continue
        try:
            results = bc.backtest_precedents(combo_id, panel, tolerance_days=2)
        except (KeyError, ValueError) as e:
            print(f"  {combo_id}: skipped ({e})")
            continue
        print(f"  {combo_id} — {spec.name}")
        for r in results:
            # Fall back to "?  " so an unrecognised status cannot abort the
            # report; the raw status is still printed in brackets.
            mark = status_marks.get(r["status"], "?  ")
            extra = f"  ({', '.join(r['match_dates'])})" if r["match_dates"] else ""
            print(f"     {mark} {r['date']}  [{r['status']}]{extra}")
        print()

    # Verdict: aggregate over rows whose summary status is exactly "LIVE".
    live = summary[summary["status"] == "LIVE"]
    total_matches = int(live["matches"].sum())
    total_in_range = int(live["in_range"].sum())
    print("=" * 72)
    if total_in_range == 0:
        # Nothing testable: avoid dividing by zero and report INCONCLUSIVE.
        print("INCONCLUSIVE: no live combo has any precedent inside data range.")
        print("  Likely cause: research.db only has 2010+ history; Berg cites "
              "1957-1998 precedents. Phase 0 backfill needed.")
        return 1 if args.exit_on_verdict else 0
    overall = total_matches / total_in_range
    print(f"OVERALL G2: {total_matches}/{total_in_range} = {overall*100:.1f}%  "
          f"(threshold {GATE_THRESHOLD*100:.0f}%)")
    if overall >= GATE_THRESHOLD:
        print("VERDICT: PASS - atoms appear correctly defined on testable precedents.")
        return 0
    print("VERDICT: FAIL - atoms likely misdefined; review combos with MISS rows.")
    return 1 if args.exit_on_verdict else 0


# Script entry point: main()'s integer return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())