# /home/agent-jay/claudeCode/jarvis/finance/scripts/berg_gate_g2.py
# berg_gate_g2.py — Gate G2 backtest harness
"""Berg combo Gate G2 — backtest combos against Berg's named precedent dates.
Per plans/berg-atomic-indicators.md:
G2: combo logic must re-fire on >=80% of Berg's named historical dates
(within +/- 2 trading days). If not, atoms are misdefined and we stop.
This script:
1. Loads OHLCV for SPX/NDX/VIX/RUT/SOX (the tickers listed in PROXY_MAP) from
finance/research.db. Builds a panel keyed by berg_combos.INSTR_* keys.
2. Runs every live combo from berg_combos.BERG_COMBO_REGISTRY.
3. For each precedent date, checks if the combo fired within +/- 2 days.
4. Prints a per-combo report and an overall G2 verdict.
Exit codes:
0 : script ran cleanly (verdict PASS, FAIL, or INCONCLUSIVE).
2 : real failure — DB unreadable or no instrument data loaded.
1 : only with --exit-on-verdict, when the verdict is FAIL/INCONCLUSIVE.
Default exit semantics are cron-safe: a clean run always returns 0 so the
verdict can be parsed from stdout without colliding with the standard
"non-zero = script crashed" convention. Use --exit-on-verdict when an
upstream caller (rare) wants the verdict promoted to process exit.
Usage:
finance/venv/bin/python -m finance.scripts.berg_gate_g2
finance/venv/bin/python -m finance.scripts.berg_gate_g2 --exit-on-verdict
"""
from __future__ import annotations
import argparse
import sqlite3
import sys
from pathlib import Path
import pandas as pd
from finance.indicators import berg_combos as bc
# Absolute location of the SQLite research database that load_panel() reads.
DB_PATH = Path("/home/agent-jay/claudeCode/jarvis/finance/research.db")
# Minimum overall match ratio (matches / in-range precedents) for a G2 PASS.
GATE_THRESHOLD = 0.80
# Instrument mapping: real Berg instrument -> ticker we have in research.db.
# After Phase 0 backfill (2026-05-25), we use the actual indices (SPX/NDX/RUT/SOX/VIX)
# rather than ETF proxies. Indices have yfinance-reported volume back to 1957/1985/1987,
# unlocking pre-2003 Berg precedents. Atoms are scale-invariant (rolling rank / relative
# change) so swapping ETF -> index does not bias the signal logic.
# NOTE: the name PROXY_MAP is historical; per the note above, the values are
# now the index tickers themselves, not ETF stand-ins.
PROXY_MAP = {
    bc.INSTR_SPX: "SPX",  # ^SPX from 1957
    bc.INSTR_NDX: "NDX",  # ^NDX from 1985
    bc.INSTR_VIX: "VIX",  # ^VIX from 1990
    bc.INSTR_RUT: "RUT",  # ^RUT from 1987
    bc.INSTR_SOX: "SOX",  # ^SOX from 1994
}
def load_panel(
    db_path: Path | None = None,
    proxy_map: dict[str, str] | None = None,
) -> dict[str, pd.DataFrame]:
    """Load OHLCV from prices_daily and key it by Berg instrument id.

    Args:
        db_path: SQLite database to read. Defaults to the module-level
            ``DB_PATH`` (resolved at call time).
        proxy_map: Mapping of Berg instrument id -> ticker as stored in
            ``prices_daily``. Defaults to the module-level ``PROXY_MAP``.

    Returns:
        A dict mapping instrument id to a DataFrame indexed by ``date``
        (parsed from the ``ts`` column as epoch seconds) with the columns
        ``open, high, low, close, volume``. Instruments with no rows are
        omitted silently; instruments with fewer than 260 rows are omitted
        with a ``[skip]`` line on stdout.

    Raises:
        sqlite3.Error: if the database cannot be opened.
        Exception: whatever ``pandas.read_sql_query`` raises when the query
            fails (e.g. the table is missing). The connection is closed
            before the exception propagates.
    """
    if db_path is None:
        db_path = DB_PATH
    if proxy_map is None:
        proxy_map = PROXY_MAP

    panel: dict[str, pd.DataFrame] = {}
    con = sqlite3.connect(db_path)
    try:
        for instr, ticker in proxy_map.items():
            df = pd.read_sql_query(
                "SELECT ts, open, high, low, close, volume FROM prices_daily "
                "WHERE ticker = ? ORDER BY ts",
                con,
                params=(ticker,),
            )
            if df.empty:
                continue
            df["date"] = pd.to_datetime(df["ts"], unit="s")
            df = df.set_index("date").drop(columns=["ts"])
            # Only keep symbols with enough history to run 252d-warmup atoms.
            if len(df) < 260:
                print(f" [skip] {instr}<-{ticker}: only {len(df)} rows (need >=260)")
                continue
            panel[instr] = df
            print(f" [load] {instr}<-{ticker}: {len(df):>5} rows "
                  f"{df.index.min().date()} -> {df.index.max().date()}")
    finally:
        # Release the handle even when a query raises, so a failed run does
        # not leak the connection.
        con.close()
    return panel
def main() -> int:
    """Run the Gate G2 backtest, print the report, and return an exit code.

    Steps: parse CLI flags, load the price panel, print the per-combo
    summary from ``bc.precedent_summary``, print per-precedent detail for
    every non-blocked combo, then print the overall verdict.

    Returns:
        0 for a clean run (PASS always; FAIL/INCONCLUSIVE unless
        ``--exit-on-verdict`` is given), 1 for FAIL/INCONCLUSIVE when
        ``--exit-on-verdict`` is given, and 2 when the database cannot be
        read or no instrument was loaded.
    """
    ap = argparse.ArgumentParser(description="Berg Combo Gate G2 backtest")
    ap.add_argument(
        "--exit-on-verdict",
        action="store_true",
        help="Promote verdict to process exit code (FAIL/INCONCLUSIVE → 1). "
        "Default off: clean runs always exit 0 so cron can distinguish "
        "verdict outcomes (in stdout) from real script crashes.",
    )
    args = ap.parse_args()
    # Exit code used for a non-PASS verdict; stays 0 unless the caller opted in.
    verdict_failure_code = 1 if args.exit_on_verdict else 0
    tolerance_days = 2

    print("=" * 72)
    print("Berg Combo Gate G2 backtest")
    print(f" threshold: >={GATE_THRESHOLD*100:.0f}% match within +/- {tolerance_days} trading days")
    print(f" db: {DB_PATH}")
    print("=" * 72)

    print("\n[1] Loading panel from research.db ...")
    try:
        panel = load_panel()
    except (sqlite3.Error, OSError) as exc:
        # sqlite3 raises its own Error hierarchy when the file cannot be
        # opened; pandas reports a failed query as DatabaseError, which is an
        # OSError subclass. Both are data-layer failures, so report them with
        # the documented exit code 2 instead of letting a traceback exit 1
        # (which would be indistinguishable from --exit-on-verdict's FAIL).
        print(f"\nFAIL: could not read research.db ({exc})")
        return 2
    if not panel:
        print("\nFAIL: no instruments loaded from research.db")
        return 2  # real failure (DB / data layer); exit non-zero regardless of flag

    print(f"\n[2] Running {len(bc.BERG_COMBO_REGISTRY)} combos ...\n")
    summary = bc.precedent_summary(panel, tolerance_days=tolerance_days)

    # Per-combo summary table.
    print(f"{'combo':<6} {'name':<48} {'matches':>10} {'status'}")
    print("-" * 100)
    for _, row in summary.iterrows():
        spec = bc.BERG_COMBO_REGISTRY[row["combo_id"]]
        name = spec.name[:46]
        if row["status"].startswith("BLOCKED"):
            match_str = " - "
            stat = "BLOCKED"
        elif row["status"].startswith("ERROR"):
            match_str = " err "
            stat = row["status"][:50]
        else:
            match_str = f"{int(row['matches'])}/{int(row['in_range'])}"
            stat = "LIVE"
        print(f"{row['combo_id']:<6} {name:<48} {match_str:>10} {stat}")

    # Detailed per-combo precedent table (only for LIVE combos).
    print("\n[3] Per-precedent detail (LIVE combos only) ...\n")
    status_marks = {"MATCH": "OK ", "MISS": "x ", "OUT_OF_RANGE": "-- ", "NO_DATA": "? "}
    for combo_id, spec in bc.BERG_COMBO_REGISTRY.items():
        if spec.blocked:
            continue
        try:
            results = bc.backtest_precedents(combo_id, panel, tolerance_days=tolerance_days)
        except (KeyError, ValueError) as e:
            print(f" {combo_id}: skipped ({e})")
            continue
        print(f" {combo_id} — {spec.name}")
        for r in results:
            # Fall back to the "unknown" marker rather than aborting the
            # report on a status this script does not recognise.
            mark = status_marks.get(r["status"], "? ")
            extra = f" ({', '.join(r['match_dates'])})" if r["match_dates"] else ""
            print(f" {mark} {r['date']} [{r['status']}]{extra}")
        print()

    # Verdict: pool matches over every combo whose summary status is LIVE.
    live = summary[summary["status"] == "LIVE"]
    total_matches = int(live["matches"].sum())
    total_in_range = int(live["in_range"].sum())
    print("=" * 72)
    if total_in_range == 0:
        # NOTE(review): the hint below predates the Phase 0 backfill mentioned
        # in the PROXY_MAP comment — confirm whether it is still accurate.
        print("INCONCLUSIVE: no live combo has any precedent inside data range.")
        print(" Likely cause: research.db only has 2010+ history; Berg cites "
              "1957-1998 precedents. Phase 0 backfill needed.")
        return verdict_failure_code

    overall = total_matches / total_in_range
    print(f"OVERALL G2: {total_matches}/{total_in_range} = {overall*100:.1f}% "
          f"(threshold {GATE_THRESHOLD*100:.0f}%)")
    if overall >= GATE_THRESHOLD:
        print("VERDICT: PASS - atoms appear correctly defined on testable precedents.")
        return 0
    print("VERDICT: FAIL - atoms likely misdefined; review combos with MISS rows.")
    return verdict_failure_code


if __name__ == "__main__":
    raise SystemExit(main())