#!/usr/bin/env python3
"""verify_clv_gap.py — reproduce every number on the /clv-evidence page.

Usage:
  python3 verify_clv_gap.py [PATH_TO_trades.jsonl]

Defaults to ./trades.jsonl if no path given. The dataset is published
at https://zenhodl.net/api/trades.jsonl.

Outputs:
  - Headline CLV+ / CLV− split with Wilson 95% CIs + formal two-proportion Z-test
  - Per-sport breakdown (sports with ≥30 trades in the gap, i.e. CLV+ ∪ CLV−)
  - Robustness across CLV thresholds
  - Selection-bias analysis: WR comparison of measured vs unmeasured subsets
  - Per-sport coverage rate (what fraction of settled trades got closing price)
"""
from __future__ import annotations

import json
import math
import sys
from collections import Counter, defaultdict
from pathlib import Path

# CLV bucketing threshold, in cents, used for the headline and per-sport
# splits. With diff = closing price − entry price, a trade is CLV+ when
# diff > +THRESH_C, CLV− when diff < −THRESH_C, and CLV= (neutral, excluded
# from the gap) otherwise. The robustness section sweeps its own list of
# thresholds and does not read this constant.
THRESH_C = 0.5


def is_won(r) -> bool:
    """Return True when the record's `won` field is truthy.

    Accepts bool True/False and 0/1 ints; a missing or None `won` value is
    reported as False.
    """
    outcome = r.get("won")
    if outcome is None:
        return False
    return bool(outcome)


def sport_of(r) -> str:
    """Return the upper-cased sport label used to group a trade record.

    The label comes from `sport`, falling back to `league`. Tennis records
    whose `tour` is ATP or WTA are reported under the tour name instead.
    Records with no usable label are grouped under "UNKNOWN".
    """
    label = r.get("sport") or r.get("league") or ""
    label = label.upper()
    circuit = (r.get("tour") or "").upper()
    if label == "TENNIS" and circuit in ("ATP", "WTA"):
        return circuit
    return label if label else "UNKNOWN"


def wilson_ci(wins: int, n: int, z: float = 1.96) -> tuple[float, float]:
    """Wilson score interval for a binomial proportion.

    Args:
        wins: number of successes.
        n: number of trials.
        z: normal critical value; the default 1.96 gives a 95% interval.

    Returns:
        (lower, upper) bounds clamped to [0, 1]; (0.0, 0.0) when n is 0.
    """
    if n == 0:
        return (0.0, 0.0)
    p_hat = wins / n
    z_sq = z * z
    shrink = 1 + z_sq / n
    midpoint = (p_hat + z_sq / (2 * n)) / shrink
    spread = (z / shrink) * math.sqrt(p_hat * (1 - p_hat) / n + z_sq / (4 * n * n))
    lower = max(0.0, midpoint - spread)
    upper = min(1.0, midpoint + spread)
    return (lower, upper)


def two_prop_z(x1: int, n1: int, x2: int, n2: int) -> tuple[float, float]:
    """Two-proportion pooled-variance Z-test.

    Args:
        x1: successes in the first sample.
        n1: size of the first sample.
        x2: successes in the second sample.
        n2: size of the second sample.

    Returns:
        (z, two-sided p-value). Both are NaN when either sample is empty.
        When the pooled standard error is zero the result is (0.0, 1.0).
    """
    if n1 == 0 or n2 == 0:
        return (float("nan"), float("nan"))
    p_pool = (x1 + x2) / (n1 + n2)
    se = math.sqrt(p_pool * (1 - p_pool) * (1 / n1 + 1 / n2))
    if se == 0:
        # se is zero only when the pooled rate is exactly 0 or 1, i.e. every
        # trial in BOTH samples had the same outcome. The two proportions are
        # then identical, so there is no evidence of a difference: z = 0,
        # p = 1 (not z = inf, p = 0, which would claim maximal significance).
        return (0.0, 1.0)
    z = (x1 / n1 - x2 / n2) / se
    # Two-sided tail of the standard normal: 2 * (1 - Phi(|z|)) = erfc(|z|/sqrt(2)).
    return (z, math.erfc(abs(z) / math.sqrt(2)))


def _load_rows(path: Path) -> tuple[list[dict], int]:
    """Read a JSONL file; return (object records, count of skipped bad lines)."""
    rows: list[dict] = []
    skipped = 0
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # blank lines (e.g. a trailing newline) are not errors
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            if isinstance(record, dict):
                rows.append(record)
            else:
                # Valid JSON but not an object; it cannot be a trade record.
                skipped += 1
    return rows, skipped


def _clv_cents(r) -> "float | None":
    """Return closing − entry price in cents, or None if either is unparseable."""
    try:
        return float(r["closing_price_c"]) - float(r["entry_price_c"])
    except (KeyError, TypeError, ValueError):
        return None


def main(argv: list[str]) -> int:
    """Load the trades file and print every section of the CLV report.

    Args:
        argv: command-line vector; argv[1], if present, is the path to the
            JSONL trades file (defaults to "trades.jsonl").

    Returns:
        Process exit code: 0 on success, 1 when the trades file is missing.
    """
    path_arg = argv[1] if len(argv) > 1 else "trades.jsonl"
    path = Path(path_arg)
    # is_file() also rejects directories, which open() could not read anyway.
    if not path.is_file():
        print(f"ERROR: trades file not found: {path}", file=sys.stderr)
        print("Pass a path explicitly, or download from https://zenhodl.net/api/trades.jsonl",
              file=sys.stderr)
        return 1

    all_rows, skipped = _load_rows(path)
    print(f"Loaded {len(all_rows):,} trade records from {path}")
    if skipped:
        print(f"WARNING: skipped {skipped:,} malformed line(s) in {path}", file=sys.stderr)

    settled = [r for r in all_rows if r.get("won") is not None]
    measured = [r for r in settled if r.get("closing_price_c") is not None
                and r.get("entry_price_c") is not None]
    # NOTE: a settled row with a closing price but no entry price lands in
    # neither `measured` nor `unmeasured`.
    unmeasured = [r for r in settled if r.get("closing_price_c") is None]
    print(f"  settled (outcome known):       {len(settled):,}")
    print(f"  with closing price + entry:    {len(measured):,}  (the measured subset)")
    print(f"  without closing price:         {len(unmeasured):,}  (selection-bias check below)")
    print()

    # Compute CLV once per row so that bucketing, the mean-CLV column and the
    # robustness sweep all see the same parsed numbers. Rows whose prices
    # cannot be parsed are left out of every CLV computation.
    scored: list[tuple[dict, float]] = []
    for r in measured:
        diff = _clv_cents(r)
        if diff is not None:
            scored.append((r, diff))

    # ─── Headline CLV bucketing ────────────────────────────────────────
    buckets: dict[str, list[tuple[dict, float]]] = {"CLV+": [], "CLV=": [], "CLV-": []}
    for r, diff in scored:
        if diff > THRESH_C:
            buckets["CLV+"].append((r, diff))
        elif diff < -THRESH_C:
            buckets["CLV-"].append((r, diff))
        else:
            buckets["CLV="].append((r, diff))
    plus = [r for r, _ in buckets["CLV+"]]
    minus = [r for r, _ in buckets["CLV-"]]
    neutral_n = len(buckets["CLV="])

    print("=" * 72)
    print(f"HEADLINE — CLV bucketing (threshold ±{THRESH_C}c)")
    print("=" * 72)
    print(f"{'Bucket':<8} {'n':>5} {'wins':>5} {'WR':>7} {'95% Wilson CI':>20} {'mean CLV':>10}")
    for label, members in buckets.items():
        n = len(members)
        wins = sum(1 for r, _ in members if is_won(r))
        wr = wins / n if n else 0.0
        lo, hi = wilson_ci(wins, n)
        mean_clv = sum(d for _, d in members) / n if n else 0.0
        ci_str = f"[{lo*100:.1f}, {hi*100:.1f}]"
        print(f"{label:<8} {n:>5} {wins:>5} {wr*100:>6.1f}% {ci_str:>20} {mean_clv:>+9.1f}c")
    print()

    # Two-proportion Z-test on the CLV+ vs CLV− gap
    x1 = sum(1 for r in plus if is_won(r))
    n1 = len(plus)
    x2 = sum(1 for r in minus if is_won(r))
    n2 = len(minus)
    z, p_val = two_prop_z(x1, n1, x2, n2)
    gap_pp = (x1 / n1 - x2 / n2) * 100 if n1 and n2 else 0.0
    print(f"GAP: {gap_pp:.1f} percentage points")
    print(f"  Two-proportion Z-test:  z = {z:.2f}   two-sided p ≈ {p_val:.2e}")
    print(f"  Gap measured on n={n1+n2} (drops {neutral_n} neutral trades)")
    print()

    # ─── Per-sport breakdown ──────────────────────────────────────────
    print("=" * 72)
    print("PER-SPORT (sports with ≥30 trades in CLV+ ∪ CLV−)")
    print("=" * 72)
    by_plus = defaultdict(list)
    by_minus = defaultdict(list)
    for r in plus:
        by_plus[sport_of(r)].append(r)
    for r in minus:
        by_minus[sport_of(r)].append(r)
    sports = sorted(set(by_plus) | set(by_minus))
    print(f"{'Sport':<10} {'CLV+':>5} {'CLV+ WR':>9} {'CLV-':>5} {'CLV- WR':>9} {'Gap':>7} {'Ratio':>7}")
    for sp in sports:
        p_rows = by_plus.get(sp, [])
        m_rows = by_minus.get(sp, [])
        if len(p_rows) + len(m_rows) < 30:
            continue
        p_wins = sum(1 for r in p_rows if is_won(r))
        m_wins = sum(1 for r in m_rows if is_won(r))
        p_wr = p_wins / len(p_rows) if p_rows else 0
        m_wr = m_wins / len(m_rows) if m_rows else 0
        gap = (p_wr - m_wr) * 100
        ratio = (p_wr / m_wr) if m_wr > 0 else float("inf")
        print(f"  {sp:<8} {len(p_rows):>5} {p_wr*100:>8.1f}% {len(m_rows):>5} {m_wr*100:>8.1f}% {gap:>+5.0f}pp {ratio:>6.1f}×")
    print()

    # ─── Robustness across thresholds ─────────────────────────────────
    print("=" * 72)
    print("ROBUSTNESS — gap across different CLV thresholds")
    print("=" * 72)
    print(f"{'Threshold':<12} {'CLV+':>5} {'CLV+ WR':>9} {'CLV-':>5} {'CLV- WR':>9} {'Gap':>7}")
    for thresh in [0.0, 0.5, 1.0, 2.0, 5.0]:
        above = [r for r, d in scored if d > thresh]
        below = [r for r, d in scored if d < -thresh]
        if not above or not below:
            continue
        above_wr = sum(1 for r in above if is_won(r)) / len(above)
        below_wr = sum(1 for r in below if is_won(r)) / len(below)
        print(f"  ±{thresh:>3.1f}c     {len(above):>5} {above_wr*100:>8.1f}% {len(below):>5} {below_wr*100:>8.1f}% {above_wr*100-below_wr*100:>+5.1f}pp")
    print()

    # ─── Selection-bias analysis ──────────────────────────────────────
    print("=" * 72)
    print("SELECTION BIAS — measured vs unmeasured settled trades")
    print("=" * 72)
    meas_wins = sum(1 for r in measured if is_won(r))
    unmeas_wins = sum(1 for r in unmeasured if is_won(r))
    if measured and unmeasured:
        m_wr_overall = meas_wins / len(measured)
        u_wr_overall = unmeas_wins / len(unmeasured)
        z_sel, p_sel = two_prop_z(meas_wins, len(measured), unmeas_wins, len(unmeasured))
        verdict = "no detectable aggregate WR difference" if p_sel > 0.05 else "WR differs significantly"
        print(f"  Measured subset:    n={len(measured):>4}  WR={m_wr_overall*100:.1f}%")
        print(f"  Unmeasured subset:  n={len(unmeasured):>4}  WR={u_wr_overall*100:.1f}%")
        print(f"  Delta: {(m_wr_overall - u_wr_overall)*100:+.2f}pp")
        print(f"  Two-proportion Z-test: z={z_sel:.2f}, p={p_sel:.3f} → {verdict}")
    print()

    # Per-sport coverage rate (which sports got closing price recorded)
    print("Per-sport coverage of closing-line capture:")
    by_sport_m = Counter(sport_of(r) for r in measured)
    by_sport_u = Counter(sport_of(r) for r in unmeasured)
    all_sports = sorted(set(by_sport_m) | set(by_sport_u))
    print(f"  {'Sport':<10} {'measured':>9} {'unmeasured':>11} {'total':>6} {'coverage':>10}")
    grand_m = grand_u = 0
    for sp in all_sports:
        mn = by_sport_m.get(sp, 0)
        un = by_sport_u.get(sp, 0)
        total = mn + un
        if total == 0:
            continue
        cov = mn / total * 100
        print(f"  {sp:<10} {mn:>9} {un:>11} {total:>6} {cov:>9.1f}%")
        grand_m += mn
        grand_u += un
    print(f"  {'TOTAL':<10} {grand_m:>9} {grand_u:>11} {grand_m+grand_u:>6}")
    print()
    print("Caveats: NCAAMB / NCAAWB show low coverage because the closing-line")
    print("capture pipeline came online later than the bot's NCAA trading window.")
    print("Those sports are essentially absent from the headline gap analysis.")
    return 0


# Script entry point: run the report and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
