{
  "version": "1.0",
  "title": "ZenHodl vs Polymarket Consensus — MLB June 2026 Regular-Season Sample",
  "published_at": "2026-04-24T12:50:00Z",
  "scope": {
    "sport": "MLB",
    "window": "First 100 MLB regular-season games tipping on or after 2026-06-01 for which both ZenHodl and Polymarket markets are available at T-60",
    "first_eligible_game_after": "2026-06-01T00:00:00Z",
    "last_eligible_game_before": "2026-06-30T23:59:59Z",
    "max_games": 100
  },
  "snapshot": {
    "timing": "Both predictions captured no later than T-60 minutes before official first pitch",
    "zenhodl_source": "ZenHodl MLB pregame win probability via internal SignalEngine.get_pregame_predictions('MLB')",
    "polymarket_source": "Polymarket MLB game-winner market mid price (best bid + best ask) / 2, fetched from clob.polymarket.com. Tip-off time extracted from Polymarket event slug pattern mlb-{home}-{away}-YYYY-MM-DD — not from market endDate (which is market resolution, not game start).",
    "matching": "Each MLB game matched to its Polymarket market by team names + game date from slug.",
    "tie_handling": "If either source is unavailable at T-60, the game is excluded from BOTH model's metrics. Recorded with status='polymarket_unavailable' or 'zenhodl_unavailable' in the public raw.jsonl."
  },
  "metrics": {
    "headline": "Expected Calibration Error (ECE), 10 equal-width bins",
    "auxiliary": [
      "Brier score",
      "Log loss",
      "Accuracy"
    ],
    "ece_formula": "Sum over bins of |bin_avg_pred - bin_avg_outcome| weighted by bin sample fraction",
    "confidence_interval": "95% bootstrap CI on ECE with 1000 resamples, published alongside point estimate",
    "extra_innings_rule": "The winning team at the end of the game (regardless of inning count) is the outcome. No ties."
  },
  "why_mlb_regular_season": "Regular-season MLB offers a large, liquid Polymarket market for nearly every game. A 100-game June sample provides enough data points for meaningful ECE confidence intervals in roughly 30 days, enabling a faster pre-committed test cycle than waiting for October playoffs.",
  "sample_size_justification": "100 games resolves enough of the Polymarket spectrum (20-30%, 40-60%, 70-80% bins) to estimate ECE with a CI of approximately ±0.02 at 95% confidence, comparable to the NBA playoffs benchmark sample size.",
  "model_versioning": {
    "policy": "ZenHodl MLB model weights as deployed at T-60 of each game are what counts.",
    "retrains_during_window": "Permitted. Disclosed in the per-game row's model_version field."
  },
  "publication": {
    "live_url": "https://zenhodl.net/benchmarks/mlb-june-2026-sample",
    "raw_data_jsonl": "https://zenhodl.net/benchmarks/mlb-june-2026-sample/raw.jsonl",
    "manifest_file": "https://zenhodl.net/benchmarks/mlb-june-2026-sample/manifest.json",
    "we_publish_when_we_lose": true
  },
  "rule_changes": "Once this manifest's SHA-256 hash is broadcast on Polygon, the rules above are frozen. If ZenHodl edits this file at any later point, the on-chain hash will not match the served file. Anyone can verify by hashing the served manifest.json and comparing to the on-chain transaction data field."
}
