{
  "version": "1.0",
  "title": "ZenHodl vs Polymarket Consensus — NHL Playoffs 2026 Calibration Benchmark",
  "published_at": "2026-04-24T12:50:00Z",
  "scope": {
    "sport": "NHL",
    "window": "2026 Stanley Cup Playoffs Conference Semifinals through Stanley Cup Finals (inclusive)",
    "first_eligible_game_after": "2026-05-04T00:00:00Z",
    "last_eligible_game_before": "2026-06-25T00:00:00Z"
  },
  "snapshot": {
    "timing": "Both predictions captured no later than T-60 minutes before official puck drop",
    "zenhodl_source": "ZenHodl NHL pregame win probability via internal SignalEngine.get_pregame_predictions('NHL')",
    "polymarket_source": "Polymarket NHL game-winner market mid price (best bid + best ask) / 2, fetched from clob.polymarket.com",
    "matching": "Each NHL game matched to its Polymarket market by team names + game date. Matching script published in this repo so the join is auditable.",
    "tie_handling": "If either source is unavailable at T-60, the game is excluded from BOTH model's metrics. Recorded with status='polymarket_unavailable' or 'zenhodl_unavailable' in the public raw.jsonl."
  },
  "metrics": {
    "headline": "Expected Calibration Error (ECE), 10 equal-width bins",
    "auxiliary": [
      "Brier score",
      "Log loss",
      "Accuracy"
    ],
    "ece_formula": "Sum over bins of |bin_avg_pred - bin_avg_outcome| weighted by bin sample fraction",
    "confidence_interval": "95% bootstrap CI on ECE with 1000 resamples, published alongside point estimate",
    "overtime_rule": "Regulation, overtime, and shootout outcomes all count as the final winner. No tie logic."
  },
  "why_polymarket": "Polymarket's mid-price is the consensus probability of every smart-money trader actively wagering real capital on the game outcome. Beating it on calibration is the canonical hedge-fund-grade benchmark for a sports forecasting model.",
  "why_stanley_cup_conf_semis": "Similar structure to the NBA benchmark. Starting at Conference Semifinals (round 2) gives a clean pre-commit boundary after the first round winners are known, while preserving a ~35-game sample.",
  "model_versioning": {
    "policy": "ZenHodl model weights as deployed at T-60 of each game are what counts. Each prediction row in raw.jsonl includes the model version ID so post-hoc retrains do not invalidate prior predictions.",
    "retrains_during_window": "Permitted. Disclosed in the per-game row's model_version field."
  },
  "publication": {
    "live_url": "https://zenhodl.net/benchmarks/nhl-playoffs-2026",
    "raw_data_jsonl": "https://zenhodl.net/benchmarks/nhl-playoffs-2026/raw.jsonl",
    "manifest_file": "https://zenhodl.net/benchmarks/nhl-playoffs-2026/manifest.json",
    "we_publish_when_we_lose": true
  },
  "rule_changes": "Once this manifest's SHA-256 hash is broadcast on Polygon, the rules above are frozen. If ZenHodl edits this file at any later point, the on-chain hash will not match the served file. Anyone can verify by hashing the served manifest.json and comparing to the on-chain transaction data field."
}
