#!/usr/bin/env python3
# Non-harmful training surrogate: Baseline vs A–W, paired design, fixed seeds.
import os, json, random, numpy as np, pandas as pd
from math import sqrt
from datetime import datetime

# Load helpers
from wilson import wilson_ci

# Project layout: this script lives one directory below the project root,
# next to sibling "inputs" and "outputs" directories.
# ``__file__`` is made absolute first: when the script is run as
# ``python script.py`` on Python < 3.9 it is a relative path, and the double
# dirname() would then collapse to "" (the current working directory) instead
# of the script's parent directory.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
OUT = os.path.join(ROOT, "outputs")
INP = os.path.join(ROOT, "inputs")
os.makedirs(OUT, exist_ok=True)

# JSON text is UTF-8; do not depend on the platform's locale encoding.
with open(os.path.join(INP, "seeds.json"), encoding="utf-8") as f:
    SEEDS = json.load(f)
with open(os.path.join(INP, "scenarios.json"), encoding="utf-8") as f:
    SC = json.load(f)

# Seed both global generators up front so scenario sampling (``random``) and
# probability jitter (``np.random``) are reproducible from seeds.json.
random.seed(SEEDS["python"])
np.random.seed(SEEDS["numpy"])

# Option lists that sample_setup() draws from.
domains  = SC["domains"]
ews      = SC["ew_levels"]
times    = SC["times"]
weathers = SC["weathers"]

def sample_setup(red_mass_factor=1.0):
    """Draw one random scenario configuration.

    One entry is picked with ``random.choice`` from each module-level option
    list (``domains``, ``ews``, ``times``, ``weathers``), in that order, using
    the global ``random`` generator.

    Args:
        red_mass_factor: Stored unchanged in the returned setup.

    Returns:
        dict with the keys "domain", "ew", "time", "weather" and
        "red_mass_factor".
    """
    # The draw order matters: it determines which random numbers each field
    # consumes from the seeded generator.
    axes = (
        ("domain", domains),
        ("ew", ews),
        ("time", times),
        ("weather", weathers),
    )
    setup = {field: random.choice(options) for field, options in axes}
    setup["red_mass_factor"] = red_mass_factor
    return setup

def baseline_prob(setup):
    """Return the baseline win probability for one scenario setup.

    Starts from 0.50, subtracts a fixed penalty for each matching terrain,
    EW, time and weather level, subtracts 0.06 per unit of red mass above
    1.0, adds normal jitter (mean 0, standard deviation 0.01) drawn from the
    global NumPy generator, and limits the result to [0.02, 0.98].

    Args:
        setup: Mapping with the keys "domain", "ew", "time", "weather" and
            "red_mass_factor".

    Returns:
        The clamped probability.
    """
    # (setup field, level, penalty), listed in the order they are applied.
    penalties = (
        ("domain", "URBAN", 0.05),
        ("domain", "LITTORAL", 0.03),
        ("domain", "MOUNTAIN", 0.02),
        ("ew", "MED", 0.02),
        ("ew", "HIGH", 0.05),
        ("time", "NIGHT", 0.02),
        ("weather", "FOG", 0.03),
        ("weather", "RAIN", 0.01),
    )
    prob = 0.50
    for field, level, penalty in penalties:
        if setup[field] == level:
            prob -= penalty

    # Only red mass in excess of parity (1.0) hurts the baseline.
    excess_mass = max(0.0, setup["red_mass_factor"] - 1.0)
    prob -= 0.06 * excess_mass

    # Small jitter
    prob += np.random.normal(0, 0.01)

    upper_capped = min(0.98, prob)
    return max(0.02, upper_capped)

def aw_lift(setup):
    """Return the additive win-probability lift applied on top of the baseline.

    Starts from 0.22, adds 0.03 in URBAN or LITTORAL domains, subtracts 0.03
    under HIGH EW, adds 0.02 at NIGHT, subtracts 0.03 per unit of red mass
    above 1.0, adds normal jitter (mean 0, standard deviation 0.01) from the
    global NumPy generator, and limits the result to [0.08, 0.35].

    Args:
        setup: Mapping with the keys "domain", "ew", "time" and
            "red_mass_factor".

    Returns:
        The clamped lift.
    """
    # (setup field, matching levels, signed adjustment), in application order.
    adjustments = (
        ("domain", ("URBAN", "LITTORAL"), 0.03),
        ("ew", ("HIGH",), -0.03),
        ("time", ("NIGHT",), 0.02),
    )
    lift = 0.22
    for field, levels, delta in adjustments:
        if setup[field] in levels:
            lift += delta

    # Mass reduces lift slightly
    excess_mass = max(0.0, setup["red_mass_factor"] - 1.0)
    lift -= 0.03 * excess_mass

    lift += np.random.normal(0, 0.01)

    upper_capped = min(0.35, lift)
    return max(0.08, upper_capped)

def simulate_block(n, red_mass_factor=1.0, tag="parity"):
    """Simulate ``n`` paired scenarios and summarise both win rates.

    For every scenario a setup is sampled, one uniform draw is taken from a
    block-local ``RandomState`` seeded with ``SEEDS["runs"]``, and that same
    draw is compared against both the baseline probability and the lifted
    probability (the paired design).

    Args:
        n: Number of scenarios to simulate.
        red_mass_factor: Passed through to ``sample_setup``.
        tag: Block label; its upper-cased form prefixes each scenario id.

    Returns:
        A ``(frame, stats)`` tuple. ``frame`` is a DataFrame with one row per
        scenario. ``stats`` holds "n", "baseline_wr", "baseline_ci", "aw_wr",
        "aw_ci" and "delta_pp". The rates and interval bounds are the three
        values returned by ``wilson_ci`` (presumably estimate, lower, upper;
        confirm against the helper).
    """
    setup_fields = ("domain", "ew", "time", "weather", "red_mass_factor")
    paired_rng = np.random.RandomState(SEEDS["runs"])

    records = []
    for number in range(1, n + 1):
        # Call order below fixes how the seeded generators are consumed.
        setup = sample_setup(red_mass_factor=red_mass_factor)
        draw = paired_rng.rand()  # paired random draw
        p_base = baseline_prob(setup)
        p_lifted = max(0.0, min(1.0, p_base + aw_lift(setup)))

        record = {"scenario_id": f"{tag.upper()}_{number:05d}"}
        for field in setup_fields:
            record[field] = setup[field]
        record["p_baseline"] = round(p_base, 6)
        record["p_aw"] = round(p_lifted, 6)
        record["baseline_win"] = int(draw < p_base)
        record["aw_win"] = int(draw < p_lifted)
        records.append(record)

    frame = pd.DataFrame(records)
    baseline_wins = int(frame["baseline_win"].sum())
    aw_wins = int(frame["aw_win"].sum())
    total = len(frame)

    base_rate, base_low, base_high = wilson_ci(baseline_wins, total)
    aw_rate, aw_low, aw_high = wilson_ci(aw_wins, total)

    stats = {
        "n": total,
        "baseline_wr": base_rate, "baseline_ci": [base_low, base_high],
        "aw_wr": aw_rate, "aw_ci": [aw_low, aw_high],
        # Difference of the two rates expressed in percentage points.
        "delta_pp": (aw_rate - base_rate) * 100.0,
    }
    return frame, stats

def main():
    """Run every simulation block and write the CSV, JSON and XLSX outputs.

    For each block (parity, 2x and 3x red mass; 10000 scenarios each) the
    per-scenario log is written to ``OUT/<fname>`` as CSV. A summary of the
    rates returned by ``simulate_block`` (scaled by 100 and rounded to two
    decimals) is written to ``OUT/summary.json``. When openpyxl is
    importable, ``OUT/summary.xlsx`` is also written with a "summary" sheet
    plus one sheet per block.

    The workbook is best-effort: a missing openpyxl skips it silently, and
    any other failure while writing it is reported as a ``RuntimeWarning``
    instead of being swallowed. The CSV and JSON outputs are already on disk
    by then.
    """
    # Function-scope imports: the file header only imports ``datetime`` itself.
    import warnings
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # formats to the same string.
    report = {"prepared_utc": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%MZ")}
    blocks = [
        ("parity", 10000, 1.0, "parity_10000.csv"),
        ("mass2x", 10000, 2.0, "mass2x_10000.csv"),
        ("mass3x", 10000, 3.0, "mass3x_10000.csv"),
    ]
    summary = {}
    frames = {}  # kept so the workbook does not have to re-read the CSVs
    for tag, n, mf, fname in blocks:
        df, stats = simulate_block(n, mf, tag)
        df.to_csv(os.path.join(OUT, fname), index=False)
        frames[tag] = df
        summary[tag] = {
            "n": stats["n"],
            "baseline_wr": round(stats["baseline_wr"] * 100, 2),
            "baseline_ci95": [round(bound * 100, 2) for bound in stats["baseline_ci"][:2]],
            "aw_wr": round(stats["aw_wr"] * 100, 2),
            "aw_ci95": [round(bound * 100, 2) for bound in stats["aw_ci"][:2]],
            "delta_pp": round(stats["delta_pp"], 2),
            "mass_factor": mf,
        }
    report["summary"] = summary
    with open(os.path.join(OUT, "summary.json"), "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    # Excel workbook (optional)
    try:
        import openpyxl  # noqa: F401
    except ImportError:
        # If openpyxl isn't available, skip XLSX silently; JSON/CSV still present.
        return

    srows = [
        {
            "block": tag,
            "n": data["n"],
            "baseline_wr_%": data["baseline_wr"],
            "baseline_ci95_low_%": data["baseline_ci95"][0],
            "baseline_ci95_high_%": data["baseline_ci95"][1],
            "aw_wr_%": data["aw_wr"],
            "aw_ci95_low_%": data["aw_ci95"][0],
            "aw_ci95_high_%": data["aw_ci95"][1],
            "delta_pp": data["delta_pp"],
            "mass_factor": data["mass_factor"],
        }
        for tag, data in summary.items()
    ]
    try:
        with pd.ExcelWriter(os.path.join(OUT, "summary.xlsx"), engine="openpyxl") as xw:
            # write summary sheet
            pd.DataFrame(srows).to_excel(xw, index=False, sheet_name="summary")
            # logs
            for tag, _, _, _ in blocks:
                frames[tag].to_excel(xw, index=False, sheet_name=tag)
    except Exception as exc:
        # Still best-effort, but a real failure (permissions, disk, bad data)
        # must be visible to whoever runs the script.
        warnings.warn(f"could not write summary.xlsx: {exc}", RuntimeWarning)

# Run the full simulation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
