Simulation-based design for student grades

Source: FakeMidtermFinal/simulation_based_design.Rmd

This example simulates randomized and biased treatment assignments for student grades. The estimand is a treatment effect of 5 points. Randomization protects the simple difference in means; a useful pre-test reduces uncertainty; biased assignment creates confounding that adjustment can partly repair.

Code

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import expit
import statsmodels.formula.api as smf

rng = np.random.default_rng(20260531)

First randomized experiment

Code

n = 100
y_if_control = rng.normal(60, 20, size=n)
y_if_treated = y_if_control + 5
z = rng.permutation(np.repeat([0, 1], n // 2))
y = np.where(z == 1, y_if_treated, y_if_control)
fake = pd.DataFrame({"y": y, "z": z})

diff = fake.loc[fake.z == 1, "y"].mean() - fake.loc[fake.z == 0, "y"].mean()
se_diff = np.sqrt(
    fake.loc[fake.z == 0, "y"].var(ddof=1) / (z == 0).sum()
    + fake.loc[fake.z == 1, "y"].var(ddof=1) / (z == 1).sum()
)
pd.Series({"difference in means": diff, "standard error": se_diff}).round(2)

difference in means   -0.85
standard error         3.57
dtype: float64

Code

fit_1a = smf.ols("y ~ z", data=fake).fit()
fake["x"] = rng.normal(50, 20, size=n)
fit_1b = smf.ols("y ~ z + x", data=fake).fit()

pd.DataFrame({
    "simple": [fit_1a.params["z"], fit_1a.bse["z"]],
    "adjusted for irrelevant x": [fit_1b.params["z"], fit_1b.bse["z"]],
}, index=["estimate", "std_error"]).round(2)

	simple	adjusted for irrelevant x
estimate	-0.85	-0.60
std_error	3.57	3.59

An unrelated covariate does not systematically improve the treatment-effect estimate.

Randomized experiment with a realistic pre-test

Code

true_ability = rng.normal(50, 16, size=n)
x = true_ability + rng.normal(0, 12, size=n)
y_if_control = true_ability + rng.normal(0, 12, size=n) + 10
y_if_treated = y_if_control + 5
z = rng.permutation(np.repeat([0, 1], n // 2))
y = np.where(z == 1, y_if_treated, y_if_control)
fake_2 = pd.DataFrame({"x": x, "y": y, "z": z})

fit_2a = smf.ols("y ~ z", data=fake_2).fit()
fit_2b = smf.ols("y ~ z + x", data=fake_2).fit()

pd.DataFrame({
    "simple": [fit_2a.params["z"], fit_2a.bse["z"]],
    "adjusted for pre-test": [fit_2b.params["z"], fit_2b.bse["z"]],
}, index=["estimate", "std_error"]).round(2)

	simple	adjusted for pre-test
estimate	-0.13	0.12
std_error	3.78	3.15

Because the pre-test is predictive of the outcome, adjustment typically reduces the standard error in a randomized experiment.

Selection bias in treatment assignment

Now treatment is more likely for students with lower pre-test scores:

Code

z_biased = rng.binomial(1, expit(-(x - 50) / 20))
y_biased = np.where(z_biased == 1, y_if_treated, y_if_control)
fake_3 = pd.DataFrame({"x": x, "y": y_biased, "z": z_biased})

Code

xx = np.linspace(0, 100, 200)
fig, ax = plt.subplots(figsize=(6, 3.5))
ax.scatter(x, np.where(z_biased == 1, 0.99, 0.01), s=22, alpha=0.7)
ax.plot(xx, expit(-(xx - 50) / 20), color="black")
ax.set_ylim(0, 1)
ax.set_xlabel("Pre-test score")
ax.set_ylabel("Pr(z=1)")
ax.set_title("Biased assignment: lower pre-test scores are treated more often")

Text(0.5, 1.0, 'Biased assignment: lower pre-test scores are treated more often')

Code

fit_3a = smf.ols("y ~ z", data=fake_3).fit()
fit_3b = smf.ols("y ~ z + x", data=fake_3).fit()

pd.DataFrame({
    "simple": [fit_3a.params["z"], fit_3a.bse["z"]],
    "adjusted for pre-test": [fit_3b.params["z"], fit_3b.bse["z"]],
}, index=["estimate", "std_error"]).round(2)

	simple	adjusted for pre-test
estimate	-7.20	2.31
std_error	3.61	3.63

Repeated biased-assignment experiments

Code

def experiment(rng, n=100):
    true_ability = rng.normal(50, 16, size=n)
    x = true_ability + rng.normal(0, 12, size=n)
    y_if_control = true_ability + rng.normal(0, 12, size=n) + 10
    y_if_treated = y_if_control + 5
    z = rng.binomial(1, expit(-(x - 50) / 20))
    y = np.where(z == 1, y_if_treated, y_if_control)
    dat = pd.DataFrame({"x": x, "y": y, "z": z})
    simple = smf.ols("y ~ z", data=dat).fit()
    adjusted = smf.ols("y ~ z + x", data=dat).fit()
    return pd.DataFrame({
        "model": ["simple", "adjusted"],
        "estimate": [simple.params["z"], adjusted.params["z"]],
        "std_error": [simple.bse["z"], adjusted.bse["z"]],
    })

results = pd.concat([experiment(rng, n) for _ in range(50)], keys=range(50), names=["rep"])
results.groupby("model")[["estimate", "std_error"]].mean().round(2)

	estimate	std_error
model
adjusted	5.33	3.39
simple	-5.67	3.87

Code

fig, ax = plt.subplots(figsize=(6, 3.5))
for model, group in results.reset_index().groupby("model"):
    ax.hist(group["estimate"], bins=12, alpha=0.6, label=model)
ax.axvline(5, color="black", linestyle="--", label="true effect")
ax.set_xlabel("estimated treatment effect")
ax.set_ylabel("replications")
ax.legend(frameon=False)

In the biased design, the unadjusted comparison is pulled downward because treated students tend to have lower pre-test scores. Conditioning on the pre-test moves the estimate back toward the true 5-point effect and usually improves precision.

# Simulation-based design for student grades Source: `FakeMidtermFinal/simulation_based_design.Rmd` This example simulates randomized and biased treatment assignments for student grades. The estimand is a treatment effect of 5 points. Randomization protects the simple difference in means; a useful pre-test reduces uncertainty; biased assignment creates confounding that adjustment can partly repair. ```{python} import numpy as np import pandas as pd import matplotlib.pyplot as plt from scipy.special import expit import statsmodels.formula.api as smf rng = np.random.default_rng(20260531) ``` ## First randomized experiment ```{python} n = 100 y_if_control = rng.normal(60, 20, size=n) y_if_treated = y_if_control + 5 z = rng.permutation(np.repeat([0, 1], n // 2)) y = np.where(z == 1, y_if_treated, y_if_control) fake = pd.DataFrame({"y": y, "z": z}) diff = fake.loc[fake.z == 1, "y"].mean() - fake.loc[fake.z == 0, "y"].mean() se_diff = np.sqrt( fake.loc[fake.z == 0, "y"].var(ddof=1) / (z == 0).sum() + fake.loc[fake.z == 1, "y"].var(ddof=1) / (z == 1).sum() ) pd.Series({"difference in means": diff, "standard error": se_diff}).round(2) ``` ```{python} fit_1a = smf.ols("y ~ z", data=fake).fit() fake["x"] = rng.normal(50, 20, size=n) fit_1b = smf.ols("y ~ z + x", data=fake).fit() pd.DataFrame({ "simple": [fit_1a.params["z"], fit_1a.bse["z"]], "adjusted for irrelevant x": [fit_1b.params["z"], fit_1b.bse["z"]], }, index=["estimate", "std_error"]).round(2) ``` An unrelated covariate does not systematically improve the treatment-effect estimate. ## Randomized experiment with a realistic pre-test ```{python} true_ability = rng.normal(50, 16, size=n) x = true_ability + rng.normal(0, 12, size=n) y_if_control = true_ability + rng.normal(0, 12, size=n) + 10 y_if_treated = y_if_control + 5 z = rng.permutation(np.repeat([0, 1], n // 2)) y = np.where(z == 1, y_if_treated, y_if_control) fake_2 = pd.DataFrame({"x": x, "y": y, "z": z}) fit_2a = smf.ols("y ~ z", data=fake_2).fit() fit_2b = smf.ols("y ~ z + x", data=fake_2).fit() pd.DataFrame({ "simple": [fit_2a.params["z"], fit_2a.bse["z"]], "adjusted for pre-test": [fit_2b.params["z"], fit_2b.bse["z"]], }, index=["estimate", "std_error"]).round(2) ``` Because the pre-test is predictive of the outcome, adjustment typically reduces the standard error in a randomized experiment. ## Selection bias in treatment assignment Now treatment is more likely for students with lower pre-test scores: ```{python} z_biased = rng.binomial(1, expit(-(x - 50) / 20)) y_biased = np.where(z_biased == 1, y_if_treated, y_if_control) fake_3 = pd.DataFrame({"x": x, "y": y_biased, "z": z_biased}) ``` ```{python} xx = np.linspace(0, 100, 200) fig, ax = plt.subplots(figsize=(6, 3.5)) ax.scatter(x, np.where(z_biased == 1, 0.99, 0.01), s=22, alpha=0.7) ax.plot(xx, expit(-(xx - 50) / 20), color="black") ax.set_ylim(0, 1) ax.set_xlabel("Pre-test score") ax.set_ylabel("Pr(z=1)") ax.set_title("Biased assignment: lower pre-test scores are treated more often") ``` ```{python} fit_3a = smf.ols("y ~ z", data=fake_3).fit() fit_3b = smf.ols("y ~ z + x", data=fake_3).fit() pd.DataFrame({ "simple": [fit_3a.params["z"], fit_3a.bse["z"]], "adjusted for pre-test": [fit_3b.params["z"], fit_3b.bse["z"]], }, index=["estimate", "std_error"]).round(2) ``` ## Repeated biased-assignment experiments ```{python} def experiment(rng, n=100): true_ability = rng.normal(50, 16, size=n) x = true_ability + rng.normal(0, 12, size=n) y_if_control = true_ability + rng.normal(0, 12, size=n) + 10 y_if_treated = y_if_control + 5 z = rng.binomial(1, expit(-(x - 50) / 20)) y = np.where(z == 1, y_if_treated, y_if_control) dat = pd.DataFrame({"x": x, "y": y, "z": z}) simple = smf.ols("y ~ z", data=dat).fit() adjusted = smf.ols("y ~ z + x", data=dat).fit() return pd.DataFrame({ "model": ["simple", "adjusted"], "estimate": [simple.params["z"], adjusted.params["z"]], "std_error": [simple.bse["z"], adjusted.bse["z"]], }) results = pd.concat([experiment(rng, n) for _ in range(50)], keys=range(50), names=["rep"]) results.groupby("model")[["estimate", "std_error"]].mean().round(2) ``` ```{python} fig, ax = plt.subplots(figsize=(6, 3.5)) for model, group in results.reset_index().groupby("model"): ax.hist(group["estimate"], bins=12, alpha=0.6, label=model) ax.axvline(5, color="black", linestyle="--", label="true effect") ax.set_xlabel("estimated treatment effect") ax.set_ylabel("replications") ax.legend(frameon=False) ``` In the biased design, the unadjusted comparison is pulled downward because treated students tend to have lower pre-test scores. Conditioning on the pre-test moves the estimate back toward the true 5-point effect and usually improves precision.