# Numerical / dataframe / regression stack used throughout the notebook.
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
# Fix the global NumPy RNG so every simulated draw below is reproducible.
np.random.seed(42)
def generate_experiment_data(
    n: int = 10_000,
    tau: float = 5.0,
    p_score: float = 0.5,
) -> pd.DataFrame:
    """Simulate a completely randomized experiment with a constant effect.

    Parameters
    ----------
    n : int
        Number of units to simulate.
    tau : float
        Constant additive treatment effect.
    p_score : float
        Probability of treatment assignment (propensity score).

    Returns
    -------
    pd.DataFrame
        Columns ``Y`` (observed outcome), ``W`` (treatment indicator),
        ``X`` (binary covariate).
    """
    # Draw order matters for reproducibility with the global seed:
    # covariate, then assignment, then outcome noise.
    covariate = np.random.binomial(1, 0.4, n)
    assigned = np.random.binomial(1, p_score, n)
    # Control potential outcome: baseline 50, covariate shift 10, noise sd 15.
    y_control = 50 + 10 * covariate + np.random.normal(0, 15, n)
    y_treated = y_control + tau
    # Observed outcome under the realized assignment.
    observed = np.where(assigned == 1, y_treated, y_control)
    return pd.DataFrame({"Y": observed, "W": assigned, "X": covariate})
# Draw one simulated experiment with the defaults (uses the seed set above).
df = generate_experiment_data()
# Notebook-style peek at the first rows (rendered table below).
df.head()
| | Y | W | X |
|---|---|---|---|
| 0 | 37.942952 | 0 | 0 |
| 1 | 68.789877 | 0 | 1 |
| 2 | 79.594207 | 0 | 1 |
| 3 | 36.596265 | 1 | 0 |
| 4 | 75.684616 | 0 | 0 |
# Difference in means via OLS of Y on W; HC2 gives heteroskedasticity-robust SEs.
smf.ols("Y ~ W", data=df).fit(cov_type="HC2").summary().tables[1]
| | coef | std err | z | P>|z| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| Intercept | 54.0575 | 0.222 | 243.512 | 0.000 | 53.622 | 54.493 |
| W | 4.7700 | 0.312 | 15.304 | 0.000 | 4.159 | 5.381 |
# Same regression with log outcome: the W coefficient approximates relative lift.
# NOTE(review): Y can be negative here (normal noise, sd=15), so np.log produces
# NaNs with a RuntimeWarning (shown below); presumably those rows are dropped by
# the formula interface's default missing-data handling — verify.
smf.ols("np.log(Y) ~ W", data=df).fit(cov_type="HC2").summary().tables[1]
/home/alal/miniforge3/envs/metrics/lib/python3.11/site-packages/pandas/core/arraylike.py:399: RuntimeWarning: invalid value encountered in log
  result = getattr(ufunc, method)(*inputs, **kwargs)
| | coef | std err | z | P>|z| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| Intercept | 3.9411 | 0.005 | 820.545 | 0.000 | 3.932 | 3.951 |
| W | 0.0941 | 0.006 | 14.857 | 0.000 | 0.082 | 0.107 |
Population vs Sample Variance¶
Population variance (if we knew the true μ): $$\text{Var}(X) = E[(X - \mu)^2] = E[X^2] - (E[X])^2$$
Sample Estimation from Compressed Stats¶
We store: $(n, \sum X_i, \sum X_i^2)$
Step 1: Compute the sample moments
- First moment: $\bar{X} = \frac{1}{n}\sum X_i$
- Second moment: $\overline{X^2} = \frac{1}{n}\sum X_i^2$
Step 2: Apply the identity $\text{Var}(X) = E[X^2] - (E[X])^2$
Using sample analogs: $$s^2 = \overline{X^2} - \bar{X}^2 = \frac{1}{n}\sum X_i^2 - \left(\frac{1}{n}\sum X_i\right)^2$$
Step 3: Simplify $$s^2 = \frac{1}{n}\sum X_i^2 - \frac{1}{n^2}(\sum X_i)^2 = \frac{1}{n}\left[\sum X_i^2 - \frac{(\sum X_i)^2}{n}\right]$$
This gives the biased estimator (dividing by $n$).
The Division¶
For the unbiased estimator (Bessel's correction), we divide by $(n-1)$ instead: $$s^2 = \frac{1}{n-1}\left[\sum X_i^2 - \frac{(\sum X_i)^2}{n}\right]$$
# Split observed outcomes by treatment arm.
Y1, Y0 = df.query("W == 1")["Y"], df.query("W == 0")["Y"]
# Compressed sufficient statistics per arm: (n, sum, sum of squares).
sum_y1, sum_y0 = Y1.sum(), Y0.sum()
sum_y1_sq, sum_y0_sq = (Y1**2).sum(), (Y0**2).sum()
n1, n0 = len(Y1), len(Y0)
# ATE
ybar1, ybar0 = sum_y1 / n1, sum_y0 / n0
ate = ybar1 - ybar0
# Variance of ATE
# Per-arm variance from the compressed stats via Var = E[X^2] - (E[X])^2.
# NOTE(review): this divides by n (the biased estimator discussed above);
# Bessel's n-1 correction is not applied — negligible at ~5,000 per arm.
v1 = (sum_y1_sq - sum_y1**2 / n1) / n1
v0 = (sum_y0_sq - sum_y0**2 / n0) / n0
# Standard error of a difference of two independent sample means.
ate_se = np.sqrt(v1 / n1 + v0 / n0)
print(f"ATE: {ate:.4f}, SE: {ate_se:.4f}, T-stat: {ate / ate_se:.4f}")
ATE: 4.7700, SE: 0.3116, T-stat: 15.3057
The Setup¶
- θ = (μ₁, μ₀)' ∈ ℝ² is a 2-vector
- g(θ) = μ₁/μ₀ - 1 is our function (relative lift)
- θ̂ - θ ≈_d N(0, Ω) where Ω = Var(θ̂) = diag(σ₁²/n₁, σ₀²/n₀) under independence (the 1/n factors are absorbed into Ω, so no separate √n scaling is needed)
The Gradient¶
$$\nabla g(\theta) = \begin{bmatrix} \frac{\partial g}{\partial \mu_1} \\ \frac{\partial g}{\partial \mu_0} \end{bmatrix} = \begin{bmatrix} \frac{1}{\mu_0} \\ -\frac{\mu_1}{\mu_0^2} \end{bmatrix}$$
The Result¶
$$\text{Var}(\hat{\rho}) = \nabla g(\theta)' \text{Var}(\hat{\theta}) \nabla g(\theta) = \frac{1}{\mu_0^2}\left[\frac{\sigma_1^2}{n_1} + R^2 \frac{\sigma_0^2}{n_0}\right]$$
where R = μ₁/μ₀.
The "Simpler" Derivation¶
The "simpler" first-order Taylor expansion you might remember:
$$\hat{R} - R \approx \frac{1}{\mu_0}(\bar{y}_1 - \mu_1) - \frac{\mu_1}{\mu_0^2}(\bar{y}_0 - \mu_0)$$
is actually the same thing—it's just computing the gradient and applying it inline! Both give identical results.
# Relative lift: ratio of arm means minus one.
yratio = ybar1 / ybar0
relative_lift = yratio - 1
# Delta-method variance from the derivation above:
# (1/mu0^2) * [ sigma1^2/n1 + R^2 * sigma0^2/n0 ], with R = mu1/mu0.
v_lift = 1 / (ybar0**2) * (v1 / n1 + (yratio**2) * v0 / n0)
se_lift = np.sqrt(v_lift)
print(
    f"Relative Lift: {relative_lift:.4f}, SE: {se_lift:.4f}, T-stat: {relative_lift / se_lift:.4f}"
)
Relative Lift: 0.0882, SE: 0.0060, T-stat: 14.6369
from scipy import stats
# Randomization sanity check: is the observed number treated consistent with p = 0.5?
print(stats.binomtest(n1, n0+n1, p=0.5, alternative='two-sided'))
BinomTestResult(k=5064, n=10000, alternative='two-sided', statistic=0.5064, pvalue=0.20408231993686038)
# Manual replication of scipy's binomial test from first principles.
p_score = 0.5
n_total = n1 + n0
expected_treated = n_total * p_score
# Binomial test (two-sided)
# P(X ≤ k) for lower tail, P(X ≥ k) for upper tail
p_lower = stats.binom.cdf(n1, n_total, p_score)
p_upper = 1 - stats.binom.cdf(n1 - 1, n_total, p_score)
print(p_lower, p_upper)
# Double the smaller tail. NOTE(review): scipy.stats.binomtest uses a
# "minlike" two-sided method by default, which can differ from 2*min-tail
# for asymmetric distributions — the two agree here (p ≈ 0.2041 both ways).
p_value = 2 * min(p_lower, p_upper)
print(f"Number treated: {n1}, Expected: {expected_treated}, p-value: {p_value:.4f}")
0.9014759101507688 0.10204115996843022
Number treated: 5064, Expected: 5000.0, p-value: 0.2041