Simple causal regression graphics

Source: SimpleCausal/causal.Rmd

These simulations show how a regression slope can be interpreted as a treatment contrast in simple randomized settings, and why straight-line summaries can mislead for nonlinear treatment response.

Code

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

rng = np.random.default_rng(1151)

Linear treatment response

Code

N = 50
x = rng.uniform(1, 5, N)
y = rng.normal(10 + 3*x, 3)
x_binary = (x >= 3).astype(int)
data = pd.DataFrame({"x": x, "y": y, "x_binary": x_binary})
fit_bin = smf.ols("y ~ x_binary", data=data).fit()
fit_cont = smf.ols("y ~ x", data=data).fit()
fit_bin.params, fit_cont.params

(Intercept    16.299469
 x_binary      5.344403
 dtype: float64,
 Intercept    10.871194
 x             2.792726
 dtype: float64)

Code

fig, axs = plt.subplots(1, 2, figsize=(10, 4))
axs[0].scatter(x_binary, y, s=18, color="black")
axs[0].plot([0, 1], fit_bin.params["Intercept"] + fit_bin.params["x_binary"]*np.array([0,1]), color="black")
axs[0].set_xticks([0, 1], ["Control", "Treatment"])
axs[0].set_ylabel("Outcome measurement")
axs[0].set_title("Binary treatment")
xs = np.linspace(1, 5, 100)
axs[1].scatter(x, y, s=18, color="black")
axs[1].plot(xs, fit_cont.params["Intercept"] + fit_cont.params["x"]*xs, color="black")
axs[1].set_xlabel("Treatment level")
axs[1].set_title("Continuous treatment")

Text(0.5, 1.0, 'Continuous treatment')

Nonlinear response approximated with a line

Code

y2 = rng.normal(5 + 30*np.exp(-x), 2)
data2 = pd.DataFrame({"x": x, "y": y2})
fit_nl = smf.ols("y ~ x", data=data2).fit()
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
for ax in axs:
    ax.scatter(x, y2, s=18, color="black")
    ax.set_xlabel("Treatment level")
xs = np.linspace(1, 5, 100)
axs[0].plot(xs, 5 + 30*np.exp(-xs), color="black")
axs[0].set_title("Nonlinear treatment effect")
axs[1].plot(xs, fit_nl.params["Intercept"] + fit_nl.params["x"]*xs, color="black")
axs[1].set_title("Straight-line approximation")

Text(0.5, 1.0, 'Straight-line approximation')

Pre-treatment predictor plus treatment indicator

Code

N = 100
z = np.repeat([0, 1], N//2)
xx = np.where(z == 0, rng.normal(0, 1.2, N)**2, rng.normal(0, .8, N)**2)
yy = rng.normal(20 + 5*xx + 10*z, 3)
d3 = pd.DataFrame({"xx": xx, "z": z, "yy": yy})
fit_adj = smf.ols("yy ~ xx + z", data=d3).fit()
fit_adj.params

Intercept    19.127654
xx            5.230688
z            11.078474
dtype: float64

Code

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(xx[z==0], yy[z==0], s=18, color="black", label="Controls")
ax.scatter(xx[z==1], yy[z==1], s=18, facecolors="none", edgecolors="black", label="Treated")
xs = np.linspace(0, xx.max(), 100)
ax.plot(xs, fit_adj.params["Intercept"] + fit_adj.params["xx"]*xs, color="black")
ax.plot(xs, fit_adj.params["Intercept"] + fit_adj.params["z"] + fit_adj.params["xx"]*xs, color="black", linestyle="--")
ax.set_xlabel("Pre-treatment predictor")
ax.set_ylabel("Outcome measurement")
ax.legend()

# Simple causal regression graphics Source: `SimpleCausal/causal.Rmd` These simulations show how a regression slope can be interpreted as a treatment contrast in simple randomized settings, and why straight-line summaries can mislead for nonlinear treatment response. ```{python} import numpy as np import pandas as pd import matplotlib.pyplot as plt import statsmodels.formula.api as smf rng = np.random.default_rng(1151) ``` ## Linear treatment response ```{python} N = 50 x = rng.uniform(1, 5, N) y = rng.normal(10 + 3*x, 3) x_binary = (x >= 3).astype(int) data = pd.DataFrame({"x": x, "y": y, "x_binary": x_binary}) fit_bin = smf.ols("y ~ x_binary", data=data).fit() fit_cont = smf.ols("y ~ x", data=data).fit() fit_bin.params, fit_cont.params ``` ```{python} fig, axs = plt.subplots(1, 2, figsize=(10, 4)) axs[0].scatter(x_binary, y, s=18, color="black") axs[0].plot([0, 1], fit_bin.params["Intercept"] + fit_bin.params["x_binary"]*np.array([0,1]), color="black") axs[0].set_xticks([0, 1], ["Control", "Treatment"]) axs[0].set_ylabel("Outcome measurement") axs[0].set_title("Binary treatment") xs = np.linspace(1, 5, 100) axs[1].scatter(x, y, s=18, color="black") axs[1].plot(xs, fit_cont.params["Intercept"] + fit_cont.params["x"]*xs, color="black") axs[1].set_xlabel("Treatment level") axs[1].set_title("Continuous treatment") ``` ## Nonlinear response approximated with a line ```{python} y2 = rng.normal(5 + 30*np.exp(-x), 2) data2 = pd.DataFrame({"x": x, "y": y2}) fit_nl = smf.ols("y ~ x", data=data2).fit() fig, axs = plt.subplots(1, 2, figsize=(10, 4)) for ax in axs: ax.scatter(x, y2, s=18, color="black") ax.set_xlabel("Treatment level") xs = np.linspace(1, 5, 100) axs[0].plot(xs, 5 + 30*np.exp(-xs), color="black") axs[0].set_title("Nonlinear treatment effect") axs[1].plot(xs, fit_nl.params["Intercept"] + fit_nl.params["x"]*xs, color="black") axs[1].set_title("Straight-line approximation") ``` ## Pre-treatment predictor plus treatment indicator ```{python} N = 100 z = np.repeat([0, 1], N//2) xx = np.where(z == 0, rng.normal(0, 1.2, N)**2, rng.normal(0, .8, N)**2) yy = rng.normal(20 + 5*xx + 10*z, 3) d3 = pd.DataFrame({"xx": xx, "z": z, "yy": yy}) fit_adj = smf.ols("yy ~ xx + z", data=d3).fit() fit_adj.params ``` ```{python} fig, ax = plt.subplots(figsize=(6, 4)) ax.scatter(xx[z==0], yy[z==0], s=18, color="black", label="Controls") ax.scatter(xx[z==1], yy[z==1], s=18, facecolors="none", edgecolors="black", label="Treated") xs = np.linspace(0, xx.max(), 100) ax.plot(xs, fit_adj.params["Intercept"] + fit_adj.params["xx"]*xs, color="black") ax.plot(xs, fit_adj.params["Intercept"] + fit_adj.params["z"] + fit_adj.params["xx"]*xs, color="black", linestyle="--") ax.set_xlabel("Pre-treatment predictor") ax.set_ylabel("Outcome measurement") ax.legend() ```