Stents: comparing pre/post distributions

Source: Stents/stents.Rmd

The stents example compares treatment and control arms using only published summaries. The point is that comparing post-treatment means is less precise than comparing gains or a regression-adjusted post-treatment outcome when baseline and follow-up measurements are correlated.

Code

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import t, norm

Helper for summary-data comparisons

Code

def stent_summary(n, y_pre, y_post, s_pre, s_post, ci_width):
    n = np.asarray(n, dtype=float)
    y_pre = np.asarray(y_pre, dtype=float)
    y_post = np.asarray(y_post, dtype=float)
    s_pre = np.asarray(s_pre, dtype=float)
    s_post = np.asarray(s_post, dtype=float)
    ci_width = np.asarray(ci_width, dtype=float)

    se_change = ci_width / (2 * t.ppf(.975, n - 1))
    s_change = se_change * np.sqrt(n)
    rho = (s_pre**2 + s_post**2 - s_change**2) / (2 * s_pre * s_post)
    b = np.mean(rho * s_post / s_pre)

    diff_simple = y_post[0] - y_post[1]
    diff_gain = (y_post - y_pre)[0] - (y_post - y_pre)[1]
    diff_reg = (y_post - b*y_pre)[0] - (y_post - b*y_pre)[1]

    se_simple = np.sqrt(np.sum(s_post**2 / n))
    se_gain = np.sqrt(np.sum(s_pre**2/n + s_post**2/n - 2*rho*s_pre*s_post/n))
    se_reg = np.sqrt(np.sum(b**2*s_pre**2/n + s_post**2/n - 2*b*rho*s_pre*s_post/n))

    out = pd.DataFrame({
        "estimate": [diff_simple, diff_gain, diff_reg],
        "std_error": [se_simple, se_gain, se_reg],
    }, index=["post mean", "gain score", "regression adjusted"])
    out["z"] = out.estimate / out.std_error
    out["two_sided_p"] = 2 * norm.cdf(-np.abs(out.z))
    return out, rho, b

Exercise time

Code

exercise, rho_ex, b_ex = stent_summary(
    n=[104, 90],
    y_pre=[528.0, 490.0],
    y_post=[556.3, 501.8],
    s_pre=[178.7, 195.0],
    s_post=[178.7, 190.9],
    ci_width=[45.1 - 11.6, 31.3 - (-7.8)],
)
exercise.round(2)

	estimate	std_error	z	two_sided_p
post mean	54.50	26.68	2.04	0.04
gain score	16.50	12.97	1.27	0.20
regression adjusted	21.28	12.50	1.70	0.09

Code

z = exercise.loc["regression adjusted", "z"]
power_right = 1 - norm.cdf(1.96 - z)
power_left = norm.cdf(-1.96 - z)
print(power_right, power_left)

0.3981780954972123 0.0001251590390651888

Treadmill score

Code

treadmill, rho_tm, b_tm = stent_summary(
    n=[104, 90],
    y_pre=[4.24, 4.18],
    y_post=[5.46, 4.28],
    s_pre=[4.82, 4.65],
    s_post=[4.79, 4.98],
    ci_width=[2.07 - 0.37, 1.19 - (-0.99)],
)
treadmill.round(2)

	estimate	std_error	z	two_sided_p
post mean	1.18	0.70	1.68	0.09
gain score	1.12	0.70	1.61	0.11
regression adjusted	1.15	0.61	1.88	0.06

Distribution shift picture

Code

x = np.linspace(0, 1000, 500)
scale = norm.pdf(0, 0, 190)
fig, ax = plt.subplots(figsize=(7, 3))
ax.plot(x, norm.pdf(x, 510, 190) / scale, label="controls")
ax.plot(x, norm.pdf(x, 530, 190) / scale, label="treated")
ax.set_xlabel("Exercise time (seconds)")
ax.set_yticks([])
ax.legend()

# Stents: comparing pre/post distributions Source: `Stents/stents.Rmd` The stents example compares treatment and control arms using only published summaries. The point is that comparing post-treatment means is less precise than comparing gains or a regression-adjusted post-treatment outcome when baseline and follow-up measurements are correlated. ```{python} import numpy as np import pandas as pd import matplotlib.pyplot as plt from scipy.stats import t, norm ``` ## Helper for summary-data comparisons ```{python} def stent_summary(n, y_pre, y_post, s_pre, s_post, ci_width): n = np.asarray(n, dtype=float) y_pre = np.asarray(y_pre, dtype=float) y_post = np.asarray(y_post, dtype=float) s_pre = np.asarray(s_pre, dtype=float) s_post = np.asarray(s_post, dtype=float) ci_width = np.asarray(ci_width, dtype=float) se_change = ci_width / (2 * t.ppf(.975, n - 1)) s_change = se_change * np.sqrt(n) rho = (s_pre**2 + s_post**2 - s_change**2) / (2 * s_pre * s_post) b = np.mean(rho * s_post / s_pre) diff_simple = y_post[0] - y_post[1] diff_gain = (y_post - y_pre)[0] - (y_post - y_pre)[1] diff_reg = (y_post - b*y_pre)[0] - (y_post - b*y_pre)[1] se_simple = np.sqrt(np.sum(s_post**2 / n)) se_gain = np.sqrt(np.sum(s_pre**2/n + s_post**2/n - 2*rho*s_pre*s_post/n)) se_reg = np.sqrt(np.sum(b**2*s_pre**2/n + s_post**2/n - 2*b*rho*s_pre*s_post/n)) out = pd.DataFrame({ "estimate": [diff_simple, diff_gain, diff_reg], "std_error": [se_simple, se_gain, se_reg], }, index=["post mean", "gain score", "regression adjusted"]) out["z"] = out.estimate / out.std_error out["two_sided_p"] = 2 * norm.cdf(-np.abs(out.z)) return out, rho, b ``` ## Exercise time ```{python} exercise, rho_ex, b_ex = stent_summary( n=[104, 90], y_pre=[528.0, 490.0], y_post=[556.3, 501.8], s_pre=[178.7, 195.0], s_post=[178.7, 190.9], ci_width=[45.1 - 11.6, 31.3 - (-7.8)], ) exercise.round(2) ``` ```{python} z = exercise.loc["regression adjusted", "z"] power_right = 1 - norm.cdf(1.96 - z) power_left = norm.cdf(-1.96 - z) print(power_right, power_left) ``` ## Treadmill score ```{python} treadmill, rho_tm, b_tm = stent_summary( n=[104, 90], y_pre=[4.24, 4.18], y_post=[5.46, 4.28], s_pre=[4.82, 4.65], s_post=[4.79, 4.98], ci_width=[2.07 - 0.37, 1.19 - (-0.99)], ) treadmill.round(2) ``` ## Distribution shift picture ```{python} x = np.linspace(0, 1000, 500) scale = norm.pdf(0, 0, 190) fig, ax = plt.subplots(figsize=(7, 3)) ax.plot(x, norm.pdf(x, 510, 190) / scale, label="controls") ax.plot(x, norm.pdf(x, 530, 190) / scale, label="treated") ax.set_xlabel("Exercise time (seconds)") ax.set_yticks([]) ax.legend() ```