Robit and logit regression

Source: Robit/robit.Rmd

This example compares ordinary logistic regression with a robust binary-response model whose inverse link is a Student-t CDF. The heavier-tailed robit link is less distorted by a deliberately flipped low-x observation.

Simulate data from a logit model

Code
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import expit
from scipy.stats import t
import statsmodels.api as sm

root = Path("../../ROS-Examples")
robit_dir = root / "Robit"

rng = np.random.default_rng(1234)
N = 50
x = rng.uniform(-9, 9, size=N)
a, b = 0.0, 0.8
p = expit(a + b * x)
y = rng.binomial(1, p)
df = 4.0
scale = np.sqrt((df - 2) / df)
data_1 = pd.DataFrame({"x": x, "y": y})
data_1.head()
x y
0 8.580596 1
1 -2.156477 0
2 7.618432 1
3 -4.289536 0
4 -3.256253 1

Frequentist logit baseline

Code
X = sm.add_constant(data_1["x"])
fit_logit_sm_1 = sm.Logit(data_1["y"], X).fit(disp=False)
fit_logit_sm_1.params
const    0.824945
x        1.103819
dtype: float64

Stan models via CmdStanPy

The two original Stan programs are:

// logit.stan
model {
  y ~ bernoulli_logit(a + b*x);
}
// robit.stan
model {
  vector[N] p;
  for (n in 1:N) p[n] = student_t_cdf(a + b*x[n], df, 0, sqrt((df - 2)/df));
  y ~ bernoulli(p);
}
Code
from cmdstanpy import CmdStanModel

stan_dir = Path("_generated_stan")
stan_dir.mkdir(exist_ok=True)
logit_stan = stan_dir / "robit_logit_modern.stan"
logit_stan.write_text(r'''
data {
  int<lower=1> N;
  vector[N] x;
  array[N] int<lower=0, upper=1> y;
}
parameters {
  real a;
  real b;
}
model {
  a ~ normal(0, 5);
  b ~ normal(0, 5);
  y ~ bernoulli_logit(a + b*x);
}
''')
robit_stan = stan_dir / "robit_modern.stan"
robit_stan.write_text(r'''
data {
  int<lower=1> N;
  vector[N] x;
  array[N] int<lower=0, upper=1> y;
  real<lower=2> df;
}
parameters {
  real a;
  real b;
}
model {
  vector[N] p;
  a ~ normal(0, 5);
  b ~ normal(0, 5);
  for (n in 1:N) p[n] = student_t_cdf(a + b*x[n] | df, 0, sqrt((df - 2)/df));
  y ~ bernoulli(p);
}
''')

logit_model = CmdStanModel(stan_file=str(logit_stan))
robit_model = CmdStanModel(stan_file=str(robit_stan))

def stan_data(frame):
    return {"N": len(frame), "x": frame.x.to_numpy(), "y": frame.y.astype(int).tolist(), "df": df}

fit_logit_1 = logit_model.sample(data={k: v for k, v in stan_data(data_1).items() if k != "df"}, seed=1507, chains=4, parallel_chains=4, show_progress=False)
fit_robit_1 = robit_model.sample(data=stan_data(data_1), seed=1507, chains=4, parallel_chains=4, show_progress=False)

med_logit_1 = fit_logit_1.draws_pd()[["a", "b"]].median()
med_robit_1 = fit_robit_1.draws_pd()[["a", "b"]].median()
med_logit_1, med_robit_1
(a    1.018415
 b    1.320840
 dtype: float64,
 a    0.647949
 b    0.878184
 dtype: float64)

Contaminate the data

The R code flips the fourth-smallest x value to y = 1, creating an outlying success far in the left tail.

Code
data_2 = data_1.copy()
low_idx = data_2["x"].sort_values().index[3]
data_2.loc[low_idx, "y"] = 1

fit_logit_2 = logit_model.sample(data={k: v for k, v in stan_data(data_2).items() if k != "df"}, seed=1508, chains=4, parallel_chains=4, show_progress=False)
fit_robit_2 = robit_model.sample(data=stan_data(data_2), seed=1508, chains=4, parallel_chains=4, show_progress=False)
med_logit_2 = fit_logit_2.draws_pd()[["a", "b"]].median()
med_robit_2 = fit_robit_2.draws_pd()[["a", "b"]].median()

plot_fits(data_2, med_logit_2, med_robit_2, "Contaminated data")

The logistic link tends to flatten to accommodate the flipped extreme case. The robit link can assign more tail probability to a single surprising point and preserve a steeper central transition.

BlackJAX log-density sketch

The robit model is also a compact example of a custom Bernoulli likelihood. This function is the log density for parameters (a, b) with weak normal priors.

Code
from scipy.stats import norm as normal_dist, t as student_t

x_np = data_2.x.to_numpy()
y_np = data_2.y.to_numpy()

def robit_logdensity_ab(params):
    a, b = params["a"], params["b"]
    eta = (a + b * x_np) / scale
    p = np.clip(student_t.cdf(eta, df), 1e-9, 1 - 1e-9)
    ll = np.sum(y_np * np.log(p) + (1 - y_np) * np.log1p(-p))
    prior = normal_dist.logpdf(a, 0, 5) + normal_dist.logpdf(b, 0, 5)
    return ll + prior

robit_logdensity_ab({"a": med_robit_2["a"], "b": med_robit_2["b"]})
np.float64(-17.58208368465927)