Golf putting accuracy

Source: Golf/golf.Rmd

This page ports the ROS golf-putting example to Python. The data are binomial counts of successful putts at different distances. We fit a Bayesian logistic regression and the geometry-based model from the book, where putt success is driven by angular error.

Code
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import expit, logit
from scipy.stats import norm, binom, halfnorm
from python.bayes_glm import bayes_logit

root = Path("../../ROS-Examples")
golf_dir = root / "Golf"
golf = pd.read_csv(golf_dir / "data/golf.txt", sep=r"\s+", skiprows=2)
golf["p_hat"] = golf["y"] / golf["n"]
golf["se"] = np.sqrt(golf["p_hat"] * (1 - golf["p_hat"]) / golf["n"])
r = (1.68 / 2) / 12
R = (4.25 / 2) / 12
golf.head()
x n y p_hat se
0 2 1443 1346 0.932779 0.006592
1 3 694 577 0.831412 0.014212
2 4 455 337 0.740659 0.020547
3 5 353 208 0.589235 0.026185
4 6 272 149 0.547794 0.030178

Plot the data

Code
fig, ax = plt.subplots(figsize=(7, 5))
ax.errorbar(golf.x, golf.p_hat, yerr=golf.se, fmt="o", color="black", lw=0.8)
for _, row in golf.iterrows():
    ax.text(row.x + 0.4, row.p_hat + row.se + 0.02, f"{int(row.y)}/{int(row.n)}", fontsize=8, color="0.35")
ax.set(xlim=(0, 1.1 * golf.x.max()), ylim=(0, 1.02), xlabel="Distance from hole (feet)", ylabel="Probability of success", title="Data on putts in pro golf")
ax.spines[["top", "right"]].set_visible(False)

Bayesian logistic regression

The R page uses rstanarm::stan_glm(cbind(y, n-y) ~ x, family=binomial). For a fast executed port we expand the grouped binomial counts to Bernoulli rows and fit a Normal-prior Bayesian logistic regression via the shared bayes_logit helper.

Code
expanded = pd.DataFrame({
    "x": np.repeat(golf.x.to_numpy(), golf.n.astype(int).to_numpy()),
    "made": np.concatenate([np.r_[np.ones(int(y)), np.zeros(int(n-y))] for y, n in zip(golf.y, golf.n)]).astype(int),
})
fit_logit = bayes_logit("made ~ x", expanded, draws=3000, prior_scale=2.5, seed=1507)
fit_logit.summary().round(3)
mean sd q5 q50 q95
Intercept 2.229 0.058 2.136 2.228 2.326
x -0.256 0.007 -0.266 -0.255 -0.245
Code
x_grid = np.linspace(0, 1.1 * golf.x.max(), 300)
p_draws = fit_logit.epred(pd.DataFrame({"x": x_grid}))
p_logit = np.median(p_draws, axis=0)
p_band = np.quantile(p_draws, [0.1, 0.9], axis=0)

fig, ax = plt.subplots(figsize=(7, 5))
ax.errorbar(golf.x, golf.p_hat, yerr=golf.se, fmt="o", color="black", lw=0.8)
ax.plot(x_grid, p_logit, color="black")
ax.fill_between(x_grid, p_band[0], p_band[1], color="gray", alpha=.2, linewidth=0)
ax.set(xlim=(0, 1.1 * golf.x.max()), ylim=(0, 1.02), xlabel="Distance from hole (feet)", ylabel="Probability of success", title="Bayesian logistic regression")
ax.spines[["top", "right"]].set_visible(False)

Geometry-based nonlinear model

The custom model uses the angle subtended by the effective target, asin((R-r)/x), and a single angular-error parameter sigma:

\[ p(x;\sigma) = 2\Phi\left(\frac{\sin^{-1}((R-r)/x)}{\sigma}\right)-1. \]

We compute the one-dimensional posterior on a grid with a weak half-normal prior for sigma.

Code
def p_geometry(x, sigma):
    x = np.asarray(x, dtype=float)
    p = np.ones_like(x, dtype=float)
    ok = x > (R - r)
    p[ok] = 2 * norm.cdf(np.arcsin((R - r) / x[ok]) / sigma) - 1
    return np.clip(p, 1e-9, 1-1e-9)

sigma_grid = np.linspace(0.005, 0.30, 3000)
log_prior = halfnorm(scale=0.10).logpdf(sigma_grid)
log_lik = np.array([
    np.sum(binom.logpmf(golf.y, golf.n, p_geometry(golf.x, sig)))
    for sig in sigma_grid
])
log_post = log_prior + log_lik
post = np.exp(log_post - log_post.max())
post = post / np.trapezoid(post, sigma_grid)
cdf = np.cumsum(post); cdf = cdf / cdf[-1]
rng = np.random.default_rng(1507)
sigma_draws = np.interp(rng.uniform(size=3000), cdf, sigma_grid)
sigma_hat = np.median(sigma_draws)
sigma_hat, sigma_hat * 180 / np.pi
(np.float64(0.02660639330318785), np.float64(1.5244340443378013))
Code
x_geom = np.linspace(R - r, 1.1 * golf.x.max(), 300)
fig, ax = plt.subplots(figsize=(7, 5))
ax.errorbar(golf.x, golf.p_hat, yerr=golf.se, fmt="o", color="black", lw=0.8)
for sigma in sigma_draws[:60]:
    ax.plot(np.r_[0, R-r, x_geom], np.r_[1, 1, p_geometry(x_geom, sigma)], color="0.5", lw=0.5, alpha=0.25)
ax.plot(np.r_[0, R-r, x_geom], np.r_[1, 1, p_geometry(x_geom, sigma_hat)], color="black", lw=1.5)
ax.text(18.5, 0.26, f"Geometry model\n sigma = {sigma_hat * 180 / np.pi:.1f} degrees")
ax.set(xlim=(0, 1.1 * golf.x.max()), ylim=(0, 1.02), xlabel="Distance from hole (feet)", ylabel="Probability of success", title="Geometry model: posterior draws of sigma")
ax.spines[["top", "right"]].set_visible(False)

Model comparison plot

Code
fig, ax = plt.subplots(figsize=(7, 5))
ax.errorbar(golf.x, golf.p_hat, yerr=golf.se, fmt="o", color="black", lw=0.8)
ax.plot(x_grid, p_logit, label="Bayesian logistic regression")
ax.plot(np.r_[0, R-r, x_geom], np.r_[1, 1, p_geometry(x_geom, sigma_hat)], label="Geometry-based model")
ax.set(xlim=(0, 1.1 * golf.x.max()), ylim=(0, 1.02), xlabel="Distance from hole (feet)", ylabel="Probability of success", title="Two Bayesian models fit to the golf putting data")
ax.legend(frameon=False)
ax.spines[["top", "right"]].set_visible(False)

Log-density sketch

The geometry model is also a natural example for custom samplers: the log density over log_sigma is just the binomial likelihood plus the prior and Jacobian.

Code
def golf_geometry_logdensity(log_sigma):
    sigma = np.exp(log_sigma)
    p = p_geometry(golf.x, sigma)
    return np.sum(binom.logpmf(golf.y, golf.n, p)) + halfnorm(scale=0.10).logpdf(sigma) + log_sigma

golf_geometry_logdensity(np.log(sigma_hat))
np.float64(-85.035992131003)