Congress / historical vote-swing plots

Source: Congress/congress_plots.Rmd

This example builds the Chapter 2 congressional vote-swing grid: for selected pairs of consecutive elections, it plots the Democratic share of the two-party vote in the first election against the swing (the change in that share) to the second election, faceted by region. The original data files are fixed-width-ish numeric .asc files; each row contains district identifiers, incumbency, and the two-party vote counts.

Load and stack the biennial files

Code

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def ros_root():
    """Locate the ROS-Examples checkout.

    Probes a fixed list of locations in priority order and returns the first
    one that exists on disk. When none of them exists, the first location is
    returned anyway, so the result is not guaranteed to exist.
    """
    search_order = (
        Path("../../ROS-Examples"),
        Path("../ROS-Examples"),
        Path("/Users/alal/tmp/ros-python-book/ROS-Examples"),
    )
    existing = (location for location in search_order if location.exists())
    return next(existing, search_order[0])

# Resolve the ROS-Examples checkout once; every data path below hangs off it.
root = ros_root()
# Directory from which the per-election "<year>.asc" files are read.
congress_dir = root / "Congress/data"
# Display names for region codes 1-4, in code order (list index = code - 1).
region_name = ["Northeast", "Midwest", "South", "West"]

def read_congress_year(year):
    """Load one election's district table from ``<year>.asc``.

    Parameters
    ----------
    year : int
        Election year; the file ``congress_dir / f"{year}.asc"`` is read.

    Returns
    -------
    pandas.DataFrame
        One row per line of the file, with a leading ``year`` column followed
        by ``state_code``, ``district``, ``inc``, ``dem_votes`` and
        ``rep_votes``. The five parsed columns are floats because they come
        from a single numeric array.
    """
    # ndmin=2 keeps a one-line file as a (1, 5) array; without it loadtxt
    # returns a 1-D array and the DataFrame constructor rejects the shape.
    arr = np.loadtxt(congress_dir / f"{year}.asc", ndmin=2)
    df = pd.DataFrame(arr, columns=["state_code", "district", "inc", "dem_votes", "rep_votes"])
    df.insert(0, "year", year)
    return df

# Read every even year from 1896 through 1992 (the range stop, 1993, is
# exclusive) into a dict keyed by election year.
congress_by_year = {year: read_congress_year(year) for year in range(1896, 1993, 2)}
# Preview the first rows of one election's table.
congress_by_year[1988].head()

	year	state_code	district	inc	dem_votes	rep_votes
0	1988	1.0	1.0	1.0	176463.0	51985.0
1	1988	1.0	2.0	1.0	143326.0	81965.0
2	1988	1.0	3.0	1.0	147394.0	74275.0
3	1988	1.0	4.0	-1.0	55751.0	147843.0
4	1988	1.0	5.0	-1.0	58612.0	163729.0

Build pairwise vote-swing data

Code

def election_pair(first_year):
    """Pair one election with the next one, two years later.

    Rows of the two yearly tables in ``congress_by_year`` are matched by
    position after their indexes are reset; no key-based join is done.

    Parameters
    ----------
    first_year : int
        Year of the earlier election; ``first_year + 2`` must also be a key
        of ``congress_by_year``.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``state_code``, ``region``, ``inc``, ``dvote1``,
        ``dvote2``, ``swing`` and ``contested``. ``year``, ``state_code``,
        ``region`` and ``inc`` come from the earlier election.
    """
    earlier = congress_by_year[first_year].reset_index(drop=True)
    later = congress_by_year[first_year + 2].reset_index(drop=True)

    def dem_share(frame):
        # Democratic share of the two-party vote.
        return frame["dem_votes"] / (frame["dem_votes"] + frame["rep_votes"])

    # Region code is derived from the state code in blocks of 20: 1, 2, 3, ...
    region_code = np.floor(earlier["state_code"] / 20).astype(int) + 1

    out = pd.DataFrame({
        "year": first_year,
        "state_code": earlier["state_code"],
        "region": region_code,
        "inc": earlier["inc"],
        "dvote1": dem_share(earlier),
        "dvote2": dem_share(later),
    })
    out["swing"] = out["dvote2"] - out["dvote1"]

    # A race counts as contested when the Democratic share is within 0.3 of
    # an even split in both elections.
    close_first = (out["dvote1"] - 0.5).abs() < 0.3
    close_second = (out["dvote2"] - 0.5).abs() < 0.3
    out["contested"] = close_first & close_second
    return out

# Stack the three election pairs (1948->1950, 1968->1970, 1988->1990) into one
# long table with a fresh 0..n-1 index.
pairs = pd.concat([election_pair(year) for year in [1948, 1968, 1988]], ignore_index=True)
# Count contested districts per first-election year (rows) and region code
# (columns).
pairs.groupby(["year", "region"])["contested"].sum().unstack()

region	1	2	3	4	5
year
1948	117.0	127.0	35.0	50.0	NaN
1968	90.0	117.0	67.0	62.0	4.0
1988	64.0	88.0	61.0	69.0	2.0

Reproduce the region-by-election grid

Open seats are plotted darker and larger, following the R example; incumbent races are gray.

Code

# Grid layout: one row per election pair. Column 0 is a text-only cell holding
# the row label; columns 1-4 hold the scatter plots for region codes 1-4.
fig, axes = plt.subplots(3, 5, figsize=(10, 5.8), sharex=False, sharey=True,
                         gridspec_kw={"width_ratios": [0.9, 1, 1, 1, 1]})

for row, year in enumerate([1948, 1968, 1988]):
    label_ax = axes[row, 0]
    label_ax.axis("off")
    # Place the label in axes-fraction coordinates. The y-axis is shared, so
    # this cell inherits the (-0.4, 0.3) limits set below and a data-coordinate
    # y of 0.5 would land above the panel instead of at its centre.
    label_ax.text(0.5, 0.5, f"{year}\nto\n{year + 2}", ha="center", va="center",
                  fontsize=12, transform=label_ax.transAxes)

    year_df = pairs[pairs["year"] == year]
    # Only regions 1-4 are drawn; because column 0 is the label cell, the
    # region code doubles as the grid column index.
    for region in [1, 2, 3, 4]:
        ax = axes[row, region]
        ax.axhline(0, color="black", linewidth=0.8)
        ok_region = year_df["region"] == region
        incumbent = ok_region & year_df["contested"] & (year_df["inc"].abs() == 1)
        open_seat = ok_region & year_df["contested"] & (year_df["inc"].abs() == 0)
        # Incumbent races: small gray points. Open seats: larger black points.
        ax.scatter(year_df.loc[incumbent, "dvote1"], year_df.loc[incumbent, "swing"], s=8, color="0.6", label="incumbent")
        ax.scatter(year_df.loc[open_seat, "dvote1"], year_df.loc[open_seat, "swing"], s=16, color="black", label="open seat")
        ax.set_xlim(0.2, 0.8)
        ax.set_ylim(-0.4, 0.3)
        # x-axes are independent, so ticks can be set per panel: bottom row only.
        ax.set_xticks([0.25, 0.50, 0.75] if row == 2 else [])
        ax.set_xticklabels(["25%", "50%", "75%"] if row == 2 else [])
        # The shared y-axis also shares its tick formatter, so every panel gets
        # the same label text and visibility is toggled per panel instead.
        # sharey=True hides y tick labels everywhere except grid column 0 (the
        # blank label cell), so they are switched back on for region 1.
        ax.set_yticks([-0.25, 0, 0.25])
        ax.set_yticklabels(["-25%", "0", "25%"])
        ax.tick_params(labelleft=(region == 1))
        if row == 0:
            ax.set_title(region_name[region - 1], fontsize=10)
        if region == 1:
            ax.set_ylabel("Vote swing")
        if row == 2 and region == 2:
            ax.set_xlabel("Dem. vote in election 1")
        ax.spines[["top", "right"]].set_visible(False)

fig.tight_layout()

Summary table

Code

# Per (year, region, open-seat flag): number of contested races plus the mean
# and standard deviation of the swing. Region codes with no entry in
# region_name map to NaN and therefore drop out of the grouping.
summary = (
    pairs.loc[pairs["contested"]]
    .assign(
        open_seat=lambda d: d["inc"].abs().eq(0),
        region_name=lambda d: d["region"].map(
            dict(zip(range(1, len(region_name) + 1), region_name))
        ),
    )
    .groupby(["year", "region_name", "open_seat"])
    .agg(
        n=("swing", "size"),
        mean_swing=("swing", "mean"),
        sd_swing=("swing", "std"),
    )
    .reset_index()
)
summary

	year	region_name	open_seat	n	mean_swing	sd_swing
0	1948	Midwest	False	106	-0.040398	0.037885
1	1948	Midwest	True	21	-0.046117	0.045298
2	1948	Northeast	False	106	-0.012844	0.068463
3	1948	Northeast	True	11	-0.002715	0.059017
4	1948	South	False	27	-0.021275	0.039455
5	1948	South	True	8	-0.044085	0.070482
6	1948	West	False	48	-0.019990	0.056912
7	1948	West	True	2	0.019259	0.041893
8	1968	Midwest	False	110	0.046984	0.069304
9	1968	Midwest	True	7	0.031179	0.044490
10	1968	Northeast	False	82	0.017212	0.078239
11	1968	Northeast	True	8	0.018113	0.121714
12	1968	South	False	57	0.024130	0.080206
13	1968	South	True	10	-0.001970	0.093575
14	1968	West	False	59	0.047193	0.079023
15	1968	West	True	3	0.041048	0.148694
16	1988	Midwest	False	85	0.026332	0.089668
17	1988	Midwest	True	3	-0.012539	0.101873
18	1988	Northeast	False	59	0.008190	0.094073
19	1988	Northeast	True	5	0.033152	0.097289
20	1988	South	False	58	-0.008988	0.073789
21	1988	South	True	3	0.030853	0.086048
22	1988	West	False	62	0.007212	0.095932
23	1988	West	True	7	-0.020920	0.094330

# Congress / historical vote-swing plots

Source: `Congress/congress_plots.Rmd`

This example builds the Chapter 2 congressional vote-swing grid: for selected elections, compare Democratic vote in election 1 to the swing in election 2, faceted by region. The original data files are fixed-width-ish numeric `.asc` files; each row contains district identifiers, incumbency, and the two-party vote counts.

## Load and stack the biennial files

```{python}
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def ros_root():
    candidates = [
        Path("../../ROS-Examples"),
        Path("../ROS-Examples"),
        Path("/Users/alal/tmp/ros-python-book/ROS-Examples"),
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return candidates[0]

root = ros_root()
congress_dir = root / "Congress/data"
region_name = ["Northeast", "Midwest", "South", "West"]

def read_congress_year(year):
    arr = np.loadtxt(congress_dir / f"{year}.asc")
    df = pd.DataFrame(arr, columns=["state_code", "district", "inc", "dem_votes", "rep_votes"])
    df.insert(0, "year", year)
    return df

congress_by_year = {year: read_congress_year(year) for year in range(1896, 1993, 2)}
congress_by_year[1988].head()
```

## Build pairwise vote-swing data

```{python}
def election_pair(first_year):
    first = congress_by_year[first_year].copy().reset_index(drop=True)
    second = congress_by_year[first_year + 2].copy().reset_index(drop=True)
    out = pd.DataFrame({
        "year": first_year,
        "state_code": first["state_code"],
        "region": np.floor(first["state_code"] / 20).astype(int) + 1,
        "inc": first["inc"],
        "dvote1": first["dem_votes"] / (first["dem_votes"] + first["rep_votes"]),
        "dvote2": second["dem_votes"] / (second["dem_votes"] + second["rep_votes"]),
    })
    out["swing"] = out["dvote2"] - out["dvote1"]
    out["contested"] = (out["dvote1"].sub(0.5).abs() < 0.3) & (out["dvote2"].sub(0.5).abs() < 0.3)
    return out

pairs = pd.concat([election_pair(year) for year in [1948, 1968, 1988]], ignore_index=True)
pairs.groupby(["year", "region"])["contested"].sum().unstack()
```

## Reproduce the region-by-election grid

Open seats are plotted darker and larger, following the R example; incumbent races are gray.

```{python}
fig, axes = plt.subplots(3, 5, figsize=(10, 5.8), sharex=False, sharey=True,
                         gridspec_kw={"width_ratios": [0.9, 1, 1, 1, 1]})

for row, year in enumerate([1948, 1968, 1988]):
    axes[row, 0].axis("off")
    axes[row, 0].text(0.5, 0.5, f"{year}\nto\n{year + 2}", ha="center", va="center", fontsize=12)
    year_df = pairs[pairs["year"] == year]
    for j, region in enumerate([1, 2, 3, 4], start=1):
        ax = axes[row, j]
        ax.axhline(0, color="black", linewidth=0.8)
        ok_region = year_df["region"] == region
        incumbent = ok_region & year_df["contested"] & (year_df["inc"].abs() == 1)
        open_seat = ok_region & year_df["contested"] & (year_df["inc"].abs() == 0)
        ax.scatter(year_df.loc[incumbent, "dvote1"], year_df.loc[incumbent, "swing"], s=8, color="0.6", label="incumbent")
        ax.scatter(year_df.loc[open_seat, "dvote1"], year_df.loc[open_seat, "swing"], s=16, color="black", label="open seat")
        ax.set_xlim(0.2, 0.8)
        ax.set_ylim(-0.4, 0.3)
        ax.set_xticks([0.25, 0.50, 0.75] if row == 2 else [])
        ax.set_xticklabels(["25%", "50%", "75%"] if row == 2 else [])
        ax.set_yticks([-0.25, 0, 0.25])
        ax.set_yticklabels(["-25%", "0", "25%"] if j == 1 else [])
        if row == 0:
            ax.set_title(region_name[region - 1], fontsize=10)
        if j == 1:
            ax.set_ylabel("Vote swing")
        if row == 2 and j == 2:
            ax.set_xlabel("Dem. vote in election 1")
        ax.spines[["top", "right"]].set_visible(False)

fig.tight_layout()
```

## Summary table

```{python}
summary = (
    pairs[pairs["contested"]]
    .assign(open_seat=lambda d: d["inc"].abs() == 0,
            region_name=lambda d: d["region"].map(dict(enumerate(region_name, start=1))))
    .groupby(["year", "region_name", "open_seat"])
    .agg(n=("swing", "size"), mean_swing=("swing", "mean"), sd_swing=("swing", "std"))
    .reset_index()
)
summary
```