Tutorials

Bootstrap Confidence Intervals with SciPy

Bootstrap confidence intervals estimate the uncertainty of a statistic by resampling the observed data with replacement many times and recomputing the statistic on each resample. This is useful when you want a flexible interval estimate that does not rely entirely on analytic formulas.

### Bootstrapping the Mean

import numpy as np
from scipy import stats

# Seed NumPy's global RNG so the simulated sample is identical on every run.
SEED = 22
np.random.seed(SEED)

# Simulated observations: 80 draws from a normal distribution (loc 50, scale 8).
data = np.random.normal(50, 8, 80)

# Bootstrap the sample mean. The data is wrapped in a one-element tuple
# because `stats.bootstrap` takes a sequence of samples.
bootstrap_result = stats.bootstrap(
    (data,),
    np.mean,
    n_resamples=5000,
    confidence_level=0.95,
    random_state=SEED,
)

# Report the point estimate followed by the interval bounds.
print(f"Sample mean: {data.mean():.3f}")
print(f"95% bootstrap CI: ({bootstrap_result.confidence_interval.low:.3f}, {bootstrap_result.confidence_interval.high:.3f})")
Output:

Sample mean: 49.519
95% bootstrap CI: (47.856, 51.332)

### Comparing Bootstrap and t-Based Intervals

import numpy as np
from scipy import stats

# Seed NumPy's global RNG so the simulated sample is identical on every run.
SEED = 22
np.random.seed(SEED)

# Simulated observations: 80 draws from a normal distribution (loc 50, scale 8).
data = np.random.normal(50, 8, 80)

# Quantities the t-based interval is built from.
n_obs = len(data)
sample_mean = data.mean()
std_error = stats.sem(data)

# Resampling-based interval for the mean.
bootstrap_result = stats.bootstrap(
    (data,),
    np.mean,
    n_resamples=5000,
    confidence_level=0.95,
    random_state=SEED,
)

# Analytic interval: Student's t with n - 1 degrees of freedom, centred on the
# sample mean and scaled by the standard error of the mean.
t_interval = stats.t.interval(0.95, n_obs - 1, loc=sample_mean, scale=std_error)

# Print both intervals one after the other for comparison.
print(f"Bootstrap CI: ({bootstrap_result.confidence_interval.low:.3f}, {bootstrap_result.confidence_interval.high:.3f})")
print(f"t-based CI: ({t_interval[0]:.3f}, {t_interval[1]:.3f})")
Output:

Bootstrap CI: (47.856, 51.332)
t-based CI: (47.734, 51.304)

### Visualizing the Bootstrap Distribution

import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global RNG; both the sample and the resamples draw from it.
np.random.seed(22)

# Simulated observations: 80 draws from a normal distribution (loc 50, scale 8).
data = np.random.normal(50, 8, 80)

# Manual bootstrap: draw 4000 same-size resamples with replacement and keep
# the mean of each one.
n_obs = len(data)
boot_means = np.array(
    [
        np.random.choice(data, size=n_obs, replace=True).mean()
        for _ in range(4000)
    ]
)

# Percentile interval: the middle 95% of the resampled means.
ci_low, ci_high = np.percentile(boot_means, [2.5, 97.5])

# Histogram of the resampled means with the sample mean and the interval
# bounds drawn as dashed vertical lines.
dashed = {"linestyle": "--"}
plt.figure(figsize=(9, 5))
plt.hist(boot_means, bins=35, alpha=0.75)
plt.axvline(data.mean(), color="red", label="Sample mean", **dashed)
# Only the lower bound carries a label, so the legend shows "95% CI" once.
plt.axvline(ci_low, color="green", label="95% CI", **dashed)
plt.axvline(ci_high, color="green", **dashed)
plt.title("Bootstrap Distribution of the Mean")
plt.xlabel("Mean")
plt.ylabel("Count")
plt.legend()
plt.show()
### Practical Example: Average Delivery Time

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Seed NumPy's global RNG so the simulated delivery times are reproducible.
SEED = 37
np.random.seed(SEED)

# Simulated delivery times in minutes: 60 normal draws (loc 32, scale 4.5).
delivery_times = np.random.normal(32, 4.5, 60)

# Bootstrap interval for the average delivery time.
bootstrap_result = stats.bootstrap(
    (delivery_times,),
    np.mean,
    n_resamples=4000,
    confidence_level=0.95,
    random_state=SEED,
)

# Report the point estimate and the interval bounds.
print(f"Average delivery time: {delivery_times.mean():.2f} minutes")
print(
    "95% bootstrap CI: "
    f"({bootstrap_result.confidence_interval.low:.2f}, {bootstrap_result.confidence_interval.high:.2f}) minutes"
)

# Histogram of the observed times with the sample mean as a dashed line.
mean_line = {"color": "crimson", "linestyle": "--", "linewidth": 2}
plt.figure(figsize=(8, 5))
plt.hist(delivery_times, bins=15, alpha=0.7)
plt.axvline(delivery_times.mean(), **mean_line)
plt.title("Observed Delivery Times")
plt.xlabel("Minutes")
plt.ylabel("Count")
plt.show()
Output:

Average delivery time: 32.58 minutes
95% bootstrap CI: (31.38, 33.74) minutes

### Conclusion

Bootstrap intervals are a practical way to quantify uncertainty directly from the data. SciPy's `scipy.stats.bootstrap` function makes the procedure easy to run and to explain.