Bootstrap confidence intervals estimate uncertainty by resampling the observed data many times. This is useful when you want a flexible interval estimate without relying entirely on analytic formulas.

### Bootstrapping the Mean
import numpy as np
from scipy import stats
# Seed NumPy's global generator so the simulated sample is reproducible.
np.random.seed(22)
data = np.random.normal(loc=50, scale=8, size=80)

# Resample the data 5000 times and build a 95% confidence interval for the
# mean; `random_state` makes the resampling itself repeatable.
bootstrap_result = stats.bootstrap(
    (data,),
    np.mean,
    confidence_level=0.95,
    n_resamples=5000,
    random_state=22,
)

sample_mean = data.mean()
print(f"Sample mean: {sample_mean:.3f}")
print(f"95% bootstrap CI: ({bootstrap_result.confidence_interval.low:.3f}, {bootstrap_result.confidence_interval.high:.3f})")

Sample mean: 49.519
95% bootstrap CI: (47.856, 51.332)
### Comparing Bootstrap and t-Based Intervals
import numpy as np
from scipy import stats
# Same seeded sample as before so both intervals describe identical data.
np.random.seed(22)
data = np.random.normal(loc=50, scale=8, size=80)

# Interval 1: bootstrap the mean with 5000 resamples.
bootstrap_result = stats.bootstrap(
    (data,),
    np.mean,
    confidence_level=0.95,
    n_resamples=5000,
    random_state=22,
)

# Interval 2: Student-t interval centred on the sample mean and scaled by
# the standard error, with n - 1 degrees of freedom.
sample_size = len(data)
t_interval = stats.t.interval(
    confidence=0.95,
    df=sample_size - 1,
    loc=data.mean(),
    scale=stats.sem(data),
)

boot_ci = bootstrap_result.confidence_interval
print(f"Bootstrap CI: ({boot_ci.low:.3f}, {boot_ci.high:.3f})")
print(f"t-based CI: ({t_interval[0]:.3f}, {t_interval[1]:.3f})")

Bootstrap CI: (47.856, 51.332)
t-based CI: (47.734, 51.304)
### Visualizing the Bootstrap Distribution
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's global generator so both the sample and the resamples below
# are reproducible.
np.random.seed(22)
data = np.random.normal(loc=50, scale=8, size=80)

# Manual bootstrap: draw a same-size sample with replacement, record its
# mean, and repeat. One `np.random.choice` call per resample, in order.
n_resamples = 4000
boot_means = np.array([
    np.random.choice(data, size=len(data), replace=True).mean()
    for _ in range(n_resamples)
])

# Percentile interval: the middle 95% of the resampled means.
ci_low, ci_high = np.percentile(boot_means, [2.5, 97.5])

plt.figure(figsize=(9, 5))
plt.hist(boot_means, bins=35, alpha=0.75)
plt.axvline(data.mean(), color="red", linestyle="--", label="Sample mean")
# Only the lower bound carries a label so the legend shows "95% CI" once.
plt.axvline(ci_low, color="green", linestyle="--", label="95% CI")
plt.axvline(ci_high, color="green", linestyle="--")
plt.title("Bootstrap Distribution of the Mean")
plt.xlabel("Mean")
plt.ylabel("Count")
plt.legend()
plt.show()

### Practical Example: Average Delivery Time
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Simulate 60 delivery times (in minutes) from a seeded normal distribution.
np.random.seed(37)
delivery_times = np.random.normal(loc=32, scale=4.5, size=60)

# 95% bootstrap confidence interval for the mean delivery time.
bootstrap_result = stats.bootstrap(
    (delivery_times,),
    np.mean,
    confidence_level=0.95,
    n_resamples=4000,
    random_state=37,
)

mean_minutes = delivery_times.mean()
interval = bootstrap_result.confidence_interval
print(f"Average delivery time: {mean_minutes:.2f} minutes")
print(f"95% bootstrap CI: ({interval.low:.2f}, {interval.high:.2f}) minutes")

# Histogram of the observed times with the sample mean marked by a dashed line.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(delivery_times, bins=15, alpha=0.7)
ax.axvline(mean_minutes, color="crimson", linestyle="--", linewidth=2)
ax.set_title("Observed Delivery Times")
ax.set_xlabel("Minutes")
ax.set_ylabel("Count")
plt.show()

Average delivery time: 32.58 minutes
95% bootstrap CI: (31.38, 33.74) minutes
### Conclusion

Bootstrap intervals are a practical way to quantify uncertainty from the data directly. SciPy's `bootstrap` function makes this process much easier to run and explain.