Bootstrap Confidence Interval

Estimating a confidence interval for the sample mean using the bootstrap method with NumPy, compared against a t-distribution interval from SciPy

Python
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Synthetic sample: 100 draws from a normal distribution with mean 10 and
# standard deviation 2. The global seed is fixed so every run sees the same data.
np.random.seed(42)
data = np.random.normal(10, 2, 100)

# Define bootstrap function
def bootstrap_mean(data, num_bootstrap_samples=10000, *, rng=None):
    """Return the bootstrap distribution of the sample mean.

    Each replicate draws ``len(data)`` observations from ``data`` with
    replacement and records the mean of that resample.

    Parameters
    ----------
    data : array_like
        One-dimensional sample to resample. Must contain at least one value.
    num_bootstrap_samples : int, optional
        Number of bootstrap replicates to draw (default 10000).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` still controls reproducibility.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_bootstrap_samples,)`` holding one
        resampled mean per replicate.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    data = np.asarray(data)
    if data.size == 0:
        # Resampling an empty sample would silently yield an array of NaNs.
        raise ValueError("data must contain at least one observation")

    sample_size = len(data)
    # Fall back to the global generator so existing seeded scripts keep
    # producing the same draws.
    choice = np.random.choice if rng is None else rng.choice
    bootstrap_means = np.empty(num_bootstrap_samples)
    for i in range(num_bootstrap_samples):
        resample = choice(data, size=sample_size, replace=True)
        bootstrap_means[i] = np.mean(resample)
    return bootstrap_means

# Build the bootstrap distribution of the mean from the observed sample
bootstrap_samples = bootstrap_mean(data)

# Percentile-method 95% interval: the 2.5th and 97.5th percentiles of the
# resampled means bound the central 95% of the bootstrap distribution
ci_lower = np.percentile(bootstrap_samples, 2.5)
ci_upper = np.percentile(bootstrap_samples, 97.5)

# Report the point estimate alongside the bootstrap interval
print(f"Sample mean: {np.mean(data):.4f}")
print(f"95% Bootstrap CI: ({ci_lower:.4f}, {ci_upper:.4f})")

# Compare with the classical t-distribution CI for the mean: n - 1 degrees of
# freedom, centred on the sample mean and scaled by the standard error.
# The confidence level is passed positionally because older SciPy releases
# named this keyword `alpha` rather than `confidence`; the positional form
# is accepted by both.
t_ci_lower, t_ci_upper = stats.t.interval(0.95, df=len(data) - 1,
                                          loc=np.mean(data),
                                          scale=stats.sem(data))
print(f"95% t-distribution CI: ({t_ci_lower:.4f}, {t_ci_upper:.4f})")

# Plot the bootstrap distribution as a density histogram and mark the
# observed sample mean (red) and both ends of the bootstrap CI (green)
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(bootstrap_samples, bins=50, density=True, alpha=0.7)

ax.axvline(np.mean(data), color='red', linestyle='dashed', linewidth=2, label='Sample Mean')
# Only the lower bound carries a label so the legend shows a single CI entry
ax.axvline(ci_lower, color='green', linestyle='dashed', linewidth=2, label='Bootstrap CI')
ax.axvline(ci_upper, color='green', linestyle='dashed', linewidth=2)

ax.set_title("Bootstrap Distribution of Sample Mean")
ax.set_xlabel("Mean")
ax.set_ylabel("Density")
ax.legend()
plt.show()