Multiple Hypothesis Testing

Performing multiple hypothesis tests with false discovery rate control using SciPy and StatsModels

You have unsaved changes
Python
import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests

# Generate sample data
np.random.seed(42)
n_tests = 1000
n_true_null = 950

p_values = np.concatenate([
    stats.uniform.rvs(size=n_true_null),  # True null hypotheses
    stats.beta.rvs(0.1, 1, size=n_tests - n_true_null)  # False null hypotheses
])

# Perform Benjamini-Hochberg procedure
reject, corrected_p_values, _, _ = multipletests(p_values, method='fdr_bh', alpha=0.1)

# Print results
print(f"Number of rejected null hypotheses: {np.sum(reject)}")
print(f"False discovery rate: {np.sum(reject[:n_true_null]) / np.sum(reject):.4f}")

# Plot p-value distribution
plt.figure(figsize=(10, 6))
plt.hist(p_values, bins=50, density=True, alpha=0.7)
plt.title("P-value Distribution")
plt.xlabel("P-value")
plt.ylabel("Density")
plt.show()
Click Run or press shift + ENTER to run code.