Multiple Hypothesis Testing

Performing multiple hypothesis tests with false discovery rate control using SciPy and StatsModels

import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests

# Generate sample data
n_tests = 1000
n_true_null = 950

p_values = np.concatenate([
    stats.uniform.rvs(size=n_true_null),  # True null hypotheses
    stats.beta.rvs(0.1, 1, size=n_tests - n_true_null)  # False null hypotheses

# Perform Benjamini-Hochberg procedure
reject, corrected_p_values, _, _ = multipletests(p_values, method='fdr_bh', alpha=0.1)

# Print results
print(f"Number of rejected null hypotheses: {np.sum(reject)}")
print(f"False discovery rate: {np.sum(reject[:n_true_null]) / np.sum(reject):.4f}")

# Plot p-value distribution
plt.figure(figsize=(10, 6))
plt.hist(p_values, bins=50, density=True, alpha=0.7)
plt.title("P-value Distribution")