When you run many hypothesis tests at once, some small p-values will appear by chance alone. Multiple testing correction helps control that problem so your final list of significant results is more trustworthy.

### Simulating Many P-Values
import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests

# Fix the RNG state so the simulated batch of tests is reproducible.
np.random.seed(120)

total_tests = 500
null_count = 450

# True-null tests yield Uniform(0, 1) p-values; the remaining tests carry a
# real effect, modeled as Beta(0.3, 1) draws that pile up near zero.
null_part = stats.uniform.rvs(size=null_count)
signal_part = stats.beta.rvs(0.3, 1, size=total_tests - null_count)
p_values = np.concatenate([null_part, signal_part])

# Benjamini-Hochberg adjustment, controlling the false discovery rate at 10%.
reject, adjusted_p_values, _, _ = multipletests(
    p_values, method="fdr_bh", alpha=0.10
)
print(f"Rejected hypotheses: {reject.sum()}")
print(f"Adjusted p-values below 0.10: {(adjusted_p_values < 0.10).sum()}")

Rejected hypotheses: 3
Adjusted p-values below 0.10: 3
### Comparing Raw and Adjusted Significance Counts
import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests

# Reproduce the same simulated batch of tests as in the previous example.
np.random.seed(120)

total_tests = 500
null_count = 450

p_values = np.concatenate([
    # Flat null p-values followed by effect p-values concentrated near zero.
    stats.uniform.rvs(size=null_count),
    stats.beta.rvs(0.3, 1, size=total_tests - null_count),
])

# Naive per-test thresholding at 0.05 versus BH-corrected discoveries at a
# 10% false discovery rate.
raw_significant = (p_values < 0.05).sum()
reject, adjusted_p_values, _, _ = multipletests(
    p_values, method="fdr_bh", alpha=0.10
)
print(f"Raw p-values below 0.05: {raw_significant}")
print(f"BH discoveries at FDR 0.10: {reject.sum()}")

Raw p-values below 0.05: 38
BH discoveries at FDR 0.10: 3
### Visualizing the P-Value Distribution
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Same seeded mixture of null and effect p-values as the earlier examples.
np.random.seed(120)

total_tests = 500
null_count = 450

samples = [
    stats.uniform.rvs(size=null_count),
    stats.beta.rvs(0.3, 1, size=total_tests - null_count),
]
p_values = np.concatenate(samples)

# A histogram makes the mixture visible: a roughly flat base from the true
# nulls plus a spike near zero contributed by the real effects.
plt.figure(figsize=(9, 5))
plt.hist(p_values, bins=30, alpha=0.75)
plt.xlabel("P-value")
plt.ylabel("Count")
plt.title("Distribution of P-Values Across Many Tests")
plt.show()

### Estimating the False Discovery Share
import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests

np.random.seed(120)

n_tests = 500
n_true_null = 450

# The first n_true_null entries come from true nulls and the rest from real
# effects, so indices below n_true_null identify false discoveries exactly.
p_values = np.concatenate([
    stats.uniform.rvs(size=n_true_null),
    stats.beta.rvs(0.3, 1, size=n_tests - n_true_null),
])

reject, adjusted_p_values, _, _ = multipletests(
    p_values, method="fdr_bh", alpha=0.10
)

# Rejections that land inside the known-null prefix are false discoveries.
false_discoveries = reject[:n_true_null].sum()
total_discoveries = reject.sum()
print(f"False discoveries among rejected tests: {false_discoveries}")
print(f"Total discoveries: {total_discoveries}")
print(f"Observed false discovery share: {false_discoveries / total_discoveries:.3f}")

False discoveries among rejected tests: 0
Total discoveries: 3
Observed false discovery share: 0.000
### Practical Example: Screening Many Features
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.stats.multitest import multipletests

# Feature-screening scenario: 300 tests, 40 of which carry a real signal.
np.random.seed(151)

n_features = 300
n_null_features = 260

p_values = np.concatenate([
    stats.uniform.rvs(size=n_null_features),
    stats.beta.rvs(0.25, 1, size=n_features - n_null_features),
])

# Select features with Benjamini-Hochberg at a stricter 5% FDR.
reject, adjusted_p_values, _, _ = multipletests(
    p_values, method="fdr_bh", alpha=0.05
)
print(f"Number of selected features: {reject.sum()}")
print(f"Smallest adjusted p-value: {adjusted_p_values.min():.6f}")

# Plot the sorted p-values against a flat 0.05 reference line; true effects
# show up as the shallow run of points hugging the x-axis on the left.
ordered_p = np.sort(p_values)
plt.figure(figsize=(9, 5))
plt.scatter(range(len(ordered_p)), ordered_p, s=12)
plt.axhline(0.05, color="red", linestyle="--", linewidth=1)
plt.xlabel("Ordered test index")
plt.ylabel("P-value")
plt.title("Ordered P-Values from Many Feature Tests")
plt.show()

Number of selected features: 6
Smallest adjusted p-value: 0.000009
### Conclusion

Multiple testing correction is essential whenever you evaluate many hypotheses at once. Combining SciPy-generated p-values with `multipletests` helps reduce false positives while keeping the analysis interpretable.