Normality tests are useful when you need to check whether data are approximately normally distributed. This matters because many statistical methods assume normality, especially for small samples.

### Testing a Sample that is Approximately Normal
# Run the Shapiro-Wilk and Anderson-Darling normality tests on a sample
# drawn from a standard normal distribution.
import warnings

import numpy as np
from scipy import stats

# Fixed seed so the sample (and therefore the test statistics) is reproducible.
np.random.seed(11)
# NOTE(review): presumably silences a FutureWarning raised by one of the SciPy
# calls below on newer releases -- confirm against the installed version.
warnings.filterwarnings("ignore", category=FutureWarning)

# 200 draws from N(0, 1).
normal_data = np.random.normal(loc=0, scale=1, size=200)

shapiro = stats.shapiro(normal_data)
anderson = stats.anderson(normal_data, dist="norm")

print(f"Shapiro-Wilk statistic: {shapiro.statistic:.3f}")
print(f"Shapiro-Wilk p-value: {shapiro.pvalue:.6f}")
print(f"Anderson-Darling statistic: {anderson.statistic:.3f}")
print("Critical values:", anderson.critical_values)

# Output recorded in the original text:
# Shapiro-Wilk statistic: 0.995
# Shapiro-Wilk p-value: 0.676268
# Anderson-Darling statistic: 0.201
# Critical values: [0.559 0.629 0.749 0.87 1.031]
### Testing a Clearly Skewed Sample
# Run the Shapiro-Wilk and Anderson-Darling normality tests on a sample
# drawn from an exponential distribution.
import warnings

import numpy as np
from scipy import stats

# Fixed seed so the sample (and therefore the test statistics) is reproducible.
np.random.seed(11)
# NOTE(review): presumably silences a FutureWarning raised by one of the SciPy
# calls below on newer releases -- confirm against the installed version.
warnings.filterwarnings("ignore", category=FutureWarning)

# 200 draws from an exponential distribution with scale 1.0.
skewed_data = np.random.exponential(scale=1.0, size=200)

# Both tests are still run against the *normal* hypothesis (dist="norm").
shapiro = stats.shapiro(skewed_data)
anderson = stats.anderson(skewed_data, dist="norm")

print(f"Shapiro-Wilk statistic: {shapiro.statistic:.3f}")
print(f"Shapiro-Wilk p-value: {shapiro.pvalue:.6f}")
print(f"Anderson-Darling statistic: {anderson.statistic:.3f}")
print("Critical values:", anderson.critical_values)

# Output recorded in the original text:
# Shapiro-Wilk statistic: 0.865
# Shapiro-Wilk p-value: 0.000000
# Anderson-Darling statistic: 8.173
# Critical values: [0.559 0.629 0.749 0.87 1.031]
### Histograms of the Two Samples
# Draw side-by-side histograms of a normal sample and an exponential sample.
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(11)

# Draw order matters for reproducibility: normal first, then exponential.
normal_data = np.random.normal(loc=0, scale=1, size=200)
skewed_data = np.random.exponential(scale=1.0, size=200)

fig, axes = plt.subplots(1, 2, figsize=(10, 4.5))

# One (sample, title) pair per panel, left to right.
panels = (
    (normal_data, "Approximately Normal"),
    (skewed_data, "Skewed"),
)
for ax, (values, title) in zip(axes, panels):
    ax.hist(values, bins=20, alpha=0.75)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Q-Q Plots
# Draw side-by-side normal Q-Q plots of a normal sample and an exponential sample.
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

np.random.seed(11)

# Draw order matters for reproducibility: normal first, then exponential.
normal_data = np.random.normal(loc=0, scale=1, size=200)
skewed_data = np.random.exponential(scale=1.0, size=200)

fig, axes = plt.subplots(1, 2, figsize=(10, 4.5))

# One (sample, title) pair per panel, left to right.
panels = (
    (normal_data, "Q-Q Plot: Normal Data"),
    (skewed_data, "Q-Q Plot: Skewed Data"),
)
for ax, (values, title) in zip(axes, panels):
    # The title is set after probplot draws on the axes, as in the original.
    stats.probplot(values, dist="norm", plot=ax)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Practical Example: Choosing a Test Based on Normality
# Use the Shapiro-Wilk p-value to decide whether a sample can reasonably be
# treated as normal, and print a matching recommendation.
import warnings

import numpy as np
from scipy import stats

# Significance level for rejecting the normality hypothesis.
ALPHA = 0.05

# Fixed seed so the sample (and therefore the p-value) is reproducible.
np.random.seed(31)
# NOTE(review): presumably silences a FutureWarning raised by SciPy on newer
# releases -- confirm against the installed version.
warnings.filterwarnings("ignore", category=FutureWarning)

# 120 draws from an exponential distribution with scale 1.2.
sample = np.random.exponential(scale=1.2, size=120)

shapiro = stats.shapiro(sample)
print(f"Shapiro-Wilk p-value: {shapiro.pvalue:.6f}")

if shapiro.pvalue < ALPHA:
    conclusion = "Conclusion: the sample is not well modeled as normal; consider a nonparametric method."
else:
    conclusion = "Conclusion: the sample does not show a strong departure from normality."
print(conclusion)

# Output recorded in the original text:
# Shapiro-Wilk p-value: 0.000000
# Conclusion: the sample is not well modeled as normal; consider a nonparametric method.
### Conclusion

Normality tests work best when combined with visual checks like histograms and Q-Q plots. SciPy gives you both formal tests and the tools to support a more defensible interpretation.