Correlation analysis measures the strength and direction of association between variables. SciPy supports several correlation coefficients, each with different assumptions and use cases.

### Pearson Correlation
import numpy as np
from scipy import stats
# Seed NumPy's global RNG so every run draws the same sample.
np.random.seed(15)

# x is drawn first, then the noise term, so the random stream is consumed
# in a fixed order; y is a scaled copy of x plus that noise.
x = np.random.normal(loc=0, scale=1, size=120)
noise = np.random.normal(loc=0, scale=0.5, size=120)
y = 0.8 * x + noise

# pearsonr yields the coefficient followed by its p-value.
result = stats.pearsonr(x, y)
r, p_value = result[0], result[1]
print(f"Pearson r: {r:.3f}")
print(f"P-value: {p_value:.6f}")

Output:

Pearson r: 0.875
P-value: 0.000000
### Comparing Pearson, Spearman, and Kendall
import numpy as np
from scipy import stats
# Fixed seed: the three coefficients below are computed on one reproducible sample.
np.random.seed(15)

# Draw x, then the noise, in that order so the random stream is unchanged.
x = np.random.normal(loc=0, scale=1, size=120)
noise = np.random.normal(loc=0, scale=0.5, size=120)
y = 0.8 * x + noise

# Run the three tests on the same (x, y) pair, in the order
# Pearson -> Spearman -> Kendall.
pearson, spearman, kendall = (
    measure(x, y)
    for measure in (stats.pearsonr, stats.spearmanr, stats.kendalltau)
)

print(f"Pearson: r = {pearson.statistic:.3f}, p = {pearson.pvalue:.6f}")
print(f"Spearman: r = {spearman.statistic:.3f}, p = {spearman.pvalue:.6f}")
print(f"Kendall: tau = {kendall.statistic:.3f}, p = {kendall.pvalue:.6f}")

Output:

Pearson: r = 0.875, p = 0.000000
Spearman: r = 0.860, p = 0.000000
Kendall: tau = 0.684, p = 0.000000
### Scatter Plot of the Relationship
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Reproducible sample: seed first, then draw x followed by the noise term.
np.random.seed(15)
x = np.random.normal(loc=0, scale=1, size=120)
noise = np.random.normal(loc=0, scale=0.5, size=120)
y = 0.8 * x + noise

# Only the coefficient is needed for the plot title; the p-value is ignored.
r = stats.pearsonr(x, y)[0]
# Build the scatter plot on an explicit Axes object; the figure is displayed
# by the plt.show() call that follows this block.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x, y, alpha=0.6)
ax.set_xlabel("X")
ax.set_ylabel("Y")
# Put the coefficient in the title so the plot and the statistic are read together.
ax.set_title(f"Scatter Plot (Pearson r = {r:.3f})")
ax.grid(alpha=0.3)
plt.show()

### Correlation Matrix
import numpy as np
import matplotlib.pyplot as plt
# Reproducible sample: x and y are linearly related, z is drawn independently.
np.random.seed(15)
x = np.random.normal(loc=0, scale=1, size=120)
noise = np.random.normal(loc=0, scale=0.5, size=120)
y = 0.8 * x + noise
z = np.random.normal(loc=0, scale=1, size=120)

# One column per variable; rowvar=False tells corrcoef that variables run
# along columns, giving a 3x3 matrix ordered x, y, z.
samples = np.column_stack((x, y, z))
matrix = np.corrcoef(samples, rowvar=False)
# Draw the correlation matrix as an annotated heatmap. The figure is displayed
# by the plt.show() call that follows this block.
labels = ["x", "y", "z"]
ticks = list(range(len(labels)))

plt.figure(figsize=(6, 5))
# The color scale is pinned to [-1, 1], the full range of a correlation
# coefficient, so colors are comparable regardless of the data.
plt.imshow(matrix, cmap="coolwarm", vmin=-1, vmax=1)
plt.xticks(ticks, labels)
plt.yticks(ticks, labels)

# Annotate every cell with its coefficient. Text is placed at (j, i):
# the column index is the horizontal coordinate, the row index the vertical.
for i, row in enumerate(matrix):
    for j, value in enumerate(row):
        plt.text(j, i, f"{value:.2f}", ha="center", va="center")

plt.colorbar(label="Correlation")
plt.title("Correlation Matrix")
plt.show()

### Practical Example: Study Time and Exam Score
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Seed the global RNG so the simulated students are the same on every run.
np.random.seed(29)

# 80 study-hour values drawn uniformly from [2, 12); scores follow a linear
# trend in study hours plus Gaussian noise. Hours are drawn before the noise
# so the random stream is consumed in a fixed order.
study_hours = np.random.uniform(low=2, high=12, size=80)
noise = np.random.normal(loc=0, scale=6, size=80)
exam_scores = 52 + 3.5 * study_hours + noise

# Linear (Pearson) and rank-based (Spearman) association on the same sample.
sample = (study_hours, exam_scores)
pearson = stats.pearsonr(*sample)
spearman = stats.spearmanr(*sample)

print(f"Pearson r: {pearson.statistic:.3f}, p-value: {pearson.pvalue:.6f}")
print(f"Spearman r: {spearman.statistic:.3f}, p-value: {spearman.pvalue:.6f}")
# Scatter of study time against exam score on an explicit Axes object; the
# figure is displayed by the plt.show() call that follows this block.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(study_hours, exam_scores, alpha=0.65)
ax.set_xlabel("Study hours")
ax.set_ylabel("Exam score")
ax.set_title("Study Time vs Exam Score")
ax.grid(alpha=0.3)
plt.show()

Output:

Pearson r: 0.846, p-value: 0.000000
Spearman r: 0.847, p-value: 0.000000
### Conclusion

SciPy provides several correlation measures so you can match the statistic to the structure of your data. Combining the coefficient with a plot makes the relationship much easier to interpret.