Correlation Analysis

Use SciPy to measure the relationship between variables with Pearson, Spearman, and Kendall correlation coefficients.

Python
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Generate sample data.
# All three draws consume the same seeded global random stream, so their order
# must stay fixed for the numbers to be reproducible.
np.random.seed(42)
x = np.random.normal(0, 1, 100)
# y is a noisy linear function of x, so x and y are correlated by construction.
y = 0.5 * x + np.random.normal(0, 0.5, 100)
# z is drawn independently of x and y.
z = np.random.normal(0, 1, 100)


def format_p_value(p, threshold=1e-4):
    """Format a p-value for display without collapsing tiny values to zero.

    Fixed-point formatting with four decimals renders any p-value below
    0.00005 as ``0.0000``, which reads as "exactly zero". Values below
    *threshold* are therefore shown in scientific notation instead.

    Args:
        p: The p-value to format.
        threshold: Values strictly below this are shown in scientific
            notation; everything else keeps four fixed decimals.

    Returns:
        The formatted p-value as a string.
    """
    if p < threshold:
        return f"{p:.2e}"
    return f"{p:.4f}"


# Calculate correlation coefficients (each call returns the statistic and its
# p-value for the x/y pair).
pearson_r, pearson_p = stats.pearsonr(x, y)
spearman_r, spearman_p = stats.spearmanr(x, y)
kendall_tau, kendall_p = stats.kendalltau(x, y)

print(f"Pearson correlation: r = {pearson_r:.4f}, p = {format_p_value(pearson_p)}")
print(f"Spearman correlation: r = {spearman_r:.4f}, p = {format_p_value(spearman_p)}")
print(f"Kendall's tau: τ = {kendall_tau:.4f}, p = {format_p_value(kendall_p)}")

# Visualize the correlations
# Stack the three samples as columns; transposing makes each variable a row,
# which is the layout np.corrcoef treats as one variable per row.
data = np.column_stack((x, y, z))
corr_matrix = np.corrcoef(data.T)

# Label the rows/columns with the variable names; without this the heatmap
# shows bare indices 0/1/2 and the reader cannot tell which cell is which.
variable_labels = ["X", "Y", "Z"]

plt.figure(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    center=0,
    xticklabels=variable_labels,
    yticklabels=variable_labels,
)
plt.title("Correlation Matrix")
plt.show()

# Scatter plot of y against x, titled with the Pearson coefficient computed above.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, y, alpha=0.5)
ax.set_title(f"Scatter Plot (Pearson r = {pearson_r:.4f})")
ax.set_xlabel("X")
ax.set_ylabel("Y")
plt.show()