Correlation Analysis

Use SciPy to analyze the relationship between variables with Pearson, Spearman, and Kendall correlation coefficients.

You have unsaved changes
Python
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Generate sample data. Seeding makes every run produce the same draws; the
# draw order (x, then the noise for y, then z) is what fixes the numbers.
np.random.seed(42)
x = np.random.normal(0, 1, 100)
# y is a linear function of x (slope 0.5) plus Gaussian noise (std 0.5).
y = 0.5 * x + np.random.normal(0, 0.5, 100)
# z is a separate draw that does not depend on x or y.
z = np.random.normal(0, 1, 100)

# Calculate correlation coefficients; each call unpacks to (statistic, p-value).
pearson_r, pearson_p = stats.pearsonr(x, y)
spearman_r, spearman_p = stats.spearmanr(x, y)
kendall_tau, kendall_p = stats.kendalltau(x, y)

# The p-values use general ("g") formatting: fixed-point ".4f" renders any
# p-value below 0.00005 as "0.0000", which reads as exactly zero.
print(f"Pearson correlation: r = {pearson_r:.4f}, p = {pearson_p:.4g}")
print(f"Spearman correlation: r = {spearman_r:.4f}, p = {spearman_p:.4g}")
print(f"Kendall's tau: τ = {kendall_tau:.4f}, p = {kendall_p:.4g}")

# Visualize the correlations: pairwise Pearson coefficients of x, y and z.
# column_stack puts one variable per column; rowvar=False tells NumPy to
# treat columns (not rows) as the variables.
data = np.column_stack((x, y, z))
corr_matrix = np.corrcoef(data, rowvar=False)

# Explicit tick labels, in the same order as the stacked columns; without
# them the heatmap axes show only the indices 0, 1, 2.
variable_names = ["X", "Y", "Z"]

plt.figure(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
    center=0,
    xticklabels=variable_names,
    yticklabels=variable_names,
)
plt.title("Correlation Matrix")
plt.show()

# Scatter plot of y against x, with the Pearson coefficient in the title.
plt.figure(figsize=(10, 6))
plt.scatter(x, y, alpha=0.5)
plt.title(f"Scatter Plot (Pearson r = {pearson_r:.4f})")
plt.xlabel("X")
plt.ylabel("Y")
plt.show()
Click Run or press Shift + Enter to run the code.