Chi-Square Test of Independence

Using SciPy's chi-square test of independence to analyze the relationship between two categorical variables

Python
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Contingency table of observed counts: one categorical variable along the
# rows, the other along the columns.
observed = np.array([
    (30, 20, 10),
    (15, 25, 20),
])

# Chi-square test of independence. `expected` holds the cell counts implied
# by the table's marginal totals under the null hypothesis of independence.
chi2, p_value, dof, expected = stats.chi2_contingency(observed)

# Report the test summary, one value per line.
print(
    f"Chi-square statistic: {chi2:.4f}",
    f"P-value: {p_value:.4f}",
    f"Degrees of freedom: {dof}",
    sep="\n",
)

# Visualize the observed and expected frequencies as side-by-side heatmaps
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Share one colour scale across both panels so a given shade of blue stands
# for the same count in each heatmap. Without explicit limits every imshow()
# call normalises to its own data range and the panels cannot be compared.
vmin = min(observed.min(), expected.min())
vmax = max(observed.max(), expected.max())

# (axes, data, title, format spec for the cell annotations)
panels = (
    (ax1, observed, "Observed Frequencies", ""),     # counts printed as-is
    (ax2, expected, "Expected Frequencies", ".1f"),  # expected counts are fractional
)

for ax, table, title, fmt in panels:
    n_rows, n_cols = table.shape

    ax.imshow(table, cmap='Blues', vmin=vmin, vmax=vmax)
    ax.set_title(title)
    ax.set_xlabel("Category 2")
    ax.set_ylabel("Category 1")

    # One tick per category; the default locator may otherwise place ticks
    # at fractional positions that do not correspond to any cell.
    ax.set_xticks(list(range(n_cols)))
    ax.set_yticks(list(range(n_rows)))

    # Annotate each cell with its value; text() takes (x, y) = (column, row).
    for i in range(n_rows):
        for j in range(n_cols):
            ax.text(j, i, format(table[i, j], fmt), ha='center', va='center')

plt.tight_layout()
plt.show()

# Interpret the results: compare the p-value with the significance level
alpha = 0.05
verdict = (
    "Reject the null hypothesis: There is a significant relationship between the variables."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant relationship between the variables."
)
print(verdict)