import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Chi-square test of independence on a 2x3 contingency table: print the test
# results, draw the observed and expected counts side by side as heatmaps,
# then state the decision at the chosen significance level.

# Observed counts. Rows are plotted on the "Category 1" axis and columns on
# the "Category 2" axis (see the axis labels below).
observed = np.array([
    [30, 20, 10],
    [15, 25, 20],
])

# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom, and the table of frequencies expected if the variables were
# independent.
chi2, p_value, dof, expected = stats.chi2_contingency(observed)
print(f"Chi-square statistic: {chi2:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of freedom: {dof}")

# Visualize the observed and expected frequencies
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# One entry per panel: title, table to draw, and how to render a cell value.
# Observed counts are shown as-is; expected counts are rounded to one decimal.
panels = (
    ("Observed Frequencies", observed, str),
    ("Expected Frequencies", expected, "{:.1f}".format),
)
for ax, (title, table, format_cell) in zip((ax1, ax2), panels):
    ax.imshow(table, cmap='Blues')
    ax.set_title(title)
    ax.set_xlabel("Category 2")
    ax.set_ylabel("Category 1")
    # imshow draws columns along x and rows along y, so the label for cell
    # [row, col] is placed at (x=col, y=row).
    for (row, col), value in np.ndenumerate(table):
        ax.text(col, row, format_cell(value), ha='center', va='center')

plt.tight_layout()
plt.show()

# Interpret the results: compare the p-value with the significance level.
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant relationship between the variables.")
else:
    print("Fail to reject the null hypothesis: There is no significant relationship between the variables.")
# Click Run or press Shift + Enter to run the code.