"""Chi-square test of independence on a small contingency table.

Run as a script to print the test statistic, p-value and degrees of freedom,
show heatmaps of the observed and expected frequencies, and print an
interpretation of the result at the ``ALPHA`` significance level.
"""

import numpy as np
from scipy import stats

# Significance level the p-value is compared against.
ALPHA = 0.05

# Example contingency table analysed when the file is run as a script.
OBSERVED = np.array([[30, 20, 10], [15, 25, 20]])


def chi_square_test(observed):
    """Run ``scipy.stats.chi2_contingency`` on a contingency table.

    Args:
        observed: Table of observed frequencies (array-like).

    Returns:
        The tuple ``(chi2, p_value, dof, expected)`` unpacked from
        ``stats.chi2_contingency``.
    """
    chi2, p_value, dof, expected = stats.chi2_contingency(observed)
    return chi2, p_value, dof, expected


def interpret(p_value: float, alpha: float = ALPHA) -> str:
    """Return the verdict sentence for ``p_value`` at level ``alpha``.

    The null hypothesis is rejected only when ``p_value`` is strictly
    smaller than ``alpha``.
    """
    if p_value < alpha:
        return (
            "Reject the null hypothesis: There is a significant "
            "relationship between the variables."
        )
    return (
        "Fail to reject the null hypothesis: There is no significant "
        "relationship between the variables."
    )


def _draw_heatmap(ax, table, title, label):
    """Draw ``table`` as a 'Blues' heatmap on ``ax`` with a label per cell.

    ``label`` is a callable that turns one cell value into the text shown
    in that cell.
    """
    ax.imshow(table, cmap='Blues')
    ax.set_title(title)
    ax.set_xlabel("Category 2")
    ax.set_ylabel("Category 1")
    # imshow puts columns on the x axis and rows on the y axis, hence the
    # (col, row) order when positioning the text.
    for (row, col), value in np.ndenumerate(table):
        ax.text(col, row, label(value), ha='center', va='center')


def plot_frequencies(observed, expected):
    """Show observed and expected frequencies as side-by-side heatmaps."""
    # Imported here rather than at module level so the statistical helpers
    # above remain usable where matplotlib is not installed.
    import matplotlib.pyplot as plt

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    _draw_heatmap(ax1, np.asarray(observed), "Observed Frequencies", str)
    _draw_heatmap(
        ax2,
        np.asarray(expected),
        "Expected Frequencies",
        lambda value: f"{value:.1f}",
    )
    plt.tight_layout()
    plt.show()


def main():
    """Print the test results, show the heatmaps, then print the verdict."""
    chi2, p_value, dof, expected = chi_square_test(OBSERVED)

    print(f"Chi-square statistic: {chi2:.4f}")
    print(f"P-value: {p_value:.4f}")
    print(f"Degrees of freedom: {dof}")

    plot_frequencies(OBSERVED, expected)

    print(interpret(p_value, ALPHA))


if __name__ == "__main__":
    main()