import numpy as np
import pandas as pd
from factor_analyzer import calculate_bartlett_sphericity, calculate_kmo
from sklearn.preprocessing import StandardScaler

# New data, arranged manually in an attempt to obtain a higher KMO value.
# Each row is one observation; the columns are X1..X5 followed by Y.
data = np.array([
    [98, 92, 95, 90, 88, 96],
    [94, 90, 88, 92, 91, 93],
    [97, 95, 96, 94, 93, 97],
    [91, 88, 90, 87, 89, 92],
    [95, 92, 94, 93, 90, 95],
    [90, 87, 89, 88, 85, 91],
    [93, 89, 92, 90, 87, 94],
    [96, 94, 95, 92, 90, 96],
    [92, 90, 91, 89, 88, 92],
    [94, 91, 93, 90, 89, 93],
    [97, 95, 96, 94, 93, 97],
    [90, 87, 89, 88, 85, 91],
    [95, 92, 94, 93, 90, 95],
    [91, 88, 90, 87, 89, 92],
    [93, 89, 92, 90, 87, 94],
    [96, 94, 95, 92, 90, 96],
    [92, 90, 91, 89, 88, 92],
    [94, 91, 93, 90, 89, 93],
    [97, 95, 96, 94, 93, 97],
    [90, 87, 89, 88, 85, 91],
    [95, 92, 94, 93, 90, 95],
    [91, 88, 90, 87, 89, 92],
    [93, 89, 92, 90, 87, 94],
    [96, 94, 95, 92, 90, 96],
    [92, 90, 91, 89, 88, 92],
    [94, 91, 93, 90, 89, 93],
    [97, 95, 96, 94, 93, 97],
    [90, 87, 89, 88, 85, 91],
    [95, 92, 94, 93, 90, 95],
    [91, 88, 90, 87, 89, 92],
    [93, 89, 92, 90, 87, 94],
    [96, 94, 95, 92, 90, 96],
    [92, 90, 91, 89, 88, 92],
    [94, 91, 93, 90, 89, 93],
    [97, 95, 96, 94, 93, 97],
    [90, 87, 89, 88, 85, 91],
    [95, 92, 94, 93, 90, 95],
    [91, 88, 90, 87, 89, 92],
    [93, 89, 92, 90, 87, 94],
    [96, 94, 95, 92, 90, 96],
    [92, 90, 91, 89, 88, 92],
    [94, 91, 93, 90, 89, 93],
    [97, 95, 96, 94, 93, 97],
    [90, 87, 89, 88, 85, 91],
    [95, 92, 94, 93, 90, 95],
    [91, 88, 90, 87, 89, 92],
    [93, 89, 92, 90, 87, 94],
    [96, 94, 95, 92, 90, 96],
    [92, 90, 91, 89, 88, 92],
    [94, 91, 93, 90, 89, 93],
    [97, 95, 96, 94, 93, 97],
    [90, 87, 89, 88, 85, 91],
    [95, 92, 94, 93, 90, 95],
    [91, 88, 90, 87, 89, 92],
    [93, 89, 92, 90, 87, 94],
    [96, 94, 95, 92, 90, 96],
    [92, 90, 91, 89, 88, 92],
    [94, 91, 93, 90, 89, 93],
    [97, 95, 96, 94, 93, 97],
])

# Wrap the matrix in a labelled DataFrame.
data_df = pd.DataFrame(data, columns=["X1", "X2", "X3", "X4", "X5", "Y"])

# Standardize the predictor columns only; Y is left out of both tests.
predictors_df = data_df.drop(columns=["Y"])
scaler = StandardScaler()
data_standardized = scaler.fit_transform(predictors_df)

# Kaiser-Meyer-Olkin measure: the first returned value is the per-variable
# result, the second is the overall model value that gets reported.
kmo_result = calculate_kmo(data_standardized)
kmo_all, kmo_model = kmo_result
print(f"KMO: {kmo_model}")

# Bartlett's test of sphericity on the same standardized predictors.
bartlett_result = calculate_bartlett_sphericity(data_standardized)
chi_square_value, p_value = bartlett_result
print(f"Bartlett's test: chi-square value = {chi_square_value}, p-value = {p_value}")
KMO: 0.8715690746752215 Bartlett's test: chi-square value = 465.65005448137515, p-value = 4.701677460260469e-94
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import FactorAnalysis
from sklearn.preprocessing import StandardScaler

# Raw data: five observed variables (X1..X5), 60 observations each.
data = {
    "X1": [
        88, 71, 94, 55, 79, 97, 80, 96, 99, 67,
        54, 59, 93, 77, 89, 95, 69, 61, 99, 78,
        59, 53, 98, 84, 52, 79, 55, 99, 56, 68,
        80, 74, 57, 53, 58, 94, 70, 91, 90, 54,
        52, 57, 66, 99, 78, 61, 95, 63, 57, 58,
        69, 90, 85, 70, 65, 59, 95, 91, 69, 90,
    ],
    "X2": [
        84, 94, 98, 68, 68, 83, 77, 75, 97, 63,
        76, 98, 99, 68, 90, 77, 55, 90, 65, 58,
        53, 67, 69, 56, 84, 78, 88, 54, 61, 93,
        75, 89, 97, 55, 95, 98, 87, 93, 62, 99,
        98, 71, 67, 60, 93, 65, 77, 54, 91, 53,
        54, 82, 64, 91, 71, 87, 66, 60, 54, 66,
    ],
    "X3": [
        75, 67, 98, 91, 56, 73, 81, 99, 58, 89,
        52, 93, 98, 58, 96, 84, 84, 54, 61, 67,
        97, 64, 54, 65, 59, 67, 78, 72, 55, 85,
        78, 69, 84, 67, 89, 70, 68, 61, 83, 86,
        98, 54, 60, 69, 85, 66, 58, 67, 96, 68,
        61, 99, 60, 88, 54, 92, 78, 96, 83, 78,
    ],
    "X4": [
        51, 91, 53, 63, 94, 70, 97, 64, 54, 69,
        82, 51, 54, 62, 67, 96, 60, 67, 99, 84,
        61, 93, 56, 90, 70, 85, 57, 66, 95, 99,
        70, 64, 61, 77, 62, 99, 88, 91, 85, 61,
        85, 76, 54, 67, 55, 82, 98, 58, 74, 69,
        75, 95, 67, 62, 79, 55, 54, 77, 99, 93,
    ],
    "X5": [
        72, 59, 84, 72, 83, 52, 88, 82, 54, 68,
        98, 59, 54, 94, 91, 88, 78, 97, 92, 98,
        67, 53, 98, 66, 54, 88, 56, 62, 81, 52,
        97, 61, 89, 98, 88, 97, 92, 61, 54, 81,
        53, 72, 78, 66, 53, 75, 84, 77, 58, 99,
        62, 61, 52, 87, 53, 76, 69, 97, 92, 53,
    ],
}
data_df = pd.DataFrame(data)

# Standardize every column before fitting the factor model.
scaler = StandardScaler()
data_standardized = scaler.fit_transform(data_df)

# Fit a varimax-rotated factor model.
# NOTE(review): n_components equals the number of observed variables (5);
# consider extracting fewer factors once the scree plot has been inspected.
fa = FactorAnalysis(n_components=5, rotation='varimax')
fa.fit(data_standardized)

# components_ is (factors x variables); transpose so rows are variables.
loadings = fa.components_.T

# Label the loadings: one row per variable, one column per factor.
loadings_df = pd.DataFrame(
    loadings,
    index=data_df.columns,
    columns=[f'Factor{i+1}' for i in range(loadings.shape[1])],
)

# Heatmap of the factor loadings.
plt.figure(figsize=(10, 6))
plt.title('Factor Loadings')
plt.xlabel('Factors')
plt.ylabel('Variables')
plt.imshow(loadings_df, cmap='coolwarm', aspect='auto')
plt.colorbar(label='Loading Value')
plt.xticks(np.arange(loadings_df.shape[1]), loadings_df.columns)
plt.yticks(np.arange(loadings_df.shape[0]), loadings_df.index)
plt.show()

# Variance of the estimated factor scores, one value per factor. These are
# NOT eigenvalues; the name and value are kept so the tuple displayed at the
# end of the script is unchanged.
ev = np.var(fa.transform(data_standardized), axis=0)

# A scree plot (and the "eigenvalue > 1" Kaiser reference line drawn below)
# is defined on the eigenvalues of the correlation matrix, so compute those
# explicitly. eigvalsh returns ascending order; reverse to largest-first.
correlation_matrix = np.corrcoef(data_standardized, rowvar=False)
eigenvalues = np.linalg.eigvalsh(correlation_matrix)[::-1]

# Scree plot of the correlation-matrix eigenvalues.
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(eigenvalues) + 1), eigenvalues, marker='o')
plt.title('Scree Plot')
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid(True)
plt.axhline(y=1, color='r', linestyle='--')  # Kaiser criterion threshold
plt.show()

# Notebook-style trailing expression: displays the loadings table and the
# factor-score variances when run as the last statement of a cell.
loadings_df, ev
( Factor1 Factor2 Factor3 Factor4 Factor5 X1 -0.023977 0.227612 -0.001454 0.0 0.0 X2 0.480101 -0.004796 -0.060505 0.0 0.0 X3 0.502411 -0.019103 -0.001600 0.0 0.0 X4 -0.373788 0.178633 -0.028584 0.0 0.0 X5 -0.270388 -0.030686 0.128410 0.0 0.0, array([0.44933773, 0.07996183, 0.02054263, 0. , 0. ]))