"""Train a LightGBM regression model on a synthetic house-price dataset.

The script builds a reproducible synthetic dataset, fits a gradient-boosted
regressor with early stopping on a validation split, prints the RMSE on a
held-out test split and plots true versus predicted prices.
"""

from typing import Optional, Tuple

import numpy as np
import pandas as pd

# NOTE: lightgbm, scikit-learn and matplotlib are imported inside the
# functions that use them, so that ``generate_housing_data`` can be imported
# and reused with nothing but numpy and pandas installed.

RANDOM_SEED = 42

# Default LightGBM training parameters.
LGB_PARAMS = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
}


def generate_housing_data(
    num_samples: int = 1000,
    seed: int = RANDOM_SEED,
) -> Tuple[pd.DataFrame, pd.Series]:
    """Return a synthetic ``(X, y)`` house-price dataset.

    Args:
        num_samples: Number of rows to generate.
        seed: Seed for the private random generator.

    Returns:
        ``X`` with integer columns ``num_rooms`` (1-9), ``square_footage``
        (500-4999) and ``age`` (1-99), and ``y``, a linear combination of
        those columns plus Gaussian noise (standard deviation 10000).
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by the module-level functions, without touching global state.
    rng = np.random.RandomState(seed)
    X = pd.DataFrame({
        'num_rooms': rng.randint(1, 10, num_samples),
        'square_footage': rng.randint(500, 5000, num_samples),
        'age': rng.randint(1, 100, num_samples),
    })
    noise = rng.normal(0, 10000, num_samples)
    y = (
        X['num_rooms'] * 50000
        + X['square_footage'] * 100
        + X['age'] * -200
        + noise
    )
    return X, y


def train_model(
    X_train,
    y_train,
    X_valid,
    y_valid,
    params: Optional[dict] = None,
    num_boost_round: int = 1000,
    early_stopping_rounds: int = 50,
):
    """Fit a LightGBM booster, stopping early on the validation metric.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_valid: Validation features monitored for early stopping.
        y_valid: Validation targets.
        params: LightGBM parameters; ``LGB_PARAMS`` is used when ``None``.
        num_boost_round: Upper bound on the number of boosting rounds.
        early_stopping_rounds: Stop once the validation metric has not
            improved for this many consecutive rounds.

    Returns:
        The trained booster. The early-stopping callback records the best
        round in its ``best_iteration`` attribute.
    """
    import lightgbm as lgb

    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)
    return lgb.train(
        dict(LGB_PARAMS if params is None else params),
        dtrain,
        num_boost_round=num_boost_round,
        valid_sets=[dvalid],
        # Without this callback best_iteration carries no information and a
        # handful of rounds at learning_rate=0.05 leaves the model underfit.
        callbacks=[lgb.early_stopping(stopping_rounds=early_stopping_rounds)],
    )


def plot_predictions(y_true, y_pred) -> None:
    """Show a scatter plot of true versus predicted values."""
    import matplotlib.pyplot as plt

    plt.scatter(y_true, y_pred, alpha=0.3)
    plt.xlabel('True Values')
    plt.ylabel('Predicted Values')
    plt.title('True vs Predicted House Prices')
    plt.grid(True)
    plt.tight_layout()  # Adjust the layout to prevent truncation
    plt.show()


def main() -> None:
    """Generate the data, train the model, report RMSE and plot the fit."""
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    X, y = generate_housing_data()

    # Hold out 20% of the data for the final evaluation.
    X_train_full, X_test, y_train_full, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM_SEED
    )
    # Carve a validation split out of the training data so early stopping
    # never sees the test set (0.25 of the remaining 80% = 20% overall).
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train_full, y_train_full, test_size=0.25, random_state=RANDOM_SEED
    )

    model = train_model(X_train, y_train, X_valid, y_valid)

    y_pred = model.predict(X_test, num_iteration=model.best_iteration)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f'Root Mean Squared Error: {rmse}')

    plot_predictions(y_test, y_pred)


if __name__ == '__main__':
    main()