Online LightGBM Compiler

Training a regression model using LightGBM

Python
# Training a regression model using LightGBM

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Build a reproducible synthetic housing dataset.
# The global NumPy seed is fixed, so the order of the random draws below
# (rooms, footage, age, then noise) determines the exact values produced.
np.random.seed(42)
num_samples = 1000

rooms = np.random.randint(1, 10, num_samples)        # integers in [1, 9]
footage = np.random.randint(500, 5000, num_samples)  # integers in [500, 4999]
ages = np.random.randint(1, 100, num_samples)        # integers in [1, 99]

X = pd.DataFrame({
    'num_rooms': rooms,
    'square_footage': footage,
    'age': ages,
})

# Target: a linear combination of the three features plus Gaussian noise
# with mean 0 and standard deviation 10000.
noise = np.random.normal(0, 10000, num_samples)
linear_part = X['num_rooms'] * 50000 + X['square_footage'] * 100 + X['age'] * -200
y = linear_part + noise

# Split the dataset into training, validation and testing sets (60/20/20).
# The validation set is used only to decide when to stop boosting; the test
# set is never seen during training, so the RMSE reported below is not
# biased by the early-stopping decision.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, test_size=0.25, random_state=42
)

# Create Datasets for LightGBM.
# `reference=dtrain` makes the validation data reuse the training set's
# feature binning, as LightGBM expects for validation data.
dtrain = lgb.Dataset(X_train, label=y_train)
dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)

# Define the parameters for the LightGBM model
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9
}

# Train the model.
# `num_rounds` is an upper bound: 10 rounds at a 0.05 learning rate stop long
# before the model converges, so allow many rounds and let early stopping
# halt training once the validation RMSE has not improved for
# `early_stopping_rounds` consecutive iterations.
num_rounds = 1000
early_stopping_rounds = 50
model = lgb.train(
    params,
    dtrain,
    num_boost_round=num_rounds,
    valid_sets=[dvalid],
    valid_names=['valid'],
    callbacks=[lgb.early_stopping(stopping_rounds=early_stopping_rounds)],
)

# Make predictions.
# The early-stopping callback records the best iteration on the booster, so
# `model.best_iteration` now selects the best-scoring number of trees.
y_pred = model.predict(X_test, num_iteration=model.best_iteration)

# Evaluate the model on the held-out test set
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'Root Mean Squared Error: {rmse}')

# Scatter the held-out targets against the model's predictions
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.3)
ax.set_xlabel('True Values')
ax.set_ylabel('Predicted Values')
ax.set_title('True vs Predicted House Prices')
ax.grid(True)
fig.tight_layout()  # Keep labels and title from being clipped
plt.show()
Click Run or press Shift + Enter to run the code.