Online XGBoost Compiler

Training a regression model using XGBoost

Python
# Training a regression model using XGBoost

import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Build a reproducible synthetic dataset: three integer features and a noisy
# linear target. The order of the random draws (features in dict order, then
# the noise) determines the generated values, so it must not be rearranged.
np.random.seed(42)
num_samples = 1000

# (low, high) bounds passed to np.random.randint for each feature column.
_feature_bounds = {
    'num_rooms': (1, 10),
    'square_footage': (500, 5000),
    'age': (1, 100),
}
X = pd.DataFrame({
    column: np.random.randint(low, high, num_samples)
    for column, (low, high) in _feature_bounds.items()
})

# Target = weighted sum of the features plus Gaussian noise (mean 0, scale 10000).
_noise = np.random.normal(0, 10000, num_samples)
y = (
    X['num_rooms'] * 50000
    + X['square_footage'] * 100
    + X['age'] * -200
    + _noise
)

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable from run to run.
_splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = _splits

# Wrap each partition in an xgb.DMatrix with its targets attached as labels.
dtrain, dtest = (
    xgb.DMatrix(features, label=targets)
    for features, targets in ((X_train, y_train), (X_test, y_test))
)

# Booster configuration: squared-error regression objective, trees of depth 3,
# learning rate (eta) 0.1, and RMSE as the evaluation metric.
params = dict(
    objective='reg:squarederror',
    max_depth=3,
    eta=0.1,
    eval_metric='rmse',
)

# Fit the booster on the training matrix.
# NOTE(review): 10 boosting rounds at eta=0.1 may leave the model underfit;
# confirm this small round count is intentional for the demo.
num_rounds = 10
model = xgb.train(params, dtrain, num_boost_round=num_rounds)

# Score the held-out matrix with the trained booster.
y_pred = model.predict(dtest)

# Report the error as RMSE: the square root of the mean squared error
# between the held-out targets and the predictions.
_mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(_mse)
print(f'Root Mean Squared Error: {rmse}')

# Scatter the held-out targets against the predictions.
import matplotlib.pyplot as plt

# Draw on the current axes (created on demand), the same axes the pyplot
# shortcut functions would target.
_axes = plt.gca()
_axes.scatter(y_test, y_pred, alpha=0.3)
_axes.set_xlabel('True')
_axes.set_ylabel('Predicted')
_axes.set_title('True vs Predicted')
plt.tight_layout()
plt.show()
Click Run or press Shift + Enter to run the code.