# Training a regression model using XGBoost
#
# End-to-end demo script: builds a synthetic housing-style dataset, fits a
# gradient-boosted regressor with the native XGBoost API, reports the test
# RMSE, and plots true vs. predicted targets.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Generate a synthetic dataset.
# The global NumPy seed is fixed so the features and noise are reproducible.
np.random.seed(42)
num_samples = 1000

# np.random.randint excludes the upper bound, so e.g. num_rooms is in [1, 9].
X = pd.DataFrame({
    'num_rooms': np.random.randint(1, 10, num_samples),
    'square_footage': np.random.randint(500, 5000, num_samples),
    'age': np.random.randint(1, 100, num_samples),
})

# The target is a linear combination of the features plus Gaussian noise
# (mean 0, standard deviation 10000).
y = (
    X['num_rooms'] * 50000
    + X['square_footage'] * 100
    + X['age'] * -200
    + np.random.normal(0, 10000, num_samples)
)

# Split the dataset into training and testing sets (80% / 20%).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create DMatrix objects, the data container used by xgb.train / predict.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Define the parameters for the XGBoost model.
params = {
    'objective': 'reg:squarederror',
    'max_depth': 3,
    'eta': 0.1,
    'eval_metric': 'rmse',
}

# Train the model.
# NOTE(review): 10 boosting rounds at eta=0.1 is very few steps and likely
# leaves the model underfit; consider raising num_rounds -- confirm intent.
num_rounds = 10
model = xgb.train(params, dtrain, num_rounds)

# Make predictions on the held-out test set.
y_pred = model.predict(dtest)

# Evaluate the model: RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'Root Mean Squared Error: {rmse}')

# Plot the true vs predicted values.
plt.scatter(y_test, y_pred, alpha=0.3)
plt.xlabel('True')
plt.ylabel('Predicted')
plt.title('True vs Predicted')
plt.tight_layout()
plt.show()