Linear Regression

Performing linear regression and analyzing the relationship between variables with SciPy

Python
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Generate reproducible sample data: a line y = 2x + 1 plus standard-normal noise
np.random.seed(42)
x = np.linspace(0, 10, 100)
y = 2 * x + 1 + np.random.normal(0, 1, 100)

# Fit a least-squares line. Besides slope and intercept, linregress returns
# the correlation coefficient, the p-value of the test that the slope is zero,
# and the standard error of the estimated slope.
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

# Print results
print(f"Slope: {slope:.4f}")
print(f"Intercept: {intercept:.4f}")
print(f"R-squared: {r_value**2:.4f}")
# Scientific notation on purpose: for a fit this strong the p-value is many
# orders of magnitude below 1e-4, so fixed-point ".4f" would show "0.0000".
print(f"P-value: {p_value:.4e}")

# Draw the observations and overlay the fitted line on a single set of axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, y, alpha=0.5)
ax.plot(x, intercept + slope * x, color='red', label='Regression Line')
ax.set_title("Linear Regression")
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.legend()
plt.show()

# Evaluate the fitted line at one new x value
x_new = 7.5
y_pred = intercept + slope * x_new
print(f"\nPredicted y for x = {x_new}: {y_pred:.4f}")

# Calculate a 95% confidence interval for the slope from the t-distribution
# with n - 2 degrees of freedom, centred on the estimate and scaled by its
# standard error.
# The confidence level is passed positionally: the keyword was renamed from
# `alpha` to `confidence` in SciPy 1.9 and `alpha=` was later removed, so the
# positional form is the only spelling that works on old and new SciPy alike.
conf_int = stats.t.interval(0.95, df=len(x) - 2, loc=slope, scale=std_err)
# Format the bounds explicitly; printing the raw tuple exposes NumPy scalar
# reprs (e.g. "np.float64(...)") on NumPy 2.
print(f"\n95% Confidence Interval for slope: ({conf_int[0]:.4f}, {conf_int[1]:.4f})")