Solver Comparison¶
Pinball ships four linear quantile regression solvers. This notebook compares them on problems of increasing size.
| Solver | Method key | Best for |
|---|---|---|
| Barrodale-Roberts | `br` | n < 5,000 |
| Frisch-Newton | `fn` | 5,000 < n < 100,000 |
| Preprocessing + FN | `pfn` | n > 100,000 |
| Lasso | `lasso` | Sparse / high-dimensional |
In [ ]:
Copied!
# Setup: NumPy for data generation, time for wall-clock benchmarking,
# matplotlib for plotting, and the pinball quantile-regression estimator.
import numpy as np
import time
import matplotlib.pyplot as plt
from pinball import QuantileRegressor
# Consistent plot styling and a fixed seed so benchmark data is reproducible.
plt.style.use('seaborn-v0_8-whitegrid')
rng = np.random.default_rng(42)
# NOTE(review): the lines below duplicate the cell above — an artifact of the
# notebook-to-docs export ("Copied!" blocks); both copies are kept verbatim.
import numpy as np
import time
import matplotlib.pyplot as plt
from pinball import QuantileRegressor
plt.style.use('seaborn-v0_8-whitegrid')
rng = np.random.default_rng(42)
Generate Synthetic Data¶
In [ ]:
Copied!
def make_data(n, p=5, rng=rng):
    """Draw a heteroscedastic linear-model sample of size ``n`` with ``p`` features.

    Returns ``(X, y, beta_true)`` where ``X`` is standard normal,
    ``beta_true`` is ``[1., 2., ..., p]``, and the noise standard deviation
    scales with ``1 + |X[:, 0]|`` so conditional quantiles are non-trivial.
    """
    design = rng.normal(size=(n, p))
    coefs = np.arange(1, p + 1, dtype=float)
    # Heteroscedastic noise: spread grows with the magnitude of feature 0.
    spread = 1 + np.abs(design[:, 0])
    y = design @ coefs + spread * rng.normal(size=n)
    return design, y, coefs
# Smoke-test the generator on a small sample.
X_test, y_test, _ = make_data(1000)
print(f'Shape: X={X_test.shape}, y={y_test.shape}')
def make_data(n, p=5, rng=rng):
    """Generate (X, y, beta_true): Gaussian X, beta = [1..p], heteroscedastic noise."""
    X = rng.normal(size=(n, p))
    # True coefficients are simply 1, 2, ..., p.
    beta_true = np.arange(1, p + 1, dtype=float)
    # Noise spread grows with |X[:, 0]|, so quantiles fan out with feature 0.
    noise = rng.normal(size=n) * (1 + np.abs(X[:, 0]))
    y = X @ beta_true + noise
    return X, y, beta_true
# Sanity-check the generated shapes.
X_test, y_test, _ = make_data(1000)
print(f'Shape: X={X_test.shape}, y={y_test.shape}')
Timing: BR vs FN vs PFN¶
In [ ]:
Copied!
# Benchmark wall-clock fit time for each exact solver over increasing n.
sizes = [500, 1000, 2000, 5000, 10_000, 20_000, 50_000]
methods = ['br', 'fn', 'pfn']
timings = {name: [] for name in methods}
for n in sizes:
    X, y, _ = make_data(n)
    for method in methods:
        model = QuantileRegressor(tau=0.5, method=method)
        # Time only the fit call itself, not data generation.
        start = time.perf_counter()
        model.fit(X, y)
        timings[method].append(time.perf_counter() - start)
    print(f'n={n:>6d}: BR={timings["br"][-1]:.4f}s '
          f'FN={timings["fn"][-1]:.4f}s PFN={timings["pfn"][-1]:.4f}s')
# Benchmark wall-clock fit time of each solver over increasing sample sizes.
sizes = [500, 1000, 2000, 5000, 10_000, 20_000, 50_000]
methods = ['br', 'fn', 'pfn']
timings = {m: [] for m in methods}
for n in sizes:
    X, y, _ = make_data(n)
    for method in methods:
        model = QuantileRegressor(tau=0.5, method=method)
        # Time only the fit call itself.
        t0 = time.perf_counter()
        model.fit(X, y)
        elapsed = time.perf_counter() - t0
        timings[method].append(elapsed)
    print(f'n={n:>6d}: BR={timings["br"][-1]:.4f}s '
          f'FN={timings["fn"][-1]:.4f}s PFN={timings["pfn"][-1]:.4f}s')
In [ ]:
Copied!
# Plot fit time against n on log-log axes; straight-line segments suggest
# power-law scaling for each solver.
plt.figure(figsize=(9, 5))
for method, marker in zip(methods, ['o', 's', '^']):
    fmt = f'-{marker}'
    plt.plot(sizes, timings[method], fmt, lw=2, markersize=8,
             label=method.upper())
plt.xscale('log')
plt.yscale('log')
plt.xlabel('n (samples)')
plt.ylabel('Time (seconds)')
plt.title('Solver Timing Comparison (p=5, tau=0.5)')
plt.grid(True, which='both', alpha=0.3)
plt.legend()
plt.show()
# Log-log plot of fit time vs sample size, one line per solver.
plt.figure(figsize=(9, 5))
for method, marker in zip(methods, ['o', 's', '^']):
    plt.plot(sizes, timings[method], f'-{marker}', lw=2,
             markersize=8, label=method.upper())
plt.xlabel('n (samples)')
plt.ylabel('Time (seconds)')
plt.title('Solver Timing Comparison (p=5, tau=0.5)')
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.grid(True, which='both', alpha=0.3)
plt.show()
Key Observations¶
- BR (simplex) is fastest for small n but grows superlinearly.
- FN (interior point) scales better and dominates for medium n.
- PFN (preprocessing) has startup overhead but wins for large n.
Accuracy Comparison¶
In [ ]:
Copied!
# The noise is symmetric around zero, so the conditional median is the true
# line; all three exact solvers should recover beta_true closely.
X, y, beta_true = make_data(5000)
print(f'True beta: {beta_true}')
print()
for method in ('br', 'fn', 'pfn'):
    model = QuantileRegressor(tau=0.5, method=method)
    model.fit(X, y)
    # Worst-case absolute coefficient error for this solver.
    residual = model.coef_ - beta_true
    max_err = np.abs(residual).max()
    print(f'{method.upper():>4s}: coef = {model.coef_}, '
          f'max|error| = {max_err:.4f}')
# Compare estimated coefficients from each exact solver against the truth.
X, y, beta_true = make_data(5000)
print(f'True beta: {beta_true}')
print()
for method in ['br', 'fn', 'pfn']:
    model = QuantileRegressor(tau=0.5, method=method)
    model.fit(X, y)
    # Worst-case absolute coefficient error for this solver.
    max_err = np.max(np.abs(model.coef_ - beta_true))
    print(f'{method.upper():>4s}: coef = {model.coef_}, '
          f'max|error| = {max_err:.4f}')
Lasso: Sparse Feature Selection¶
In [ ]:
Copied!
# Sparse ground truth: only the first 3 of 20 coefficients are non-zero.
n, p = 500, 20
X_sparse = rng.normal(size=(n, p))
beta_sparse = np.zeros(p)
beta_sparse[:3] = [2.0, -1.5, 1.0]
y_sparse = X_sparse @ beta_sparse + rng.normal(size=n) * 0.5

# Fit the L1-penalised solver and an unpenalised interior-point baseline.
model_lasso = QuantileRegressor(tau=0.5, method='lasso')
model_lasso.fit(X_sparse, y_sparse)
model_fn = QuantileRegressor(tau=0.5, method='fn')
model_fn.fit(X_sparse, y_sparse)

# Grouped bar chart: true vs unpenalised vs lasso coefficients per feature.
plt.figure(figsize=(10, 5))
width = 0.3
x_pos = np.arange(p)
groups = [
    (x_pos - width, beta_sparse, 'True'),
    (x_pos, model_fn.coef_, 'FN (no penalty)'),
    (x_pos + width, model_lasso.coef_, 'Lasso'),
]
for offsets, heights, label in groups:
    plt.bar(offsets, heights, width, label=label, alpha=0.8)
plt.xlabel('Feature index')
plt.ylabel('Coefficient')
plt.title('Lasso Quantile Regression \u2014 Sparse Recovery')
plt.xticks(x_pos)
plt.legend()
plt.show()
print(f'Non-zero Lasso coefficients: {np.sum(np.abs(model_lasso.coef_) > 0.01)}')
# Sparse ground truth: 3 active features out of 20.
n, p = 500, 20
X_sparse = rng.normal(size=(n, p))
beta_sparse = np.zeros(p)
beta_sparse[:3] = [2.0, -1.5, 1.0]
y_sparse = X_sparse @ beta_sparse + rng.normal(size=n) * 0.5
# Fit lasso-penalised and unpenalised median regressions on the same data.
model_lasso = QuantileRegressor(tau=0.5, method='lasso')
model_lasso.fit(X_sparse, y_sparse)
model_fn = QuantileRegressor(tau=0.5, method='fn')
model_fn.fit(X_sparse, y_sparse)
# Side-by-side bars: truth vs FN vs Lasso for every feature index.
plt.figure(figsize=(10, 5))
x_pos = np.arange(p)
width = 0.3
plt.bar(x_pos - width, beta_sparse, width, label='True', alpha=0.8)
plt.bar(x_pos, model_fn.coef_, width, label='FN (no penalty)', alpha=0.8)
plt.bar(x_pos + width, model_lasso.coef_, width, label='Lasso', alpha=0.8)
plt.xlabel('Feature index')
plt.ylabel('Coefficient')
plt.title('Lasso Quantile Regression \u2014 Sparse Recovery')
plt.legend()
plt.xticks(x_pos)
plt.show()
# Count coefficients the lasso kept (0.01 threshold treats tiny values as zero).
print(f'Non-zero Lasso coefficients: {np.sum(np.abs(model_lasso.coef_) > 0.01)}')
BR Solver with Confidence Intervals¶
In [ ]:
Copied!
from pinball.datasets import load_engel

# Engel expenditure data bundled with pinball; the BR solver can attach
# rank-inversion confidence intervals via solver_options.
engel = load_engel()
X, y = engel.data, engel.target
model = QuantileRegressor(tau=0.5, method='br',
                          solver_options={'ci': True, 'alpha': 0.05})
model.fit(X, y)

# Guard clause: the 'ci' entry may be absent from solver_info.
ci = model.solver_result_.solver_info.get('ci')
if ci is None:
    print('CI not available in solver_info')
else:
    print('Rank-inversion 95% CI:')
    # names[i] pairs with row i of ci: intercept first, then each feature.
    names = ['intercept'] + engel.feature_names
    for i, name in enumerate(names):
        print(f' {name:>12s}: [{ci[i, 0]:.4f}, {ci[i, 1]:.4f}]')
from pinball.datasets import load_engel
# Engel household expenditure data bundled with pinball.
engel = load_engel()
X, y = engel.data, engel.target
# Request rank-inversion confidence intervals from the BR solver
# (alpha=0.05 -> 95% intervals).
model = QuantileRegressor(tau=0.5, method='br',
                          solver_options={'ci': True, 'alpha': 0.05})
model.fit(X, y)
# The 'ci' entry may be absent from solver_info, hence .get plus the guard.
ci = model.solver_result_.solver_info.get('ci')
if ci is not None:
    print('Rank-inversion 95% CI:')
    # names[i] pairs with row i of ci: intercept first, then each feature.
    names = ['intercept'] + engel.feature_names
    for i, name in enumerate(names):
        print(f' {name:>12s}: [{ci[i, 0]:.4f}, {ci[i, 1]:.4f}]')
else:
    print('CI not available in solver_info')