from __future__ import division, print_function

import numpy as np
import scipy.optimize as opt
from matplotlib import pyplot as plt
from sklearn import linear_model as lm

# Solutions for exercise sheet 4


def ensure_mat(X):
    """Ensure that X is a 2-d array, adding a trailing axis if necessary."""
    if len(X.shape) == 2:
        return X
    else:
        return X[:, np.newaxis]


def mean_squared_error(v, w):
    """
    Mean squared error between v and w

    Parameters
    ----------
    v, w : {(N,), (N,1)} array_like
        Input vectors

    Returns
    -------
    mse : double
    """
    return np.mean((v - w)**2)


def Xpoly(X, d):
    """
    Expand input data with polynomial basis functions

    Parameters
    ----------
    X : {(N,), (N,1)} array_like
        Input vector
    d : int
        Degree of polynomial

    Returns
    -------
    Xp : (N,d+1) array_like
        Expanded input [1, X, X^2, ..., X^d]
    """
    X = ensure_mat(X)
    return np.concatenate([X**i for i in range(d+1)], axis=1)


def load_data():
    # Load training and test sets from CSV (first column X, second column Y)
    trainXY = np.genfromtxt('trainXY.csv', delimiter=',')
    testXY = np.genfromtxt('testXY.csv', delimiter=',')
    trainX = ensure_mat(trainXY[:, 0])
    trainY = ensure_mat(trainXY[:, 1])
    testX = ensure_mat(testXY[:, 0])
    testY = ensure_mat(testXY[:, 1])
    return trainX, trainY, testX, testY


class PolyWrapper(object):
    """
    Wrapper class preprocessing data into polynomial features before
    passing them to the underlying linear model.

    Constructor parameters
    ----------------------
    degree : int
        Degree of polynomial
    lm : LinearModel instance
        lm.fit and lm.predict are called by the respective methods of
        PolyWrapper. Linear models from sklearn will work here.
    """
    def __init__(self, degree, lm):
        self.d = degree
        self.lm = lm

    def fit(self, X, Y):
        Xp = Xpoly(X, self.d)
        self.lm = self.lm.fit(Xp, Y)
        return self

    def predict(self, X):
        Xp = Xpoly(X, self.d)
        return self.lm.predict(Xp)

    def model(self):
        return self.lm


def LOOCV(X, Y, model_constructor):
    """
    Leave-one-out cross-validation

    Parameters
    ----------
    X, Y : (N,1) array_like
    model_constructor : function
        Called without arguments to return a new model (required
        methods: model.fit, model.predict)

    Returns
    -------
    mse : list, length N
        Mean squared errors of the out-of-sample predictions
    """
    N = len(Y)
    mse = []
    for k in range(N):
        # Fit on all points except the k-th, then score the held-out point
        Xk = np.delete(X, k, axis=0)
        Yk = np.delete(Y, k, axis=0)
        fitted = model_constructor().fit(Xk, Yk)
        mse.append(mean_squared_error(Y[k, :], fitted.predict(X[k, :])))
    return mse


def exercise_4_2(d):
    trainX, trainY, _, _ = load_data()
    alphas = 10**np.linspace(-10, -1, 100)
    cv = []
    for alpha in alphas:
        mc = lambda: PolyWrapper(d, lm.Ridge(alpha=alpha, fit_intercept=False))
        cv.append(np.mean(LOOCV(trainX, trainY, mc)))
    plt.plot(alphas, cv)
    plt.xscale('log')
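
# --- Usage sketch (illustrative addition, not part of the exercise sheet) ---
# Shows how PolyWrapper and LOOCV fit together. It uses synthetic data so
# that it runs without trainXY.csv; the degree, noise level, and ridge
# penalty below are arbitrary example choices.
def loocv_demo(d=3, N=30, seed=0):
    rng = np.random.RandomState(seed)
    X = ensure_mat(np.sort(rng.uniform(0, 1, N)))
    Y = np.sin(2*np.pi*X) + 0.1*rng.randn(N, 1)
    # A fresh model per fold: LOOCV refits from scratch N times; the tiny
    # ridge penalty makes this essentially an unregularized fit
    mc = lambda: PolyWrapper(d, lm.Ridge(alpha=1e-6, fit_intercept=False))
    print('LOOCV estimate of the test MSE:', np.mean(LOOCV(X, Y, mc)))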

# Bayesian linear regression

class BayesianLinearRegression(object):
    """
    LinearModel class implementing Bayesian linear regression

    Constructor parameters
    ----------------------
    alpha, beta : double
        Precision of the prior and of the noise
    """
    def __init__(self, alpha, beta):
        self.alpha = alpha
        self.beta = beta

    def fit(self, X, Y):
        # Compute the posterior, using the simplified formulas for the
        # isotropic prior mu0 = 0, Sigma0^-1 = alpha*I_D
        N, D = X.shape
        # Formula (3.54) from PRML
        SigmaNInv = self.alpha*np.eye(D) + self.beta*np.dot(X.T, X)
        self.SigmaN = np.linalg.inv(SigmaNInv)
        # Formula (3.53) from PRML
        self.muN = np.dot(self.SigmaN, self.beta*np.dot(X.T, Y))
        # Compute the log evidence
        # Formula (3.82) from PRML
        EmuN = self.beta/2*np.sum((Y - np.dot(X, self.muN))**2) \
            + self.alpha/2*np.dot(self.muN.T, self.muN)
        s, logDetA = np.linalg.slogdet(SigmaNInv)
        # Formula (3.86) from PRML
        self.log_evidence = D/2*np.log(self.alpha) + N/2*np.log(self.beta) \
            - N/2*np.log(2*np.pi) - EmuN - 1/2*logDetA
        return self

    def predict(self, X):
        # Compute the mean of the predictive distribution
        return np.dot(X, self.muN)

    def model(self):
        return self


def logEvidence(trainX, trainY, d, alpha, beta):
    """
    Train model and compute its log evidence
    """
    if alpha < 0 or beta < 0:
        return -np.inf
    model = PolyWrapper(d, BayesianLinearRegression(alpha, beta))
    model.fit(trainX, trainY)
    return model.model().log_evidence


def show_fit(trainX, trainY, d, alpha, beta):
    """
    Fit model and plot the predictive distribution
    """
    poly = PolyWrapper(d, BayesianLinearRegression(alpha, beta))
    poly.fit(trainX, trainY)
    X = ensure_mat(np.linspace(0, 1, 100))
    Y = poly.predict(X)
    # Compute the predictive covariance to show the uncertainty
    sigmaN = poly.model().SigmaN
    # Formula (3.59) from PRML [extended to multiple x values]
    Ycov = 1/beta + np.dot(Xpoly(X, d), np.dot(sigmaN, Xpoly(X, d).T))
    plt.figure(1)
    # +- 1.96 standard deviations is the 95% posterior region
    plt.fill_between(X.squeeze(),
                     Y.squeeze() - 1.96*np.sqrt(np.diag(Ycov)),
                     Y.squeeze() + 1.96*np.sqrt(np.diag(Ycov)),
                     alpha=0.1, color='grey')
    plt.plot(trainX, trainY, 'k.', X, Y, 'b-')
    print('Prior variance =', 1/alpha)
    print('Noise variance =', 1/beta)
    print('Log evidence =', logEvidence(trainX, trainY, d, alpha, beta))


def exercise_4_3(d):
    trainX, trainY, _, _ = load_data()
    # Optimize the evidence over the hyperparameters (alpha, beta)
    x = opt.fmin(lambda x: -logEvidence(trainX, trainY, d, x[0], x[1]),
                 np.array([1, 1]), maxiter=1000)
    alpha = x[0]
    beta = x[1]
    # Show the resulting model fit
    show_fit(trainX, trainY, d, alpha, beta)


# Additional demos

def heatmap(func, x_min, x_max, y_min, y_max, points=75):
    r"""
    Plot heatmap of a function f(x,y) over the region
    :math:`x \in [x_{min}, x_{max}]` and :math:`y \in [y_{min}, y_{max}]`

    Parameters
    ----------
    func : function
        Function of two double arguments
    x_min, x_max : double
    y_min, y_max : double
        Boundaries of the plotting region
    points : int
        Number of points plotted in each dimension; the total number
        is :math:`points^2`
    """
    X, Y = np.meshgrid(np.linspace(x_min, x_max, points),
                       np.linspace(y_min, y_max, points))
    Z = np.vectorize(func)(X, Y)
    im = plt.imshow(Z, aspect='auto',
                    extent=[X.min(), X.max(), Y.min(), Y.max()],
                    origin='lower')
    plt.contour(X, Y, Z, 25)
    plt.colorbar(im)


def evidence_demo(d, points=10):
    # Show the marginal likelihood across different hyperparameter values
    # (both axes on a log10 scale)
    trainX, trainY, testX, testY = load_data()
    X, Y = trainX, trainY
    le = lambda a, b: logEvidence(X, Y, d, 10**a, 10**b)
    heatmap(le, -4, 1.25, -3, 2, points=points)
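
# --- Illustrative entry point (an assumption, not part of the original
# solutions): run the exercises when the file is executed directly. The
# degree below is an arbitrary example choice, and trainXY.csv/testXY.csv
# from the exercise sheet must be present for these calls to succeed.
if __name__ == '__main__':
    plt.figure(2)       # keep figure 1 free: show_fit draws there explicitly
    exercise_4_2(d=9)   # LOOCV curve over the ridge penalties
    exercise_4_3(d=9)   # evidence-optimized Bayesian fit, plotted in figure 1
    # evidence_demo(d=9)  # optional: heatmap of the log evidence
    plt.show()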