from __future__ import division, print_function

import numpy as np
import scipy.optimize as opt
from matplotlib import pyplot as plt
from sklearn import linear_model as lm

# Solutions for exercise sheet 4


def ensure_mat(X):
    """Ensure that X is a 2-d array, adding a trailing axis if necessary."""
    if len(X.shape) == 2:
        return X
    else:
        return X[:, np.newaxis]


def mean_squared_error(v, w):
    """
    Mean squared error between v and w

    Parameters
    ----------
    v, w : {(N,), (N,1)} array_like
        Input vectors

    Returns
    -------
    mse : double
    """
    return np.mean((v - w)**2)


def Xpoly(X, d):
    """
    Expand input data with polynomial basis functions

    Parameters
    ----------
    X : {(N,), (N,1)} array_like
        Input vector
    d : int
        Degree of polynomial

    Returns
    -------
    Xp : (N,d+1) array_like
        Expanded input [1, X, X^2, ..., X^d]
    """
    X = ensure_mat(X)
    return np.concatenate([X**i for i in range(d+1)], axis=1)


def load_data():
    # Load training and test sets from CSV (first column X, second column Y)
    trainXY = np.genfromtxt('trainXY.csv', delimiter=',')
    testXY = np.genfromtxt('testXY.csv', delimiter=',')
    trainX = ensure_mat(trainXY[:, 0])
    trainY = ensure_mat(trainXY[:, 1])
    testX = ensure_mat(testXY[:, 0])
    testY = ensure_mat(testXY[:, 1])
    return trainX, trainY, testX, testY


class PolyWrapper(object):
    """
    Wrapper class preprocessing data into polynomial features before
    passing them to the underlying linear model.

    Constructor parameters
    ----------------------
    degree : int
        Degree of polynomial
    lm : LinearModel instance
        lm.fit and lm.predict are called by the respective methods of
        PolyWrapper. Linear models from sklearn will work here.
    """
    def __init__(self, degree, lm):
        self.d = degree
        self.lm = lm

    def fit(self, X, Y):
        Xp = Xpoly(X, self.d)
        self.lm = self.lm.fit(Xp, Y)
        return self

    def predict(self, X):
        Xp = Xpoly(X, self.d)
        return self.lm.predict(Xp)

    def model(self):
        return self.lm


def LOOCV(X, Y, model_constructor):
    """
    Leave-one-out cross-validation

    Parameters
    ----------
    X, Y : (N,1) array_like
    model_constructor : function
        Called without arguments to return a new model (required
        methods: model.fit, model.predict)

    Returns
    -------
    mse : list, length N
        Mean squared errors of the out-of-sample predictions
    """
    N = len(Y)
    mse = []
    for k in range(N):
        # Fit on all points except the k-th, then score the held-out point
        Xk = np.delete(X, k, axis=0)
        Yk = np.delete(Y, k, axis=0)
        fitted = model_constructor().fit(Xk, Yk)
        mse.append(mean_squared_error(Y[k, :], fitted.predict(X[k, :])))
    return mse


def exercise_4_2(d):
    trainX, trainY, _, _ = load_data()
    alphas = 10**np.linspace(-10, -1, 100)
    cv = []
    for alpha in alphas:
        mc = lambda: PolyWrapper(d, lm.Ridge(alpha=alpha, fit_intercept=False))
        cv.append(np.mean(LOOCV(trainX, trainY, mc)))
    plt.plot(alphas, cv)
    plt.xscale('log')
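
# --- Usage sketch (illustrative addition, not part of the exercise sheet) ---
# Shows how PolyWrapper and LOOCV fit together. It uses synthetic data so
# that it runs without trainXY.csv; the degree, noise level, and ridge
# penalty below are arbitrary example choices.
def loocv_demo(d=3, N=30, seed=0):
    rng = np.random.RandomState(seed)
    X = ensure_mat(np.sort(rng.uniform(0, 1, N)))
    Y = np.sin(2*np.pi*X) + 0.1*rng.randn(N, 1)
    # A fresh model per fold: LOOCV refits from scratch N times; the tiny
    # ridge penalty makes this essentially an unregularized fit
    mc = lambda: PolyWrapper(d, lm.Ridge(alpha=1e-6, fit_intercept=False))
    print('LOOCV estimate of the test MSE:', np.mean(LOOCV(X, Y, mc)))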

# Bayesian linear regression

class BayesianLinearRegression(object):
    """
    LinearModel class implementing Bayesian linear regression

    Constructor parameters
    ----------------------
    alpha, beta : double
        Precision of the prior and of the noise
    """
    def __init__(self, alpha, beta):
        self.alpha = alpha
        self.beta = beta

    def fit(self, X, Y):
        # Compute the posterior, using the simplified formulas for the
        # isotropic prior mu0 = 0, Sigma0^-1 = alpha*I_D
        N, D = X.shape
        # Formula (3.54) from PRML
        SigmaNInv = self.alpha*np.eye(D) + self.beta*np.dot(X.T, X)
        self.SigmaN = np.linalg.inv(SigmaNInv)
        # Formula (3.53) from PRML
        self.muN = np.dot(self.SigmaN, self.beta*np.dot(X.T, Y))
        # Compute the log evidence
        # Formula (3.82) from PRML
        EmuN = self.beta/2*np.sum((Y - np.dot(X, self.muN))**2) \
            + self.alpha/2*np.dot(self.muN.T, self.muN)
        s, logDetA = np.linalg.slogdet(SigmaNInv)
        # Formula (3.86) from PRML
        self.log_evidence = D/2*np.log(self.alpha) + N/2*np.log(self.beta) \
            - N/2*np.log(2*np.pi) - EmuN - 1/2*logDetA
        return self

    def predict(self, X):
        # Compute the mean of the predictive distribution
        return np.dot(X, self.muN)

    def model(self):
        return self


def logEvidence(trainX, trainY, d, alpha, beta):
    """
    Train model and compute its log evidence
    """
    if alpha < 0 or beta < 0:
        return -np.inf
    model = PolyWrapper(d, BayesianLinearRegression(alpha, beta))
    model.fit(trainX, trainY)
    return model.model().log_evidence


def show_fit(trainX, trainY, d, alpha, beta):
    """
    Fit model and plot the predictive distribution
    """
    poly = PolyWrapper(d, BayesianLinearRegression(alpha, beta))
    poly.fit(trainX, trainY)
    X = ensure_mat(np.linspace(0, 1, 100))
    Y = poly.predict(X)
    # Compute the predictive covariance to show the uncertainty
    sigmaN = poly.model().SigmaN
    # Formula (3.59) from PRML [extended to multiple x values]
    Ycov = 1/beta + np.dot(Xpoly(X, d), np.dot(sigmaN, Xpoly(X, d).T))
    plt.figure(1)
    # +- 1.96 standard deviations is the 95% posterior region
    plt.fill_between(X.squeeze(),
                     Y.squeeze() - 1.96*np.sqrt(np.diag(Ycov)),
                     Y.squeeze() + 1.96*np.sqrt(np.diag(Ycov)),
                     alpha=0.1, color='grey')
    plt.plot(trainX, trainY, 'k.', X, Y, 'b-')
    print('Prior variance =', 1/alpha)
    print('Noise variance =', 1/beta)
    print('Log evidence =', logEvidence(trainX, trainY, d, alpha, beta))


def exercise_4_3(d):
    trainX, trainY, _, _ = load_data()
    # Optimize the evidence over the hyperparameters (alpha, beta)
    x = opt.fmin(lambda x: -logEvidence(trainX, trainY, d, x[0], x[1]),
                 np.array([1, 1]), maxiter=1000)
    alpha = x[0]
    beta = x[1]
    # Show the resulting model fit
    show_fit(trainX, trainY, d, alpha, beta)


# Additional demos

def heatmap(func, x_min, x_max, y_min, y_max, points=75):
    r"""
    Plot heatmap of a function f(x,y) over the region
    :math:`x \in [x_{min}, x_{max}]` and :math:`y \in [y_{min}, y_{max}]`

    Parameters
    ----------
    func : function
        Function of two double arguments
    x_min, x_max : double
    y_min, y_max : double
        Boundaries of the plotting region
    points : int
        Number of points plotted in each dimension; the total number
        is :math:`points^2`
    """
    X, Y = np.meshgrid(np.linspace(x_min, x_max, points),
                       np.linspace(y_min, y_max, points))
    Z = np.vectorize(func)(X, Y)
    im = plt.imshow(Z, aspect='auto',
                    extent=[X.min(), X.max(), Y.min(), Y.max()],
                    origin='lower')
    plt.contour(X, Y, Z, 25)
    plt.colorbar(im)


def evidence_demo(d, points=10):
    # Show the marginal likelihood across different hyperparameter values
    # (both axes on a log10 scale)
    trainX, trainY, testX, testY = load_data()
    X, Y = trainX, trainY
    le = lambda a, b: logEvidence(X, Y, d, 10**a, 10**b)
    heatmap(le, -4, 1.25, -3, 2, points=points)
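
# --- Illustrative entry point (an assumption, not part of the original
# solutions): run the exercises when the file is executed directly. The
# degree below is an arbitrary example choice, and trainXY.csv/testXY.csv
# from the exercise sheet must be present for these calls to succeed.
if __name__ == '__main__':
    plt.figure(2)       # keep figure 1 free: show_fit draws there explicitly
    exercise_4_2(d=9)   # LOOCV curve over the ridge penalties
    exercise_4_3(d=9)   # evidence-optimized Bayesian fit, plotted in figure 1
    # evidence_demo(d=9)  # optional: heatmap of the log evidence
    plt.show()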