Content - 32d64021318eb8b9393abeb33169b94a373478fc - 83494da/GPflow/model.py

swh:1:snp:d2bcff616bbf538fe8ce2a9c384200307730292a

Tip revision: af90c6e97f09f0b9a77d2fcc796f8a031ad097e8 authored by alexggmatthews on 06 June 2016, 17:06:36 UTC
Building up cone.
Tip revision: af90c6e
model.py
from __future__ import print_function
from .param import Parameterized
from scipy.optimize import minimize, OptimizeResult
import numpy as np
import tensorflow as tf
from . import hmc
import sys
from functools import wraps


class ObjectiveWrapper(object):
    """
    A simple class to wrap the objective function in order to make it more
    robust.

    The previosly seen state is cached so that we can easily acess it if the
    model crashes.
    """
    def __init__(self, objective):
        self._objective = objective
        self._previous_x = None

    def __call__(self, x):
        f, g = self._objective(x)
        g_is_fin = np.isfinite(g)
        if np.all(g_is_fin):
            self._previous_x = x  # store the last known good value
            return f, g
        else:
            print("Warning: inf or nan in gradient: replacing with zeros")
            return f, np.where(g_is_fin, g, 0.)


class AutoFlow:
    """
    This decorator-class is designed for use on methods of the Model class
    (below).

    The idea is that methods that compute relevant quantities of the model
    (such as predictions) can define a tf graph which we automatically run when
    the (decorated) function is called.

    The syntax looks like:

    >>> class MyModel(Model):
    >>>
    >>>   @AutoFlow((tf.float64), (tf.float64))
    >>>   def my_method(self, x, y):
    >>>       #compute something, returning a tf graph.
    >>>       return tf.foo(self.baz, x, y)

    >>> m = MyModel()
    >>> x = np.random.randn(3,1)
    >>> y = np.random.randn(3,1)
    >>> result = my_method(x, y)

    Now the output of the method call is the _result_ of the computation,
    equivalent to

    >>> m = MyModel()
    >>> x = np.random.randn(3,1)
    >>> y = np.random.randn(3,1)
    >>> x_tf = tf.placeholder(tf.float64)
    >>> y_tf = tf.placeholder(tf.float64)
    >>> with m.tf_mode():
    >>>     graph = tf.foo(m.baz, x_tf, y_tf)
    >>> result = m._session.run(graph,
                                feed_dict={x_tf:x,
                                           y_tf:y,
                                           m._free_vars:m.get_free_state()})

    Not only is the syntax cleaner, but multiple calls to the method will
    result in the graph being constructed only once.

    """
    def __init__(self, *tf_arg_tuples):
        # NB. TF arg_tuples is a list of tuples, each of which can be used to
        # construct a tf placeholder.
        self.tf_arg_tuples = tf_arg_tuples

    def __call__(self, tf_method):
        @wraps(tf_method)
        def runnable(instance, *np_args):
            graph_name = '_' + tf_method.__name__ + '_graph'
            if not hasattr(instance, graph_name):
                if instance._needs_recompile:
                    instance._compile()  # ensures free_vars is up-to-date.
                self.tf_args = [tf.placeholder(*a) for a in self.tf_arg_tuples]
                with instance.tf_mode():
                    graph = tf_method(instance, *self.tf_args)
                setattr(instance, graph_name, graph)
            feed_dict = dict(zip(self.tf_args, np_args))
            feed_dict[instance._free_vars] = instance.get_free_state()
            graph = getattr(instance, graph_name)
            return instance._session.run(graph, feed_dict=feed_dict)
        return runnable


class Model(Parameterized):
    """
    The Model base class.

    To use this class, inherriting classes must define the method

    >>>     build_likelihood(self)

    which returns a tensorflow representation of the model likelihood.

    Param and Parameterized objects that are children of the model can be used
    in the tensorflow expression. Children on the model are defined by simply
    doing:

    >>> m = Model()
    >>> p = Param(1.0)
    >>> m.p = p

    At compile time (i.e. when build_likelihood is called), the `Param` object
    becomes a tensorflow variable.

    The result of build_likelihood() is added to the prior (see Parameterized
    class) and the resulting objective and gradients are compiled into
    self._objective.

    This object has a `_needs_recompile` switch. When any of the child nodes
    change, this object is notified and on optimization (or mcmc) the
    likelihood is recompiled. This allows fixing and constraining parameters,
    but only recompiling lazily.

    This object has a `_free_vars` tensorflow array. This array is ised to
    build the tensorflow representations of the Param objects during
    `make_tf_array`.

    This object defines `optimize` and `sample` to allow for model fitting.
    """
    def __init__(self, name='model'):
        """
        name is a string describing this model.
        """
        Parameterized.__init__(self)
        self._name = name
        self._needs_recompile = True
        self._free_vars = tf.placeholder('float64', name='free_vars')
        self._session = tf.Session()

    @property
    def name(self):
        return self._name

    def _compile(self, optimizer=None):
        """
        compile the tensorflow function "self._objective"
        """
        # Make float32 hack
        float32_hack = False
        if optimizer is not None:
            if tf.float64 not in optimizer._valid_dtypes() and \
                    tf.float32 in optimizer._valid_dtypes():
                print("Using float32 hack for Tensorflow optimizers...")
                float32_hack = True

        self._free_vars = tf.Variable(self.get_free_state())
        if float32_hack:
            x = self.get_free_state().astype(np.float32)
            self._free_vars32 = tf.Variable(x)
            self._free_vars = tf.cast(self._free_vars32, tf.float64)

        self.make_tf_array(self._free_vars)
        with self.tf_mode():
            f = self.build_likelihood() + self.build_prior()
            g, = tf.gradients(f, self._free_vars)

        self._minusF = tf.neg(f, name='objective')
        self._minusG = tf.neg(g, name='grad_objective')

        # The optimiser needs to be part of the computational graph, and needs
        # to be initialised before tf.initialise_all_variables() is called.
        if optimizer is None:
            opt_step = None
        else:
            if float32_hack:
                minus_F_f32 = tf.cast(self._minusF, tf.float32)
                opt_step = optimizer.minimize(minus_F_f32,
                                              var_list=[self._free_vars32])
            else:
                opt_step = optimizer.minimize(self._minusF,
                                              var_list=[self._free_vars])
        init = tf.initialize_all_variables()
        self._session.run(init)

        # build tensorflow functions for computing the likelihood
        print("compiling tensorflow function...")
        sys.stdout.flush()

        def obj(x):
            return self._session.run([self._minusF, self._minusG],
                                     feed_dict={self._free_vars: x})
        self._objective = obj
        print("done")
        sys.stdout.flush()
        self._needs_recompile = False

        return opt_step

    def __setattr__(self, key, value):
        Parameterized.__setattr__(self, key, value)
        # delete any AutoFlow related graphs
        if key == '_needs_recompile' and value:
            for key in dir(self):
                if key[0] == '_' and key[-6:] == '_graph':
                    delattr(self, key)

    @AutoFlow()
    def compute_log_prior(self):
        return self.build_prior()

    @AutoFlow()
    def compute_log_likelihood(self):
        return self.build_likelihood()

    def sample(self, num_samples, Lmax=20, epsilon=0.01, verbose=False):
        """
        Use Hamiltonian Monte Carlo to draw samples from the model posterior.
        """
        if self._needs_recompile:
            self._compile()
        return hmc.sample_HMC(self._objective, num_samples,
                              Lmax, epsilon,
                              x0=self.get_free_state(), verbose=verbose)

    def optimize(self, method='L-BFGS-B', tol=None, callback=None,
                 max_iters=1000, calc_feed_dict=None, **kw):
        """
        Optimize the model by maximizing the likelihood (possibly with the
        priors also) with respect to any free variables.

        method can be one of:
            a string, corresponding to a valid scipy.optimize.minimize string
            a tensorflow optimizer (e.g. tf.optimize.AdaGrad)

        The callback function is execteud by assing the current value of
        self.get_free_state()

        tol is the tolerance passed to scipy.optimize.minimize (ignored
            for tensorflow optimizers)

        max_iters defines the maximum number of iterations

        calc_feed_dict is an optional function which returns a dictionary
        (suitable for tf.Session.run's feed_dict argument)

        In the case of the scipy optimization routines, any additional keyword
        arguments are passed through.

        KeyboardInterrupts are caught and the model is set to the most recent
        value tried by the optimization routine.

        This method returns the results of the call to optimize.minimize, or a
        similar object in the tensorflow case.
        """

        if type(method) is str:
            return self._optimize_np(method, tol, callback, max_iters, **kw)
        else:
            return self._optimize_tf(method, callback, max_iters,
                                     calc_feed_dict, **kw)

    def _optimize_tf(self, method, callback, max_iters, calc_feed_dict):
        """
        Optimize the model using a tensorflow optimizer. see self.optimize()
        """
        opt_step = self._compile(optimizer=method)

        try:
            iteration = 0
            while iteration < max_iters:
                if calc_feed_dict is None:
                    feed_dict = {}
                else:
                    feed_dict = calc_feed_dict()
                self._session.run(opt_step, feed_dict=feed_dict)
                if callback is not None:
                    callback(self._session.run(self._free_vars))
                iteration += 1
        except KeyboardInterrupt:
            print("Caught KeyboardInterrupt, setting model\
                  with most recent state.")
            self.set_state(self._session.run(self._free_vars))
            return None

        final_x = self._session.run(self._free_vars)
        self.set_state(final_x)
        fun, jac = self._objective(final_x)
        r = OptimizeResult(x=final_x,
                           success=True,
                           message="Finished iterations.",
                           fun=fun,
                           jac=jac,
                           status="Finished iterations.")
        return r

    def _optimize_np(self, method='L-BFGS-B', tol=None, callback=None,
                     max_iters=1000, **kw):
        """
        Optimize the model to find the maximum likelihood  or MAP point. Here
        we wrap `scipy.optimize.minimize`, any keyword arguments are passed
        through as `options`.

        method is a string (default 'L-BFGS-B') specifying the scipy
        optimization routine, one of
            - 'Powell'
            - 'CG'
            - 'BFGS'
            - 'Newton-CG'
            - 'L-BFGS-B'
            - 'TNC'
            - 'COBYLA'
            - 'SLSQP'
            - 'dogleg'
        tol is the tolerance to be pased to the optimization routine
        callback is callback function to be passed to the optimization routine
        max_iters is the maximum numebr of iterations (used in the options dict
            for the optimization routine)
        """
        if self._needs_recompile:
            self._compile()

        options = dict(display=True, max_iters=max_iters)
        options.update(kw)

        # LBFGS-B hacks. the options are different, annoyingly.
        if method == 'L-BFGS-B':
            options['maxiter'] = max_iters
            del options['max_iters']
            options['disp'] = options['display']
            del options['display']

        # here's the actual cll to minimize. cathc keyboard errors as harmless.
        obj = ObjectiveWrapper(self._objective)
        try:
            result = minimize(fun=obj,
                              x0=self.get_free_state(),
                              method=method,
                              jac=True,
                              tol=tol,
                              callback=callback,
                              options=options)
        except (KeyboardInterrupt):
            print("Caught KeyboardInterrupt, setting \
                  model with most recent state.")
            self.set_state(obj._previous_x)
            return None

        print("optimization terminated, setting model state")
        self.set_state(result.x)
        return result


class GPModel(Model):
    """
    A base class for Gaussian process models, that is, those of the form

       theta ~ p(theta)
       f ~ GP(m(x), k(x, x'; theta))
       F = f(X)
       Y|F ~ p(Y|F)

    This class mostly adds functionality to compile predictions. To use it,
    inherriting classes must define a build_predict function, which computes
    the means and variances of the latent function. This gets compiled
    similarly to build_likelihood in the Model class.

    These predictions are then pushed through the likelihood to obtain means
    and variances of held out data, self.predict_y.

    The predictions can also be used to compute the (log) density of held-out
    data via self.predict_density.
    """
    def __init__(self, X, Y, kern, likelihood, mean_function, name='model'):
        self.X, self.Y, self.kern, self.likelihood, self.mean_function =\
            X, Y, kern, likelihood, mean_function
        Model.__init__(self, name)

    def build_predict(self):
        raise NotImplementedError

    @AutoFlow((tf.float64, [None, None]))
    def predict_f(self, Xnew):
        """
        Compute the mean and variance of the latent function(s) at the points
        Xnew.
        """
        return self.build_predict(Xnew)

    @AutoFlow((tf.float64, [None, None]))
    def predict_f_full_cov(self, Xnew):
        """
        Compute the mean and covariance matrix of the latent function(s) at the
        points Xnew.
        """
        return self.build_predict(Xnew, full_cov=True)

    @AutoFlow((tf.float64, [None, None]), (tf.int32, []))
    def predict_f_samples(self, Xnew, num_samples):
        """
        Produce samples from the posterior latent function(s) at the points
        Xnew.
        """
        mu, var = self.build_predict(Xnew, full_cov=True)
        samples = []
        for i in range(self.num_latent):
            L = tf.cholesky(var[:, :, i])
            shape = tf.pack([tf.shape(L)[0], num_samples])
            V = tf.random_normal(shape, dtype=tf.float64)
            samples.append(mu[:, i:i + 1] + tf.matmul(L, V))
        return tf.transpose(tf.pack(samples))

    @AutoFlow((tf.float64, [None, None]))
    def predict_y(self, Xnew):
        """
        Compute the mean and variance of held-out data at the points Xnew
        """
        pred_f_mean, pred_f_var = self.build_predict(Xnew)
        return self.likelihood.predict_mean_and_var(pred_f_mean, pred_f_var)

    @AutoFlow((tf.float64, [None, None]), (tf.float64, [None, None]))
    def predict_density(self, Xnew, Ynew):
        """
        Compute the (log) density of the data Ynew at the points Xnew

        Note that this computes the log denisty of the data individually,
        ignoring correlations between them. The result is a matrix the same
        shape as Ynew containing the log densities.
        """
        pred_f_mean, pred_f_var = self.build_predict(Xnew)
        return self.likelihood.predict_density(pred_f_mean, pred_f_var, Ynew)