# Copyright 2016 James Hensman, Mark van der Wilk, Valentine Svensson, alexggmatthews, fujiisoup
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import abc

import numpy as np
import tensorflow as tf

from .. import settings
from ..core.compilable import Build
from ..params import Parameterized, DataHolder
from ..decors import autoflow
from ..mean_functions import Zero
from ..likelihoods import SwitchedLikelihood


class Model(Parameterized):
    def __init__(self, name=None):
        """
        Name is a string describing this model.
        """
        super(Model, self).__init__(name=name)
        self._objective = None
        self._likelihood_tensor = None

    @property
    def objective(self):
        return self._objective

    @property
    def likelihood_tensor(self):
        return self._likelihood_tensor

    @autoflow()
    def compute_log_prior(self):
        """Compute the log prior of the model."""
        return self.prior_tensor

    @autoflow()
    def compute_log_likelihood(self):
        """Compute the log likelihood of the model."""
        return self.likelihood_tensor

    def is_built(self, graph):
        is_built = super().is_built(graph)
        if is_built is not Build.YES:
            return is_built
        if self._likelihood_tensor is None:
            return Build.NO
        return Build.YES

    def build_objective(self):
        likelihood = self._build_likelihood()
        priors = []
        for param in self.parameters:
            unconstrained = param.unconstrained_tensor
            constrained = param._build_constrained(unconstrained)
            priors.append(param._build_prior(unconstrained, constrained))
        prior = self._build_prior(priors)
        return self._build_objective(likelihood, prior)

    def _clear(self):
        super(Model, self)._clear()
        self._likelihood_tensor = None
        self._objective = None

    def _build(self):
        super(Model, self)._build()
        likelihood = self._build_likelihood()
        prior = self.prior_tensor
        objective = self._build_objective(likelihood, prior)
        self._likelihood_tensor = likelihood
        self._objective = objective

    def sample_feed_dict(self, sample):
        tensor_feed_dict = {}
        for param in self.parameters:
            if not param.trainable:
                continue
            constrained_value = sample[param.pathname]
            unconstrained_value = param.transform.backward(constrained_value)
            tensor = param.unconstrained_tensor
            tensor_feed_dict[tensor] = unconstrained_value
        return tensor_feed_dict

    def _build_objective(self, likelihood_tensor, prior_tensor):
        func = tf.add(likelihood_tensor, prior_tensor, name='nonneg_objective')
        return tf.negative(func, name='objective')

    @abc.abstractmethod
    def _build_likelihood(self):
        pass


class GPModel(Model):
    r"""
    A base class for Gaussian process models, that is, those of the form

    .. math::
       :nowrap:

       \begin{align}
           \theta        & \sim p(\theta) \\
           f             & \sim \mathcal{GP}(m(x), k(x, x'; \theta)) \\
           f_i           & = f(x_i) \\
           y_i \,|\, f_i & \sim p(y_i | f_i)
       \end{align}

    This class mostly adds functionality to compile predictions. To use it,
    inheriting classes must define a _build_predict method, which computes
    the means and variances of the latent function. This gets compiled
    similarly to _build_likelihood in the Model class.

    These predictions are then pushed through the likelihood to obtain means
    and variances of held-out data, self.predict_y.

    The predictions can also be used to compute the (log) density of held-out
    data via self.predict_density.

    To work with other data (Xnew, Ynew), assign the new values to self.X and
    self.Y:

    >>> m.X = Xnew
    >>> m.Y = Ynew
    """

    def __init__(self, X, Y, kern, likelihood, mean_function,
                 num_latent=None, name=None):
        super(GPModel, self).__init__(name=name)
        if num_latent is None:
            # Note: It's not nice for `Model` to need to be aware of specific
            # likelihoods as here. However, `num_latent` is a bit more broken
            # in general, specifically regarding multioutput kernels. We
            # should fix this in the future.
            # It also has slightly problematic assumptions re the output
            # dimensions of mean_function.
            num_latent = Y.shape[1]
            if isinstance(likelihood, SwitchedLikelihood):
                # the SwitchedLikelihood partitions/stitches based on the last
                # column in Y, but we should not add a separate latent GP for
                # this! Hence decrement by 1.
                assert num_latent >= 2
                num_latent -= 1

        self.num_latent = num_latent
        self.mean_function = mean_function or Zero(output_dim=self.num_latent)
        self.kern = kern
        self.likelihood = likelihood

        if isinstance(X, np.ndarray):
            # X is a data matrix; each row represents one instance
            X = DataHolder(X)
        if isinstance(Y, np.ndarray):
            # Y is a data matrix, rows correspond to the rows in X,
            # columns are treated independently
            Y = DataHolder(Y)
        self.X, self.Y = X, Y

    @autoflow((settings.float_type, [None, None]))
    def predict_f(self, Xnew):
        """
        Compute the mean and variance of the latent function(s) at the
        points Xnew.
        """
        return self._build_predict(Xnew)

    @autoflow((settings.float_type, [None, None]))
    def predict_f_full_cov(self, Xnew):
        """
        Compute the mean and covariance matrix of the latent function(s) at
        the points Xnew.
        """
        return self._build_predict(Xnew, full_cov=True)

    @autoflow((settings.float_type, [None, None]), (tf.int32, []))
    def predict_f_samples(self, Xnew, num_samples):
        """
        Produce samples from the posterior latent function(s) at the points
        Xnew.
        """
        mu, var = self._build_predict(Xnew, full_cov=True)  # mu: N x P, var: P x N x N
        jitter = tf.eye(tf.shape(mu)[0], dtype=settings.float_type) * settings.jitter
        samples = []
        for i in range(self.num_latent):
            L = tf.cholesky(var[i, :, :] + jitter)
            shape = tf.stack([tf.shape(L)[0], num_samples])
            V = tf.random_normal(shape, dtype=settings.float_type)
            samples.append(mu[:, i:i + 1] + tf.matmul(L, V))
        return tf.transpose(tf.stack(samples))

    @autoflow((settings.float_type, [None, None]))
    def predict_y(self, Xnew):
        """
        Compute the mean and variance of held-out data at the points Xnew.
        """
        pred_f_mean, pred_f_var = self._build_predict(Xnew)
        return self.likelihood.predict_mean_and_var(pred_f_mean, pred_f_var)

    @autoflow((settings.float_type, [None, None]), (settings.float_type, [None, None]))
    def predict_density(self, Xnew, Ynew):
        """
        Compute the (log) density of the data Ynew at the points Xnew.

        Note that this computes the log density of the data individually,
        ignoring correlations between them. The result is a matrix the same
        shape as Ynew containing the log densities.
        """
        pred_f_mean, pred_f_var = self._build_predict(Xnew)
        return self.likelihood.predict_density(pred_f_mean, pred_f_var, Ynew)

    @abc.abstractmethod
    def _build_predict(self, *args, **kwargs):
        pass
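# ----------------------------------------------------------------------------
# The block below is an illustrative usage sketch only, not part of this
# module's API. It assumes the GPflow 1.x `GPR` model (a concrete GPModel
# subclass), the `RBF` kernel and `ScipyOptimizer`, all of which live outside
# this file; adapt the names if your version differs. It is wrapped in a
# __main__ guard so it never runs on import.

if __name__ == "__main__":
    import gpflow  # imported here, not at module level, to avoid a circular import

    # Toy 1-D regression data.
    rng = np.random.RandomState(0)
    X_data = rng.rand(20, 1)
    Y_data = np.sin(10 * X_data) + 0.1 * rng.randn(20, 1)

    # GPR defines _build_likelihood and _build_predict, which is all that
    # this base class requires of a subclass.
    m = gpflow.models.GPR(X_data, Y_data, kern=gpflow.kernels.RBF(input_dim=1))

    # Training maximises log likelihood + log prior, i.e. minimises the
    # `objective` tensor assembled by Model._build_objective.
    gpflow.train.ScipyOptimizer().minimize(m)

    # Latent-function and held-out predictions at new inputs.
    Xtest = np.linspace(0.0, 1.0, 50).reshape(-1, 1)
    f_mean, f_var = m.predict_f(Xtest)
    y_mean, y_var = m.predict_y(Xtest)

    # Swap in other data of the same shape, as described in the GPModel docstring.
    X_other = rng.rand(20, 1)
    Y_other = np.sin(10 * X_other) + 0.1 * rng.randn(20, 1)
    m.X = X_other
    m.Y = Y_other
    print(m.compute_log_likelihood())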