# Copyright 2017 st--, Mark van der Wilk # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from abc import abstractmethod import warnings import numpy as np import tensorflow as tf from . import transforms, kernels, settings from .decors import params_as_tensors, params_as_tensors_for from .params import Parameter, Parameterized from .dispatch import dispatch logger = settings.logger() # --------------------- # Basic inducing points # --------------------- class InducingFeature(Parameterized): """ Abstract base class for inducing features. """ @abstractmethod def __len__(self) -> int: """ Returns the number of features, relevant for example to determine the size of the variational distribution. """ raise NotImplementedError() class InducingPointsBase(InducingFeature): """ Real-space inducing points """ def __init__(self, Z): """ :param Z: the initial positions of the inducing points, size M x D """ super().__init__() self.Z = Parameter(Z, dtype=settings.float_type) def __len__(self): return self.Z.shape[0] class InducingPoints(InducingPointsBase): pass @dispatch(InducingPoints, kernels.Kernel) def Kuu(feat, kern, *, jitter=0.0): with params_as_tensors_for(feat): Kzz = kern.K(feat.Z) Kzz += jitter * tf.eye(len(feat), dtype=settings.dtypes.float_type) return Kzz @dispatch(InducingPoints, kernels.Kernel, object) def Kuf(feat, kern, Xnew): with params_as_tensors_for(feat): Kzx = kern.K(feat.Z, Xnew) return Kzx # ------------------ # Multiscale feature # ------------------ class Multiscale(InducingPointsBase): r""" Multi-scale inducing features Originally proposed in :: @incollection{NIPS2009_3876, title = {Inter-domain Gaussian Processes for Sparse Inference using Inducing Features}, author = {Miguel L\'{a}zaro-Gredilla and An\'{\i}bal Figueiras-Vidal}, booktitle = {Advances in Neural Information Processing Systems 22}, year = {2009}, } """ def __init__(self, Z, scales): super().__init__(Z) self.scales = Parameter(scales, transform=transforms.positive) # Multi-scale feature widths (std. dev. of Gaussian) if self.Z.shape != scales.shape: raise ValueError("Input locations `Z` and `scales` must have the same shape.") # pragma: no cover @staticmethod def _cust_square_dist(A, B, sc): """ Custom version of _square_dist that allows sc to provide per-datapoint length scales. sc: N x M x D. """ return tf.reduce_sum(tf.square((tf.expand_dims(A, 1) - tf.expand_dims(B, 0)) / sc), 2) @dispatch(Multiscale, kernels.RBF, object) def Kuf(feat, kern, Xnew): with params_as_tensors_for(feat, kern): Xnew, _ = kern._slice(Xnew, None) Zmu, Zlen = kern._slice(feat.Z, feat.scales) idlengthscales = kern.lengthscales + Zlen d = feat._cust_square_dist(Xnew, Zmu, idlengthscales) Kuf = tf.transpose(kern.variance * tf.exp(-d / 2) * tf.reshape(tf.reduce_prod(kern.lengthscales / idlengthscales, 1), (1, -1))) return Kuf @dispatch(Multiscale, kernels.RBF) def Kuu(feat, kern, *, jitter=0.0): with params_as_tensors_for(feat, kern): Zmu, Zlen = kern._slice(feat.Z, feat.scales) idlengthscales2 = tf.square(kern.lengthscales + Zlen) sc = tf.sqrt( tf.expand_dims(idlengthscales2, 0) + tf.expand_dims(idlengthscales2, 1) - tf.square( kern.lengthscales)) d = feat._cust_square_dist(Zmu, Zmu, sc) Kzz = kern.variance * tf.exp(-d / 2) * tf.reduce_prod(kern.lengthscales / sc, 2) Kzz += jitter * tf.eye(len(feat), dtype=settings.float_type) return Kzz # --------------------- # InducingPatch feature # --------------------- class InducingPatch(InducingPointsBase): """ Class for interdomain inducing patches, for use with Convolutional kernels. @incollection{vdw2017convgp, title = {Convolutional Gaussian Processes}, author = {van der Wilk, Mark and Rasmussen, Carl Edward and Hensman, James}, booktitle = {Advances in Neural Information Processing Systems 30}, year = {2017}, url = {http://papers.nips.cc/paper/6877-convolutional-gaussian-processes.pdf} } """ pass @dispatch(InducingPatch, kernels.Convolutional, object) def Kuf(feat, kern, Xnew): with params_as_tensors_for(feat, kern): Xp = kern.get_patches(Xnew) # N x num_patches x patch_len bigKzx = kern.basekern.K(feat.Z, Xp) # [M, N, P] Kzx = tf.reduce_sum(bigKzx * kern.weights if hasattr(kern, 'weights') else bigKzx, [2]) return Kzx / kern.num_patches @dispatch(InducingPatch, kernels.Convolutional) def Kuu(feat, kern, jitter=0.0): with params_as_tensors_for(feat, kern): return kern.basekern.K(feat.Z) + jitter * tf.eye(len(feat), dtype=settings.float_type) # ------- # Helpers # ------- def inducingpoint_wrapper(feat, Z): """ Models which used to take only Z can now pass `feat` and `Z` to this method. This method will check for consistency and return the correct feature. This allows backwards compatibility in for the methods. """ if feat is not None and Z is not None: raise ValueError("Cannot pass both an InducingFeature instance and Z values") # pragma: no cover elif feat is None and Z is None: raise ValueError("You must pass either an InducingFeature instance or Z values") # pragma: no cover elif Z is not None: feat = InducingPoints(Z) elif isinstance(feat, np.ndarray): feat = InducingPoints(feat) else: assert isinstance(feat, InducingFeature) # pragma: no cover return feat