# Source: https://github.com/GPflow/GPflow -- gradkern.py
# Tip revision: 36230293422c6af5d956567162bba8c84a2f98ec
# authored by Rasmus Bonnevie on 02 August 2017, 15:43:05 UTC
# ("Merge branch 'blockkernel' into fast-grad")
import numpy as np
import tensorflow as tf
import GPflow
from GPflow.param import Param
from GPflow import transforms
import pdb
import matplotlib.pyplot as plt
from functools import partial
from multikernel import BlockKernel
from GPflow._settings import settings
float_type = settings.dtypes.float_type
def find_partitions(collection):
    '''Yield every set partition of `collection` as a tuple of tuples.

    Each yielded partition is a tuple of disjoint subsets (tuples) whose
    union is the whole collection. The recursion follows the classic
    construction: partition the tail, then either insert the head element
    into one of the existing subsets or give it a subset of its own.

    Adapted from alexis @
    http://stackoverflow.com/questions/19368375/set-partitions-in-python

    Parameters:
    - `collection`: indexable sequence (supports `[0]` and `[1:]`).

    Yields:
    - One tuple of tuples per unique partition.
    '''
    if not collection:
        # The empty collection has exactly one partition: the empty one.
        # (The original crashed with IndexError on `collection[0]` here.)
        yield ()
        return
    if len(collection) == 1:
        yield (tuple(collection),)
        return
    first = collection[0]
    for smaller in find_partitions(collection[1:]):
        smaller = tuple(smaller)
        # insert `first` in each of the subpartition's subsets
        for n, subset in enumerate(smaller):
            yield smaller[:n] + ((first,) + subset,) + smaller[n + 1:]
        # put `first` in its own subset
        yield ((first,),) + smaller
def find_partitions_old(collection):
    '''Yield every set partition of `collection` as a list of lists.

    Legacy list-based variant kept alongside the tuple-based
    `find_partitions`.

    BUG FIX: the recursive call previously went to `find_partitions`, which
    yields tuples of tuples; concatenating those tuples with the list
    fragments below (`smaller[:n] + [[first] + subset] + ...`) raised
    TypeError for any collection of length >= 2. The function now recurses
    into itself so every intermediate value stays a list.

    Adapted from alexis @
    http://stackoverflow.com/questions/19368375/set-partitions-in-python

    Parameters:
    - `collection`: list (or list-like supporting `[0]`, `[1:]`, and `+`).

    Yields:
    - One list of lists per unique partition.
    '''
    if not collection:
        # one partition of the empty collection: the empty partition
        yield []
        return
    if len(collection) == 1:
        yield [collection]
        return
    first = collection[0]
    for smaller in find_partitions_old(collection[1:]):
        # insert `first` in each of the subpartition's subsets
        for n, subset in enumerate(smaller):
            yield smaller[:n] + [[first] + subset] + smaller[n + 1:]
        # put `first` in its own subset
        yield [[first]] + smaller
class StationaryGradKernel(BlockKernel):
    '''Abstract kernel class which allows arbitrary derivative observations for
    any differentiable stationary kernel. Computes each kernel matrix as a
    composition of block matrices and automatically generates appropriate
    kernel functions if given derivatives of the kernel matrix wrt squared
    distance.

    To extend, inherit and replace the `derivative_base` attribute with a list
    of the 0th, 1st, and 2nd derivative of the desired kernel k(tau) with
    respect to squared distance tau. Defaults to RBF/squared exponential.
    '''

    def __init__(self, input_dim, active_dims=None):
        '''Build the block structure: one group per output type
        (function value plus one first derivative per spatial dimension).

        Parameters:
        - `input_dim`: total input dimension, including the block-index
          column handled by `BlockKernel` (spatial dims = input_dim - 1).
        - `active_dims`: passed through to `BlockKernel`.
        '''
        groups = input_dim
        BlockKernel.__init__(self, input_dim, groups, active_dims)
        self.variance = GPflow.param.Param(0.1, transforms.positive)
        self.lengthscales = GPflow.param.Param(1.*np.ones(input_dim-1), transforms.positive)
        # 0th, 1st, 2nd derivatives of the RBF base k(tau) = variance*exp(-tau/2)
        # with respect to the squared distance tau.
        self.derivative_base = [lambda tau: self.variance*tf.exp(-0.5*tau),
                                lambda tau: -0.5*self.variance*tf.exp(-0.5*tau),
                                lambda tau: 0.25*self.variance*tf.exp(-0.5*tau)]
        self.kerns = [[self._kernel_factory(i, j) for j in range(groups)]
                      for i in range(groups)]
        # Partitions of the derivative-index sets {0} and {0,1}; subsets of
        # size >= 3 are dropped because third (and higher) derivatives of the
        # squared distance vanish (see `square_dist_d`).
        self.partitions = [find_partitions(list(range(n+1))) for n in range(2)]
        self.partitions = [[partition for partition in all_partitions
                            if np.all([len(subset) < 3 for subset in partition])]
                           for all_partitions in self.partitions]
        print('initialized!')

    def subK(self, index, X, X2=None):
        '''Return sub-kernel block (i, j) evaluated on X, X2.'''
        i, j = index
        return self.kerns[i][j](X, X2)

    def subKdiag(self, index, X):
        '''Return the diagonal of the (index, index) sub-kernel block.'''
        K = self.subK((index, index), X, X)
        return tf.diag_part(K)

    def square_dist(self, X, X2):
        '''Function `squared_dist` identical to method in `Stationary`.'''
        X = X / self.lengthscales
        Xs = tf.reduce_sum(tf.square(X), 1)
        if X2 is None:
            return -2 * tf.matmul(X, tf.transpose(X)) + \
                tf.reshape(Xs, (-1, 1)) + tf.reshape(Xs, (1, -1))
        else:
            X2 = X2 / self.lengthscales
            X2s = tf.reduce_sum(tf.square(X2), 1)
            return -2 * tf.matmul(X, tf.transpose(X2)) + \
                tf.reshape(Xs, (-1, 1)) + tf.reshape(X2s, (1, -1))

    def square_dist_d(self, X, X2, diff_x=(), diff_y=(), precomputed_delta=None):
        '''Take derivative of square_dist(x,y) wrt dimensions `diff_x` of first
        argument and `diff_y` of second argument.

        Parameters:
        - `X` and `X2`: arguments for `square_dist`.
        - `diff_x`: Tuple of integers. Dimensions to differentiate with respect
          to in the first argument of `square_dist(x,y)`.
        - `diff_y`: Tuple of integers. Dimensions to differentiate with respect
          to in the second argument of `square_dist(x,y)`.
        - `precomputed_delta`: optional pairwise-difference tensor; when given,
          `X`/`X2` are assumed to be pre-scaled by the caller.

        Return:
        - Appropriate derivative as a Tensorflow Tensor.
        '''
        x_order = len(diff_x)
        y_order = len(diff_y)
        diff_order = x_order + y_order
        dx = diff_x + diff_y
        Zero = tf.zeros((tf.shape(X)[0], tf.shape(X)[0] if X2 is None else tf.shape(X2)[0]), dtype=float_type)
        One = tf.ones((tf.shape(X)[0], tf.shape(X)[0] if X2 is None else tf.shape(X2)[0]), dtype=float_type)
        if diff_order > 2:
            # higher derivatives of the squared distance vanish
            return Zero
        elif diff_order == 0:
            # if we don't take the derivative, return the function
            return self.square_dist(X, X2)
        elif diff_order == 1:
            if precomputed_delta is None:
                X = X / self.lengthscales ** 2
                if X2 is None:
                    X2 = X
                else:
                    X2 = X2 / self.lengthscales ** 2
                delta = tf.expand_dims(X[:, dx[0]], 1) - tf.expand_dims(X2[:, dx[0]], 0)
            else:
                delta = precomputed_delta[:, :, dx[0]]
            # sign-flip if y derivative
            if x_order > 0:
                return 2.*delta
            elif y_order > 0:
                return -2.*delta
        elif diff_order == 2:
            # only equal-index dimension pairs are non-zero
            i, j = dx
            if i == j:
                k = One*(2./self.lengthscales[i]**2.)
                # mixed x/y second derivative flips sign vs pure x or pure y
                if x_order == y_order:
                    return -k
                else:
                    return k
            else:
                return Zero
        raise RuntimeError

    def ind2dx(self, index):
        '''Convert a linear block index into a tuple of dimensions to
        differentiate with respect to (() for the function-value block).'''
        return () if index == 0 else (index-1,)

    def kernel_derivative(self, X, X2, diff_x=(), diff_y=()):
        '''Calculates derivative of stationary kernel by decomposing into
        derivatives of the kernel wrt the distance and derivatives of the
        distance wrt the inputs (Faa di Bruno-style sum over partitions).
        Procedurally generates each term of the derivative by iterating over
        all possible products of mixed derivatives.

        Parameters:
        - `X` and `X2`: Numpy/TF array. Inputs over which the kernel is
          evaluated.
        - `diff_x` and `diff_y`: Tuples of dimension indices to differentiate
          wrt in the first and second argument respectively.

        Return:
        - `result`: TF Tensor holding the requested kernel-derivative matrix.
        '''
        # NOTE(review): X is scaled by lengthscales**2 here and `square_dist`
        # divides by lengthscales again internally -- looks like a double
        # scaling; confirm against the intended math before relying on it.
        X = X / self.lengthscales ** 2
        if X2 is None:
            X2 = X
        else:
            X2 = X2 / self.lengthscales ** 2
        delta = tf.expand_dims(X, 1) - tf.expand_dims(X2, 0)
        dist_d = partial(self.square_dist_d, X=X, X2=X2, precomputed_delta=delta)
        dx = diff_x + diff_y
        x_order = len(diff_x)
        y_order = len(diff_y)
        # tag each differentiation index with which argument it belongs to
        diff_keys = ['x']*x_order + ['y']*y_order
        diff_order = len(dx)
        if diff_order == 0:
            return self.derivative_base[0](self.square_dist(X, X2))
        dx_combinations = self.partitions[diff_order-1]
        # collect each distinct subset appearing in any partition so its
        # distance-derivative is computed only once
        unique_subsets = []
        for partition in self.partitions[diff_order-1]:
            for subset in partition:
                if subset not in unique_subsets:
                    unique_subsets.append(tuple(subset))
        # BUG FIX: equality was previously tested with `is 'x'`/`is 'y'`,
        # which relies on CPython string interning; use `==` instead.
        all_derivatives = dict([(elem, dist_d(
            diff_x=tuple(dx[i] for i in elem if diff_keys[i] == 'x'),
            diff_y=tuple(dx[i] for i in elem if diff_keys[i] == 'y')))
            for elem in unique_subsets])
        # base-kernel derivative of every order that can occur
        order_factor = [self.derivative_base[order](self.square_dist(X, X2))
                        for order in range(diff_order+1)]
        terms = []
        for partition in dx_combinations:
            order = len(partition)
            derivatives = [all_derivatives[elem] for elem in partition]
            terms.append(order_factor[order]*tf.foldl(tf.multiply, derivatives))
        result = tf.foldl(tf.add, terms)
        return result

    def _kernel_factory(self, i, j):
        '''Return a function that calculates the proper sub-kernel for
        block (i, j).'''
        diff_x = self.ind2dx(i)
        diff_y = self.ind2dx(j)

        def f(X, X2, precomputed_delta=None):
            return self.kernel_derivative(X, X2, diff_x=diff_x, diff_y=diff_y)
        return f
class StationaryHessianKernel(StationaryGradKernel):
    '''Extension of `StationaryGradKernel` that also admits second-derivative
    (Hessian) observations. Adds one block per lower-triangular index pair of
    the Hessian on top of the function-value and gradient blocks.
    '''

    def __init__(self, input_dim, active_dims=None):
        '''Set up block structure and base-kernel derivatives up to order 4.

        Parameters:
        - `input_dim`: total input dimension including the block-index column
          (spatial dims = input_dim - 1).
        - `active_dims`: forwarded to `BlockKernel`.
        '''
        ndim = input_dim - 1
        # blocks: 1 function value + ndim gradients + ndim*(ndim+1)/2 Hessians
        n_hessian = (ndim * (ndim + 1)) // 2
        groups = 1 + ndim + n_hessian
        BlockKernel.__init__(self, input_dim, groups, active_dims)
        self.variance = GPflow.param.Param(0.1, transforms.positive)
        self.lengthscales = GPflow.param.Param(1.*np.ones(input_dim-1), transforms.positive)
        # maps a linear Hessian-block index to its (row, col) dimension pair
        self.tril_indices = np.column_stack(np.tril_indices(ndim))
        # n-th derivative of k(tau) = variance*exp(-tau/2) wrt tau is
        # (-1/2)^n * k(tau); bind each coefficient as a default argument so
        # the closures don't share a late-bound loop variable.
        coefficients = [1., -0.5, 0.25, -0.125, 0.0625]
        self.derivative_base = [
            (lambda tau, c=c: c * self.variance * tf.exp(-0.5 * tau))
            for c in coefficients]
        self.kerns = [[self._kernel_factory(i, j) for j in range(groups)]
                      for i in range(groups)]
        # partitions of derivative-index sets up to order 4, discarding any
        # partition containing a subset of size >= 3 (those terms vanish)
        raw_partitions = [find_partitions(list(range(n + 1))) for n in range(4)]
        self.partitions = [
            [partition for partition in all_partitions
             if np.all([len(subset) < 3 for subset in partition])]
            for all_partitions in raw_partitions]

    def ind2dx(self, i):
        '''Map a linear block index to the tuple of dimensions to
        differentiate wrt: () for the value block, a 1-tuple for a gradient
        block, and a 2-tuple (via `tril_indices`) for a Hessian block.'''
        if i == 0:
            return ()
        if i < self.input_dim:
            return (i - 1,)
        return tuple(self.tril_indices[i - self.input_dim, :])
# --- Demo / smoke test: runs at import time and produces a plot ---
d = 1 #dimension
groups = 3 #observation types
res = 100 #plot resolution
#number of function, derivative, and second derivative observations
Ns = [10,5,0]
N = np.sum(Ns)
#generate data and annotate with observation type
# column 0 = observation-type index (0=f, 1=f', 2=f''), column 1 = location
ind = np.vstack([n*np.ones((Ns[n],1)) for n in range(3)])
val = np.vstack([np.linspace(0,1,n).reshape((-1,1)) for n in Ns])
X = np.concatenate([ind, val],1)
#test function and derivatives
f = lambda x: np.exp(-x)
fp = lambda x: -np.exp(-x)
fpp = lambda x: np.exp(-x)
# targets stacked in the same type-order as the rows of X
y = np.vstack([fun(val[ind == i]).reshape((-1,1)) for fun, i in zip([f,fp,fpp], range(groups))])
#define kernels and GPs
k = StationaryHessianKernel(d+1)
kg = StationaryGradKernel(d+1)
m = GPflow.gpr.GPR(X, y, k)
mg = GPflow.gpr.GPR(X, y, kg)
m.likelihood.variance = 0.1
mg.likelihood.variance = 0.1
#compute kernel matrices
K = k.compute_K(X, X)
Kg = kg.compute_K(X, X)
#predict function, derivative, and second derivative
# NOTE(review): `x` already has 2 columns, so the prediction input below has
# 3 columns while the training X has 2 -- confirm this matches what
# `predict_f` / the block kernel expects.
x = np.column_stack([np.linspace(-1,1, res),np.zeros(res)])
predlevel = lambda i: m.predict_f(np.column_stack([i*np.ones(res),x]))
mu, var = predlevel(0)
mug, varg = predlevel(1)
# NOTE(review): this rebinds `varg`, discarding the first-derivative
# variance; presumably `varg2` was intended -- confirm before using `varg`.
mug2, varg = predlevel(2)
#plot
pf = plt.plot(x[:,0],mu,label='Function')
#plt.scatter(X[X[:,0]==0,1],y[X[:,0]==0],color=pf[0].get_color())
pfp = plt.plot(x[:,0],mug,label='Derivative')
#plt.scatter(X[X[:,0]==1,1],y[X[:,0]==1],color=pfp[0].get_color())
pfpp = plt.plot(x[:,0],mug2,label='Derivative2')
#plt.scatter(*X[X[:,0]==2,1].T,color=pfpp[0].get_color())
plt.hlines(0, -1, 1)
plt.legend()