https://github.com/GPflow/GPflow
Tip revision: 6bc57345a42f589fb1ce559ba47d1c667581e4ed authored by thevincentadam on 16 April 2018, 14:55:16 UTC
a first mixedmok test (#706)
va_test_conditionals.py
import tensorflow as tf
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # force CPU execution

class settings:
    # minimal stand-in for gpflow.settings
    float_type = tf.float64
def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
"""
Given a variable g divided into disjoint subsets g_a and g_b, and distribution p and q such that
p(g_b) = N(g_b;0,Kmm)
p(g_a) = N(g_a;0,Knn)
Cov[g_a,g_b] = Knm
And
q(g_b) = N(g_b;f,q_sqrt*q_sqrt^T)
This method computes the mean and (co)variance of
q(g_a) = \int q(g_b) p(g_a|g_b)
:param Kmn: M x N
:param Kmm: M x M
:param Knn: N x N (full_cov=True) or N
:param f: M x R
:param full_cov: bool
:param q_sqrt: None or R x M x M (lower triangular) Does not support q_diag, i.e. M x R
:param white: bool
:return: N x R or N x N x R (full_cov=True)
"""
# compute kernel stuff
num_func = tf.shape(f)[1] # R
Lm = tf.cholesky(Kmm)
# Compute the projection matrix A
A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)
# compute the covariance due to the conditioning
if full_cov:
fvar = Knn - tf.matmul(A, A, transpose_a=True)
shape = tf.stack([num_func, 1, 1])
else:
fvar = Knn - tf.reduce_sum(tf.square(A), 0)
shape = tf.stack([num_func, 1])
fvar = tf.tile(tf.expand_dims(fvar, 0), shape) # R x N x N or R x N
# another backsubstitution in the unwhitened case
if not white:
A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)
# construct the conditional mean
fmean = tf.matmul(A, f, transpose_a=True)
if q_sqrt is not None:
if q_sqrt.get_shape().ndims == 2:
LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2) # R x M x N
elif q_sqrt.get_shape().ndims == 3:
L = tf.matrix_band_part(q_sqrt, -1, 0) # R x M x M
A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
LTA = tf.matmul(L, A_tiled, transpose_a=True) # R x M x N
else: # pragma: no cover
raise ValueError("Bad dimension for q_sqrt: %s" %
str(q_sqrt.get_shape().ndims))
if full_cov:
fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True) # R x N x N
else:
fvar = fvar + tf.reduce_sum(tf.square(LTA), 1) # R x N
fvar = tf.transpose(fvar) # N x R or N x N x R
return fmean, fvar
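# A minimal NumPy cross-check for base_conditional (a sketch, not GPflow code):
# with white=False and q_sqrt=None, the conditional mean is Kmn^T Kmm^{-1} f and
# the marginal variance is Knn - diag(Kmn^T Kmm^{-1} Kmn), tiled across the R columns.
def np_base_conditional(Kmn, Kmm, Knn_diag, f):
    """NumPy reference for base_conditional(full_cov=False, q_sqrt=None, white=False)."""
    Kmm_inv_Kmn = np.linalg.solve(Kmm, Kmn)  # M x N
    fmean = Kmm_inv_Kmn.T @ f  # N x R
    fvar = Knn_diag - np.sum(Kmn * Kmm_inv_Kmn, 0)  # N
    return fmean, np.tile(fvar[:, None], (1, f.shape[1]))  # N x R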
def independent_latents_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
"""
Given P latent variables g1, ..., gP and disjoint subsets (g1_a, g1_b),...,(gP_a, gP_b)
p(gi_b) = N(gi_b;0,Kmm(i)) i=1..P
p(gi_a) = N(gi_a;0,Knn(i))
Cov[gi_b,gi_a] = Knm(i)
And
q(gi_b) = N(gi_b;f(i),q_sqrt(i)*q_sqrt(i)^T)
This method computes the mean and (co)variance of
q(gi_a) = \int q(gi_b) p(gi_a|gi_b)
This is intended to work for either
* shared Kxxs across the latents ( i.e. Kmm : M x M, Knn : N x N, Knm : N x M )
* separate Kxxs across the latents ( i.e. Kmm : P x M x M, Knn : P x N x N, Knm : P x N x M )
The outputs be of shape
* N x P , N x P (full_cov = False)
* N x P , N x N x P (full_cov = True)
Remarks:
* Having P x P output makes no sense for independent latents (former full_cov_output irrelevant)
* Whether one wants a full_cov (across the input of each gp) output for each gp should be reflected in the shape of Knn
_______________________________________________________________________
|______________Shared_______________|_______________Separate ___________|
|_____full_cov____|__not full_cov___|_____full_cov____|__not full_cov___|
| | | | |
:param Kmn: | M x N | M x N | P x M x N | P x M x N |
:param Kmm: | M x M | M x N | P x M x M | P x M x M |
:param Knn: | N x N | N | P x N x N | P x N |
|_______________________________________________________________________|
:param f: data matrix, M x P
:param q_sqrt: P x M x M # TODO (1) no q_diag option ? (2) deal q_sqrt of shape (P x M x P x M)
:return: N x P , N x P or N x N x P (full_cov = True)
"""
# TODO: Allow broadcasting over L if priors are shared?
    P = int(f.shape[1])  # number of latent processes (assumes a static shape for f)
    if Kmn.shape.ndims == 2:  # shared Kxxs case: tile everything to a common P x ... batch
        Kmn = tf.tile(tf.expand_dims(Kmn, 0), [P, 1, 1])  # P x M x N
        Kmm = tf.tile(tf.expand_dims(Kmm, 0), [P, 1, 1])  # P x M x M
        if full_cov:
            Knn = tf.tile(tf.expand_dims(Knn, 0), [P, 1, 1])  # P x N x N
        else:
            Knn = tf.tile(tf.expand_dims(Knn, 0), [P, 1])  # P x N
    elif Kmn.shape.ndims != 3:  # the separate Kxxs case is already P x M x N
        raise ValueError("Bad number of dimensions for Kmn: %s" % str(Kmn.shape.ndims))
Lm = tf.cholesky(Kmm) # P x M x M
# Compute the projection matrix A
A = tf.matrix_triangular_solve(Lm, Kmn, lower=True) # P x M x M * P x M x N -> P x M x N
# compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.einsum('pmn,pms->pns', A, A)  # P x M x N , P x M x N -> P x N x N
    else:
        fvar = Knn - tf.einsum('pmn,pmn->pn', A, A)  # P x M x N , P x M x N -> P x N
    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(Lm, A, lower=True, adjoint=True)  # Kmm^-1 Kmn : P x M x N
    fmean = tf.einsum('pmn,mp->np', A, f)  # P x M x N , M x P -> N x P
    if q_sqrt is not None:
        if q_sqrt.shape.ndims == 3:
            Lf = tf.matrix_band_part(q_sqrt, -1, 0)  # force lower-triangular: P x M x M
            LTA = tf.einsum('pjm,pjn->pmn', Lf, A)  # Lf^T A : P x M x N
        else:
            raise NotImplementedError("Bad dimension for q_sqrt: %s" % str(q_sqrt.shape.ndims))
        if full_cov:
            fvar = fvar + tf.einsum('pmn,pmo->pno', LTA, LTA)  # P x M x N , P x M x N -> P x N x N
        else:
            fvar = fvar + tf.einsum('pmn,pmn->pn', LTA, LTA)  # P x M x N , P x M x N -> P x N
    # reorder the output dimensions to match the docstring
    fvar = tf.transpose(fvar) if not full_cov else tf.transpose(fvar, [1, 2, 0])  # N x P or N x N x P
    return fmean, fvar
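# Sanity sketch for the separate-Kxxs case (a hypothetical test oracle, not library
# code): with P independent latents, independent_latents_conditional should agree
# with calling base_conditional once per latent on Kmn[p], Kmm[p], Knn[p], f[:, p:p+1].
def independent_latents_via_base(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
    P = int(f.shape[1])
    means, variances = [], []
    for p in range(P):
        qs = None if q_sqrt is None else q_sqrt[p:p + 1]  # 1 x M x M
        m, v = base_conditional(Kmn[p], Kmm[p], Knn[p], f[:, p:p + 1],
                                full_cov=full_cov, q_sqrt=qs, white=white)
        means.append(m)  # N x 1
        variances.append(v)  # N x 1 or N x N x 1
    return tf.concat(means, axis=-1), tf.concat(variances, axis=-1)  # N x P , N x P or N x N x P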
def fully_correlated_conditional(Kmn, Kmm, Knn, f, *, q_sqrt=None, white=False):
"""
Given P latent variables g1, ..., gP and disjoint subsets (g1_a, g1_b),...,(gP_a, gP_b)
p(g_b) = N(g_b;0,Kmm)
p(g_a) = N(g_a;0,Knn)
Cov[g_a,g_b] = Knm
And
q(g_b) = N(g_b;f,q_sqrt*q_sqrt^T)
This method computes the mean and (co)variance of
q(g_a) = \int q(g_b) p(g_a|g_b)
We have
:param Kmn: P x M x P x N
:param Kmm: P x M x P x M
:param Knn: P x N x P x N
:param f: data matrix, M x P
:param q_sqrt: P x M x P x M
:return: N x P, P x N x P x N
"""
    P, M, _, N = [int(s) for s in Kmn.shape]
# TODO: lots of reshaping going on. Check ordering of dimensions is correct
# TODO implement a block cholesky as in https://scicomp.stackexchange.com/questions/5050/cholesky-factorization-of-block-matrices
Kmmr = tf.reshape(Kmm, (P*M, P*M)) # PM x PM
Lmr = tf.cholesky(Kmmr) # PM x PM
    # Compute the projection matrix A
    Kmnr = tf.reshape(Kmn, (P * M, P * N))  # PM x PN
    Ar = tf.matrix_triangular_solve(Lmr, Kmnr, lower=True)  # PM x PM , PM x PN -> PM x PN
    # compute the covariance due to the conditioning
    Knnr = tf.reshape(Knn, (P * N, P * N))
    fvarr = Knnr - tf.matmul(Ar, Ar, transpose_a=True)  # PM x PN , PM x PN -> PN x PN
    # another backsubstitution in the unwhitened case
    if not white:
        Ar = tf.matrix_triangular_solve(Lmr, Ar, lower=True, adjoint=True)  # PM x PM , PM x PN -> PM x PN
    # flatten f in (p, m) order to match the PM layout of Kmm / Kmn
    fr = tf.reshape(tf.transpose(f), (P * M, 1))  # M x P -> PM x 1
    fmeanr = tf.matmul(fr, Ar, transpose_a=True)  # 1 x PM , PM x PN -> 1 x PN
    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 4:
            q_sqrtr = tf.reshape(q_sqrt, (P * M, P * M))  # PM x PM
            Lfr = tf.matrix_band_part(q_sqrtr, -1, 0)  # force lower-triangular: PM x PM
            LTA = tf.matmul(Lfr, Ar, transpose_a=True)  # PM x PM , PM x PN -> PM x PN
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        fvarr = fvarr + tf.matmul(LTA, LTA, transpose_a=True)  # PM x PN , PM x PN -> PN x PN
    # fmeanr is laid out in (p, n) order; transpose to N x P
    return tf.transpose(tf.reshape(fmeanr, (P, N))), \
           tf.reshape(fvarr, (P, N, P, N))
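# Consistency sketch (an assumption to test, not a GPflow guarantee): when every
# cross-latent block of Kmm, Kmn, Knn (and q_sqrt) is zero, fully_correlated_conditional
# should reproduce the independent-latents result. block_diag_4d is a hypothetical
# helper for building such inputs.
def block_diag_4d(blocks):
    """blocks: list of P arrays, each A x B -> P x A x P x B with zero cross-latent blocks."""
    P = len(blocks)
    A, B = blocks[0].shape
    out = np.zeros((P, A, P, B))
    for p, block in enumerate(blocks):
        out[p, :, p, :] = block
    return out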
# ==========================================
'''
N = 100
M = 10
R = 2
# Non full cov inputs (overridden by the full-cov block below; comment one block out)
Kmn = tf.constant(np.eye(N)[:M, :], dtype=settings.float_type)
Kmm = tf.constant(np.eye(M), dtype=settings.float_type)
Knn = tf.constant(np.ones((N,)), dtype=settings.float_type)
f = tf.constant(np.ones((M, R)), dtype=settings.float_type)
full_cov = False
# full cov inputs
Kmn = tf.constant(np.eye(N)[:M, :], dtype=settings.float_type)
Kmm = tf.constant(np.eye(M), dtype=settings.float_type)
Knn = tf.constant(np.ones((N, N)), dtype=settings.float_type)
f = tf.constant(np.ones((M, R)), dtype=settings.float_type)
full_cov = True
q_sqrt = tf.constant(np.array([np.eye(M)
                               for _ in range(R)]), dtype=settings.float_type)
q_sqrt = None  # toggle: comment this line out to test the q_sqrt path
m,v = base_conditional(Kmn, Kmm, Knn, f, full_cov=full_cov, q_sqrt=q_sqrt, white=False)
with tf.Session() as sess:
print( sess.run(tf.shape(v)) )
'''
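# Optional cross-check of base_conditional against the NumPy oracle above
# (a sketch; the toy squared-exponential kernel and jitter value are assumptions).
def _check_base_conditional(N=5, M=3, R=2, seed=0):
    rng = np.random.RandomState(seed)
    Z, X = rng.randn(M, 1), rng.randn(N, 1)
    k = lambda a, b: np.exp(-0.5 * (a - b.T) ** 2)  # toy squared-exponential kernel
    Kmm_np = k(Z, Z) + 1e-8 * np.eye(M)  # jitter for a stable Cholesky
    Kmn_np, Knn_np = k(Z, X), np.ones(N)  # k(x, x) = 1 for this kernel
    f_np = rng.randn(M, R)
    m, v = base_conditional(tf.constant(Kmn_np), tf.constant(Kmm_np),
                            tf.constant(Knn_np), tf.constant(f_np))
    m_np, v_np = np_base_conditional(Kmn_np, Kmm_np, Knn_np, f_np)
    with tf.Session() as sess:
        m_tf, v_tf = sess.run([m, v])
    assert np.allclose(m_tf, m_np) and np.allclose(v_tf, v_np)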
# ===========================================
'''
N = 100
M = 10
P = 2
def make_test_input(full_cov, shared):
    if full_cov and shared:
        Kmn = tf.constant(np.ones((M, N)), dtype=settings.float_type)
        Kmm = tf.constant(np.eye(M), dtype=settings.float_type)
        Knn = tf.constant(np.ones((N, N)), dtype=settings.float_type)
    elif not full_cov and shared:
        Kmn = tf.constant(np.ones((M, N)), dtype=settings.float_type)
        Kmm = tf.constant(np.eye(M), dtype=settings.float_type)
        Knn = tf.constant(np.ones((N,)), dtype=settings.float_type)
    elif full_cov and not shared:
        Kmn = tf.constant(np.ones((P, M, N)), dtype=settings.float_type)
        Kmm = tf.constant(np.array([np.eye(M) for _ in range(P)]), dtype=settings.float_type)
        Knn = tf.constant(np.ones((P, N, N)), dtype=settings.float_type)
    else:  # not full_cov and not shared
        Kmn = tf.constant(np.ones((P, M, N)), dtype=settings.float_type)
        Kmm = tf.constant(np.array([np.eye(M) for _ in range(P)]), dtype=settings.float_type)
        Knn = tf.constant(np.ones((P, N)), dtype=settings.float_type)
    # f and q_sqrt are the same in all four cases
    f = tf.constant(np.ones((M, P)), dtype=settings.float_type)
    q_sqrt = tf.constant(np.array([np.eye(M) for _ in range(P)]), dtype=settings.float_type)
    return Kmn, Kmm, Knn, f, q_sqrt
with tf.Session() as sess:
for full_cov in [True,False]:
for shared in [True,False]:
print('full_cov:',full_cov,', shared:',shared)
Kmn, Kmm, Knn, f, q_sqrt = make_test_input(full_cov,shared)
m,v = independent_latents_conditional(Kmn, Kmm, Knn, f, full_cov=full_cov, q_sqrt=q_sqrt, white=False)
print( sess.run(tf.shape(m)) )
print( sess.run(tf.shape(v)) )
'''
"""
:param Kmn: P x M x P x N
:param Kmm: P x M x P x M
:param Knn: P x N x P x N
:param f: data matrix, M x P
:param q_sqrt: P x M x P x M
:return: N x P, P x N x P x N
"""
P,M,N=3,10,100
Kmn = tf.constant( np.ones((P,M,P,N)), dtype=settings.float_type)
# identity over the joint (p, m) index, so Kmm reshaped to PM x PM is I (and positive definite)
Kmm_np = np.eye(P * M).reshape(P, M, P, M)
Kmm = tf.constant(Kmm_np, dtype=settings.float_type)
Knn = tf.constant( np.ones((P,N,P,N)), dtype=settings.float_type)
q_sqrt = tf.constant( Kmm_np, dtype=settings.float_type)
f = tf.constant( np.ones((M,P)), dtype=settings.float_type)
with tf.Session() as sess:
    m, v = fully_correlated_conditional(Kmn, Kmm, Knn, f, q_sqrt=None, white=False)  # toggle: pass q_sqrt=q_sqrt to exercise that path
print( sess.run(tf.shape(m)) )
print( sess.run(tf.shape(v)) )
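    # quick shape checks (a sketch; relies on the reshape ordering fixed above)
    m_shape, v_shape = sess.run([tf.shape(m), tf.shape(v)])
    assert list(m_shape) == [N, P] and list(v_shape) == [P, N, P, N]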