Source code for src.util

"""
Utilities for the model classes
"""

from typing import Optional

import tensorflow as tf
from gpflow import default_float


def conditional_from_precision_sites_white(
    Kuu: tf.Tensor,
    Kff: tf.Tensor,
    Kuf: tf.Tensor,
    l: tf.Tensor,
    L: Optional[tf.Tensor] = None,
    L2: Optional[tf.Tensor] = None,
    jitter=1e-9,
):
    """
    Given g₁ and g₂, and distributions p and q such that

        p(g₂) = N(g₂; 0, Kuu)
        p(g₁) = N(g₁; 0, Kff)
        p(g₁ | g₂) = N(g₁; Kfu Kuu⁻¹ g₂, Kff - Kfu Kuu⁻¹ Kuf)

    and q(g₂) = N(g₂; μ, Σ) such that

        Σ⁻¹  = Kuu⁻¹ + Kuu⁻¹ L Lᵀ Kuu⁻¹
        Σ⁻¹μ = Kuu⁻¹ l

    this method computes the mean and (co)variance of

        q(g₁) = ∫ q(g₂) p(g₁ | g₂) dg₂ = N(g₁; μ*, Σ*)

    with

        Σ* = Kff - Kfu Kuu⁻¹ Kuf + Kfu Kuu⁻¹ Σ Kuu⁻¹ Kuf
           = Kff - Kfu Kuu⁻¹ Kuf + Kfu (Kuu + LLᵀ)⁻¹ Kuf
        μ* = Kfu Kuu⁻¹ μ = Kfu Kuu⁻¹ Σ Kuu⁻¹ l = Kfu (Kuu + LLᵀ)⁻¹ l

    Inputs:
    :param Kuu: tensor M x M
    :param Kff: tensor N x 1
    :param Kuf: tensor M x N
    :param l: tensor M x L
    :param L: tensor L x M x M
    :param L2: tensor L x M x M, equal to LLᵀ (may be passed instead of L)
    """
    shape_constraints = [
        (Kuu, ["M", "M"]),
        (Kuf, ["M", "N"]),
        (Kff, ["N", 1]),
        (l, ["M", "L"]),
    ]
    if L2 is not None:
        shape_constraints.append((L2, ["L", "M", "M"]))
    if L is not None:
        shape_constraints.append((L, ["L", "M", "M"]))
    tf.debugging.assert_shapes(
        shape_constraints,
        message="conditional_from_precision_sites_white() arguments "
        "[Note that this check verifies the shape of an alternative "
        "representation of Kmn. See the docs for the actual expected "
        "shape.]",
    )
    if L2 is None:
        L2 = L @ tf.linalg.matrix_transpose(L)
    m = Kuu.shape[-1]
    Id = tf.eye(m, dtype=default_float())
    R = L2 + Kuu + Id * jitter  # R = Kuu + LLᵀ (jittered), L x M x M
    LR = tf.linalg.cholesky(R)
    LA = tf.linalg.cholesky(Kuu)[None]
    # Σ* = Kff - Kfu Kuu⁻¹ Kuf + Kfu R⁻¹ Kuf, via two triangular solves
    tmp1 = tf.linalg.triangular_solve(LR, Kuf)
    tmp2 = tf.linalg.triangular_solve(LA, Kuf)
    cov = Kff - tf.linalg.matrix_transpose(
        tf.reduce_sum(tf.square(tmp2), axis=-2) - tf.reduce_sum(tf.square(tmp1), axis=-2)
    )
    # μ* = Kfu R⁻¹ l
    mean = tf.matmul(Kuf, tf.linalg.cholesky_solve(LR, l), transpose_a=True)[0]
    return mean, cov
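
# Illustrative usage sketch, not part of the original module: builds toy SPD
# covariances with M = 3 inducing points, N = 5 prediction points and one
# latent function, then calls conditional_from_precision_sites_white. All
# names and values here are hypothetical.
def _example_conditional_from_precision_sites_white():
    M, N = 3, 5
    dtype = default_float()
    A = tf.random.normal([M, M], dtype=dtype)
    Kuu = tf.matmul(A, A, transpose_b=True) + tf.eye(M, dtype=dtype)  # SPD, M x M
    Kuf = tf.random.normal([M, N], dtype=dtype)
    Kff = tf.ones([N, 1], dtype=dtype)  # marginal prior variances
    l = tf.random.normal([M, 1], dtype=dtype)
    L = tf.eye(M, batch_shape=[1], dtype=dtype)  # site precision sqrt, L x M x M
    mean, var = conditional_from_precision_sites_white(Kuu, Kff, Kuf, l, L=L)
    return mean, var  # both N x 1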
def conditional_from_precision_sites(
    Kuu: tf.Tensor,
    Kff: tf.Tensor,
    Kuf: tf.Tensor,
    l: tf.Tensor,
    L: Optional[tf.Tensor] = None,
    L2: Optional[tf.Tensor] = None,
):
    """
    Given g₁ and g₂, and distributions p and q such that

        p(g₂) = N(g₂; 0, Kuu)
        p(g₁) = N(g₁; 0, Kff)
        p(g₁ | g₂) = N(g₁; Kfu Kuu⁻¹ g₂, Kff - Kfu Kuu⁻¹ Kuf)

    and q(g₂) = N(g₂; μ, Σ) such that

        Σ⁻¹  = Kuu⁻¹ + LLᵀ
        Σ⁻¹μ = l

    this method computes the mean and (co)variance of

        q(g₁) = ∫ q(g₂) p(g₁ | g₂) dg₂ = N(g₁; μ*, Σ*)

    With W = I + LᵀKuuL, Lw = chol(W) and D = Lw⁻¹Lᵀ, and using the Woodbury
    identity (Kuu⁻¹ + LLᵀ)⁻¹ = Kuu - Kuu L W⁻¹ Lᵀ Kuu:

        Σ* = Kff - Kfu Kuu⁻¹ Kuf + Kfu Kuu⁻¹ Σ Kuu⁻¹ Kuf
           = Kff - Kfu Kuu⁻¹ Kuf + Kfu Kuu⁻¹ (Kuu⁻¹ + LLᵀ)⁻¹ Kuu⁻¹ Kuf
           = Kff - Kfu Kuu⁻¹ Kuf + Kfu Kuu⁻¹ (Kuu - Kuu L W⁻¹ Lᵀ Kuu) Kuu⁻¹ Kuf
           = Kff - Kfu L W⁻¹ Lᵀ Kuf
           = Kff - Kfu L Lw⁻ᵀ Lw⁻¹ Lᵀ Kuf
           = Kff - (D Kuf)ᵀ (D Kuf)

        μ* = Kfu Kuu⁻¹ μ
           = Kfu Kuu⁻¹ Σ l
           = Kfu Kuu⁻¹ (Kuu - Kuu L W⁻¹ Lᵀ Kuu) l
           = Kfu l - Kfu L W⁻¹ Lᵀ Kuu l
           = Kfu l - (D Kuf)ᵀ D Kuu l

    Inputs:
    :param Kuu: tensor M x M
    :param Kff: tensor N x 1
    :param Kuf: tensor M x N
    :param l: tensor M x L
    :param L: tensor L x M x M
    :param L2: tensor L x M x M, equal to LLᵀ (may be passed instead of L)
    """
    shape_constraints = [
        (Kuu, ["M", "M"]),
        (Kuf, ["M", "N"]),
        (Kff, ["N", 1]),
        (l, ["M", "L"]),
    ]
    if L2 is not None:
        shape_constraints.append((L2, ["L", "M", "M"]))
    if L is not None:
        shape_constraints.append((L, ["L", "M", "M"]))
    tf.debugging.assert_shapes(
        shape_constraints,
        message="conditional_from_precision_sites() arguments "
        "[Note that this check verifies the shape of an alternative "
        "representation of Kmn. See the docs for the actual expected "
        "shape.]",
    )
    if L is None:
        L = tf.linalg.cholesky(L2)
    m = Kuu.shape[-1]
    Id = tf.eye(m, dtype=default_float())
    C = tf.linalg.cholesky(Kuu)
    # W = I + (CᵀL)ᵀ(CᵀL) = I + LᵀKuuL, with C = chol(Kuu)
    CtL = tf.matmul(C, L, transpose_a=True)
    W = Id + tf.matmul(CtL, CtL, transpose_a=True)
    chol_W = tf.linalg.cholesky(W)
    D = tf.linalg.triangular_solve(chol_W, tf.linalg.matrix_transpose(L))  # D = Lw⁻¹Lᵀ
    tmp = tf.matmul(D, Kuf)  # D Kuf
    # μ* = Kfu l - (D Kuf)ᵀ D Kuu l
    mean = tf.matmul(Kuf, l, transpose_a=True) - tf.linalg.matrix_transpose(
        tf.reduce_sum(
            tf.matmul(D, tf.matmul(Kuu, tf.linalg.matrix_transpose(l)[..., None])) * tmp,
            axis=-2,
        )
    )
    # Σ* = Kff - (D Kuf)ᵀ (D Kuf), diagonal only
    cov = Kff - tf.linalg.matrix_transpose(tf.reduce_sum(tf.square(tmp), axis=-2))
    return mean, cov
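
# Illustrative usage sketch, not part of the original module: same toy set-up
# as above, for the unwhitened parameterisation Σ⁻¹ = Kuu⁻¹ + LLᵀ.
def _example_conditional_from_precision_sites():
    M, N = 3, 5
    dtype = default_float()
    A = tf.random.normal([M, M], dtype=dtype)
    Kuu = tf.matmul(A, A, transpose_b=True) + tf.eye(M, dtype=dtype)
    Kuf = tf.random.normal([M, N], dtype=dtype)
    Kff = tf.ones([N, 1], dtype=dtype)
    l = tf.random.normal([M, 1], dtype=dtype)
    L = tf.eye(M, batch_shape=[1], dtype=dtype)  # site precision sqrt
    mean, var = conditional_from_precision_sites(Kuu, Kff, Kuf, l, L=L)
    return mean, var  # both N x 1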
def project_diag_sites(
    Kuf: tf.Tensor,
    lambda_1: tf.Tensor,
    lambda_2: tf.Tensor,
    Kuu: Optional[tf.Tensor] = None,
    cholesky=True,
):
    """
    From Kuu, Kuf, λ₁, λ₂, computes the statistics

        L = Kuu⁻¹ (Σₙ Kufₙ λ₂ₙ Kfₙu) Kuu⁻¹
        l = Kuu⁻¹ (Σₙ Kufₙ λ₁ₙ)

    :param Kuf: L x M x N
    :param lambda_1: N x L
    :param lambda_2: N x L
    :param Kuu: L x M x M
    :return:
        l: M x L
        L: L x M x M (its Cholesky factor if `cholesky` is True)
    """
    shape_constraints = [
        (Kuf, [..., "M", "N"]),
        (lambda_1, ["N", "L"]),
        (lambda_2, ["N", "L"]),
    ]
    if Kuu is not None:
        shape_constraints.append((Kuu, [..., "M", "M"]))
    tf.debugging.assert_shapes(
        shape_constraints,
        message="project_diag_sites() arguments "
        "[Note that this check verifies the shape of an alternative "
        "representation of Kmn. See the docs for the actual expected "
        "shape.]",
    )
    num_latent = lambda_1.shape[-1]
    # Broadcast Kuf (and Kuu) to one slice per latent function.
    P = tf.tile(Kuf[None], [num_latent, 1, 1]) if tf.rank(Kuf) == 2 else Kuf
    if Kuu is not None:
        Kuu = Kuu[None] if tf.rank(Kuu) == 2 else Kuu
        Luu = tf.linalg.cholesky(Kuu)
        P = tf.linalg.cholesky_solve(Luu, P)  # Kuu⁻¹ Kuf
    l = tf.einsum("lmn,nl->ml", P, lambda_1)
    L = tf.einsum("lmn,lon,nl->lmo", P, P, lambda_2)
    if cholesky:
        L = tf.linalg.cholesky(L)
    return l, L
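
# Illustrative usage sketch, not part of the original module: projects N = 4
# diagonal site parameters for a single latent function onto M = 3 inducing
# points. Values are arbitrary; λ₂ is kept positive so L stays PSD.
def _example_project_diag_sites():
    M, N = 3, 4
    dtype = default_float()
    A = tf.random.normal([M, M], dtype=dtype)
    Kuu = tf.matmul(A, A, transpose_b=True) + tf.eye(M, dtype=dtype)
    Kuf = tf.random.normal([M, N], dtype=dtype)  # rank 2: tiled internally
    lambda_1 = tf.random.normal([N, 1], dtype=dtype)
    lambda_2 = tf.ones([N, 1], dtype=dtype)  # positive site precisions
    l, L = project_diag_sites(Kuf, lambda_1, lambda_2, Kuu=Kuu)
    return l, L  # shapes M x 1 and 1 x M x M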
def kl_from_precision_sites_white(A, l, L=None, L2=None):
    """
    Computes the KL divergence KL[q(f)‖p(f)], where q(f) = N(μ, Σ) is a
    Gaussian parameterised through its natural-parameter sites, with

        Σ = Λ⁻¹ = (A⁻¹ + A⁻¹LLᵀA⁻¹)⁻¹ = A(A + LLᵀ)⁻¹A
        μ = Λ⁻¹ A⁻¹ l = Σ A⁻¹ l

    and p(f) = N(0, A).
    """
    shape_constraints = [
        (A, ["M", "M"]),
        (l, ["M", "L"]),
    ]
    if L is not None:
        shape_constraints.append((L, [..., "M", "M"]))
    if L2 is not None:
        shape_constraints.append((L2, [..., "M", "M"]))
    tf.debugging.assert_shapes(
        shape_constraints,
        message="kl_from_precision_sites_white() arguments "
        "[Note that this check verifies the shape of an alternative "
        "representation of Kmn. See the docs for the actual expected "
        "shape.]",
    )
    if L2 is None:
        L2 = L @ tf.linalg.matrix_transpose(L)
    m = tf.shape(L2)[-2]
    R = L2 + A
    LR = tf.linalg.cholesky(R)
    LA = tf.linalg.cholesky(A)
    # log-determinant term: log|R| - log|A| = log|A| - log|Σ|
    log_det = tf.reduce_sum(tf.math.log(tf.square(tf.linalg.diag_part(LR)))) - tf.reduce_sum(
        tf.math.log(tf.square(tf.linalg.diag_part(LA)))
    )
    # trace term: tr(A⁻¹Σ) - M = tr(R⁻¹A) - M
    tmp = tf.linalg.triangular_solve(LR, LA)
    trace_plus_const = tf.reduce_sum(tf.square(tmp)) - tf.cast(m, A.dtype)
    # Mahalanobis term: μᵀ A⁻¹ μ = lᵀ R⁻¹ A R⁻¹ l
    mahalanobis = tf.reduce_sum(
        tf.square(tf.matmul(LA, tf.linalg.cholesky_solve(LR, l), transpose_a=True))
    )
    return 0.5 * (log_det + trace_plus_const + mahalanobis)
def kl_from_precision_sites(A, l, L=None, L2=None):
    """
    Computes the KL divergence KL[q(f)‖p(f)], where q(f) = N(μ, Σ) is a
    Gaussian parameterised through its natural-parameter sites, with

        Σ = Λ⁻¹ = (A⁻¹ + A⁻¹LLᵀA⁻¹)⁻¹ = A(A + LLᵀ)⁻¹A
        μ = Λ⁻¹ A⁻¹ l = Σ A⁻¹ l

    and p(f) = N(0, A).
    """
    shape_constraints = [
        (A, ["M", "M"]),
        (l, ["M", "L"]),
    ]
    if L is not None:
        shape_constraints.append((L, [..., "M", "M"]))
    if L2 is not None:
        shape_constraints.append((L2, [..., "M", "M"]))
    tf.debugging.assert_shapes(
        shape_constraints,
        message="kl_from_precision_sites() arguments "
        "[Note that this check verifies the shape of an alternative "
        "representation of Kmn. See the docs for the actual expected "
        "shape.]",
    )
    if L2 is None:
        L2 = L @ tf.linalg.matrix_transpose(L)
    m = tf.shape(L2)[-2]
    R = L2 + A
    LR = tf.linalg.cholesky(R)
    LA = tf.linalg.cholesky(A)
    # log-determinant term: log|R| - log|A| = log|A| - log|Σ|
    log_det = tf.reduce_sum(tf.math.log(tf.square(tf.linalg.diag_part(LR)))) - tf.reduce_sum(
        tf.math.log(tf.square(tf.linalg.diag_part(LA)))
    )
    # trace term: tr(A⁻¹Σ) - M = tr(R⁻¹A) - M
    tmp = tf.linalg.triangular_solve(LR, LA)
    trace_plus_const = tf.reduce_sum(tf.square(tmp)) - tf.cast(m, A.dtype)
    # Mahalanobis term: μᵀ A⁻¹ μ = lᵀ R⁻¹ A R⁻¹ l
    mahalanobis = tf.reduce_sum(
        tf.square(tf.matmul(LA, tf.linalg.cholesky_solve(LR, l), transpose_a=True))
    )
    return 0.5 * (log_det + trace_plus_const + mahalanobis)
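
# Illustrative usage sketch, not part of the original module: evaluates the
# KL term for a toy SPD prior covariance A and identity site factors. The
# same call pattern applies to kl_from_precision_sites.
def _example_kl_from_precision_sites_white():
    M = 3
    dtype = default_float()
    B = tf.random.normal([M, M], dtype=dtype)
    A = tf.matmul(B, B, transpose_b=True) + tf.eye(M, dtype=dtype)  # prior covariance
    l = tf.random.normal([M, 1], dtype=dtype)
    L = tf.eye(M, batch_shape=[1], dtype=dtype)  # site precision sqrt
    return kl_from_precision_sites_white(A, l, L=L)  # scalar tensor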
def posterior_from_dense_site(K, lambda_1, lambda_2_sqrt):
    """
    Returns the mean and Cholesky factor of the density

        q(u) = p(u) t(u) = 𝓝(u; m, S)

    where p(u) = 𝓝(u; 0, K) and t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u), with

        P = -2Λ₂
        S = (K⁻¹ + P)⁻¹ = (K⁻¹ + LLᵀ)⁻¹ = K - K L W⁻¹ Lᵀ K,   W = I + LᵀKL
        m = S λ₁

    Input:
    :param: K: M x M
    :param: lambda_1: M x P
    :param: lambda_2_sqrt: P x M x M

    Output:
    m: M x P
    chol_S: P x M x M
    """
    shape_constraints = [
        (K, [..., "M", "M"]),
        (lambda_1, ["M", "L"]),
        (lambda_2_sqrt, ["L", "M", "M"]),
    ]
    tf.debugging.assert_shapes(
        shape_constraints,
        message="posterior_from_dense_site() arguments ",
    )
    L = lambda_2_sqrt  # square root of the site precision
    m = K.shape[-1]
    Id = tf.eye(m, dtype=default_float())
    C = tf.linalg.cholesky(K)
    # W = I + (CᵀL)ᵀ(CᵀL) = I + LᵀKL, with C = chol(K)
    CtL = tf.matmul(C, L, transpose_a=True)
    W = Id + tf.matmul(CtL, CtL, transpose_a=True)
    chol_W = tf.linalg.cholesky(W)
    # S = K - K L W⁻¹ Lᵀ K = K - (Lw⁻¹ Lᵀ K)ᵀ (Lw⁻¹ Lᵀ K)
    LtK = tf.matmul(L, K, transpose_a=True)
    iwLtK = tf.linalg.triangular_solve(chol_W, LtK, lower=True, adjoint=False)
    S_q = K - tf.matmul(iwLtK, iwLtK, transpose_a=True)
    chol_S_q = tf.linalg.cholesky(S_q)
    m_q = tf.einsum("lmn,nl->ml", S_q, lambda_1)
    return m_q, chol_S_q
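
# Illustrative usage sketch, not part of the original module: recovers the
# posterior N(m, S) from toy sites given by a square-root precision.
def _example_posterior_from_dense_site():
    M = 3
    dtype = default_float()
    B = tf.random.normal([M, M], dtype=dtype)
    K = tf.matmul(B, B, transpose_b=True) + tf.eye(M, dtype=dtype)  # prior covariance
    lambda_1 = tf.random.normal([M, 1], dtype=dtype)
    lambda_2_sqrt = tf.eye(M, batch_shape=[1], dtype=dtype)
    m_q, chol_S_q = posterior_from_dense_site(K, lambda_1, lambda_2_sqrt)
    return m_q, chol_S_q  # shapes M x 1 and 1 x M x M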
def posterior_from_dense_site_white(K, lambda_1, lambda_2, jitter=1e-9):
    """
    Returns the mean and Cholesky factor of the density

        q(u) = p(u) t(u) = 𝓝(u; m, S)

    where p(u) = 𝓝(u; 0, K) and t(u) = exp(uᵀλ₁ - ½ uᵀΛ₂u), with

        S = Λ⁻¹ = (K⁻¹ + K⁻¹PK⁻¹)⁻¹ = [K⁻¹(K + P)K⁻¹]⁻¹ = K(K + P)⁻¹K
        m = S K⁻¹λ₁ = K(K + P)⁻¹λ₁

    Input:
    :param: K: M x M
    :param: lambda_1: M x P
    :param: lambda_2: P x M x M

    Output:
    m: M x P
    chol_S: P x M x M
    """
    shape_constraints = [
        (K, ["M", "M"]),
        (lambda_1, ["M", "L"]),
        (lambda_2, ["L", "M", "M"]),
    ]
    tf.debugging.assert_shapes(
        shape_constraints,
        message="posterior_from_dense_site_white() arguments ",
    )
    P = lambda_2  # dense site precision, one M x M slice per latent
    m = K.shape[-1]
    Id = tf.eye(m, dtype=default_float())
    # S = K (K + P)⁻¹ K, computed through chol(K + P)
    R = K + P
    LR = tf.linalg.cholesky(R + Id * jitter)
    iLRK = tf.linalg.triangular_solve(LR, K)
    S_q = tf.matmul(iLRK, iLRK, transpose_a=True)
    chol_S_q = tf.linalg.cholesky(S_q)
    m_q = (K @ tf.linalg.cholesky_solve(LR, lambda_1))[0]
    return m_q, chol_S_q
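
# Illustrative usage sketch, not part of the original module: same toy set-up
# as above, but passing the dense site precision P = λ₂ rather than its
# square root.
def _example_posterior_from_dense_site_white():
    M = 3
    dtype = default_float()
    B = tf.random.normal([M, M], dtype=dtype)
    K = tf.matmul(B, B, transpose_b=True) + tf.eye(M, dtype=dtype)
    lambda_1 = tf.random.normal([M, 1], dtype=dtype)
    lambda_2 = tf.eye(M, batch_shape=[1], dtype=dtype)  # site precision
    m_q, chol_S_q = posterior_from_dense_site_white(K, lambda_1, lambda_2)
    return m_q, chol_S_q  # shapes M x 1 and 1 x M x M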
def gradient_transformation_mean_var_to_expectation(inputs, grads):
    """
    Transforms the gradient 𝐠 of a function with respect to the mean and
    variance parameters [μ, σ²] into its gradient with respect to the
    expectation parameters [μ, σ² + μ²].

    :param inputs: μ, tensor M x L (only the mean enters the transformation)
    :param grads: 𝐠 = (∂f/∂μ, ∂f/∂σ²), tensors M x L and L x M x M

    Output: (∂f/∂μ - 2 (∂f/∂σ²) μ, ∂f/∂σ²), the gradient wrt [μ, σ² + μ²]
    """
    return grads[0] - 2.0 * tf.einsum("lmo,ol->ml", grads[1], inputs), grads[1]
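
# Illustrative shape check, not part of the original module: with L = 1 latent
# and M = 2 inducing points, applies the chain rule ∂f/∂η₁ = ∂f/∂μ - 2μ ∂f/∂σ²
# for η = [μ, σ² + μ²]. Passing μ as `inputs` matches the einsum signature
# above; this reading of the argument is our assumption.
def _example_gradient_transformation():
    M = 2
    dtype = default_float()
    mu = tf.random.normal([M, 1], dtype=dtype)           # μ, M x L
    grad_mean = tf.random.normal([M, 1], dtype=dtype)    # ∂f/∂μ, M x L
    grad_var = tf.random.normal([1, M, M], dtype=dtype)  # ∂f/∂σ², L x M x M
    g1, g2 = gradient_transformation_mean_var_to_expectation(mu, (grad_mean, grad_var))
    return g1, g2  # shapes M x 1 and 1 x M x M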