MNIST Classification

[1]:


import numpy as np
import tensorflow as tf
import gpflow
from gpflow.optimizers import NaturalGradient
from src.models.tsvgp import t_SVGP
import logging
import time
from tqdm import tqdm
tf.get_logger().setLevel(logging.ERROR)

rng = np.random.RandomState(1)
tf.random.set_seed(1)
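For reproducibility it can help to record the library versions in use; a small addition, not part of the original run:

[ ]:

# Print the TensorFlow and GPflow versions this notebook was run with
print("TensorFlow:", tf.__version__)
print("GPflow:", gpflow.__version__)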

Loading MNIST data

[2]:

def load_mnist():
    mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()

    # Flatten 28x28 training images to 784-dim vectors, scale pixels to [0, 1]
    x, y = mnist_train
    x = tf.reshape(x, [x.shape[0], -1]).numpy()
    x = x.astype(np.float64) / 255
    y = np.reshape(y, (-1, 1))
    y = np.int64(y)

    # Same preprocessing for the test split
    xt, yt = mnist_test
    xt = tf.reshape(xt, [xt.shape[0], -1]).numpy()
    xt = xt.astype(np.float64) / 255
    yt = np.reshape(yt, (-1, 1))
    yt = np.int64(yt)

    # Shuffle the training set in place (np.take buffers the result by
    # default, so writing back into x and y via out= is safe)
    perm = rng.permutation(x.shape[0])
    np.take(x, perm, axis=0, out=x)
    np.take(y, perm, axis=0, out=y)

    return x, y, xt, yt


M = 100         # Number of inducing points
C = 10          # Number of classes
mb_size = 200   # Size of minibatch during training
nit = 150       # Number of training iterations
nat_lr = 0.03   # Learning rate for E-step (variational params)
adam_lr = 0.02  # Learning rate for M-step (hyperparams)
n_e_steps = 1   # Number of E-steps per training iteration
n_m_steps = 1   # Number of M-steps per training iteration



# Initial hyperparameters
ell = 1.0
var = 1.0

# Load data
X, Y, XT, YT = load_mnist()

# Training data
train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat().shuffle(X.shape[0])

# Initialize inducing locations to the first M inputs in the data set
Z = X[:M, :].copy()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11493376/11490434 [==============================] - 0s 0us/step
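As a quick sanity check (not part of the original run), the loaded arrays should match the standard MNIST split: 60,000 training and 10,000 test images, each flattened to 28*28 = 784 features.

[ ]:

# Standard MNIST shapes: 60k train / 10k test, 784 = 28*28 pixels per image
assert X.shape == (60000, 784) and Y.shape == (60000, 1)
assert XT.shape == (10000, 784) and YT.shape == (10000, 1)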

Declaring the classification models

[3]:

models = []
names = []

# Set up the 'standard' q-SVGP model
m = gpflow.models.SVGP(
        kernel=gpflow.kernels.Matern52(lengthscales=np.ones((1, X.shape[1]))*ell, variance=var),
        likelihood=gpflow.likelihoods.Softmax(C),
        inducing_variable=Z.copy(),
        num_data=X.shape[0],
        whiten=True,
        num_latent_gps=C)

gpflow.set_trainable(m.q_mu, False)
gpflow.set_trainable(m.q_sqrt, False)

models.append(m)
names.append('q-SVGP')

# Set up the t-SVGP model
m = t_SVGP(
        kernel=gpflow.kernels.Matern52(lengthscales=np.ones((1, X.shape[1]))*ell, variance=var),
        likelihood=gpflow.likelihoods.Softmax(C),
        inducing_variable=Z.copy(),
        num_data=X.shape[0],
        num_latent_gps=C)

gpflow.set_trainable(m.lambda_1, False)
gpflow.set_trainable(m.lambda_2_sqrt, False)

models.append(m)
names.append('t-SVGP')
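
To verify which parameters each model will optimize (kernel hyperparameters trainable, variational parameters held out for the natural-gradient E-steps), GPflow's summary utility can be used; a quick inspection, not part of the original run, and assuming t_SVGP is a gpflow.Module like SVGP:

[ ]:

from gpflow.utilities import print_summary

# List each model's kernel, likelihood, inducing and variational parameters,
# together with their trainable flags
for model, name in zip(models, names):
    print(name)
    print_summary(model)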

Training the models

[4]:


def train(model, iterations):
    """
    Utility function for training SVGP models with natural gradients

    :param model: GPflow model
    :param iterations: number of iterations
    """

    print("Optimizing model: ", model.name)

    natgrad_opt = NaturalGradient(gamma=nat_lr)

    tf.random.set_seed(4)
    train_iter = iter(train_dataset.batch(mb_size))

    tf.random.set_seed(4)
    train_iter2 = iter(train_dataset.batch(mb_size))

    training_loss = model.training_loss_closure(train_iter, compile=True)
    training_loss2 = model.training_loss_closure(train_iter2, compile=True)

    # Define the M-step (called in the same way for both models)
    optimizer = tf.optimizers.Adam(adam_lr)

    @tf.function
    def optimization_m_step(training_loss, params):
        optimizer.minimize(training_loss, var_list=params)

    # Define the E-step once per model, outside the training loop, so the
    # tf.function is traced a single time instead of on every iteration
    if model.name == 'svgp':
        # q_mu and q_sqrt were set non-trainable above, so Adam leaves them
        # to the natural-gradient optimizer
        variational_params = [(model.q_mu, model.q_sqrt)]

        @tf.function
        def optimization_e_step(data):
            # The minibatch is drawn through the compiled loss closure;
            # `data` is unused here but keeps both E-steps call-compatible
            natgrad_opt.minimize(training_loss, var_list=variational_params)

    elif model.name == 't_svgp':

        @tf.function
        def optimization_e_step(data):
            model.natgrad_step(data, lr=nat_lr)

    else:
        raise NotImplementedError("No training setup for this model.")

    for _ in tqdm(range(iterations)):
        data = next(train_iter)

        for _ in range(n_e_steps):
            optimization_e_step(data)
        for _ in range(n_m_steps):
            optimization_m_step(training_loss2, model.trainable_variables)


for m, name in zip(models, names):
    t0 = time.time()
    train(m, nit)
    t = time.time()-t0

    # Calculate negative log predictive density (NLPD) on the test set
    nlpd = -tf.reduce_mean(m.predict_log_density((XT, YT))).numpy()

    # Calculate accuracy on the test set
    pred = m.predict_y(XT)[0]
    pred_argmax = tf.reshape(tf.argmax(pred, axis=1), (-1, 1))
    acc = np.mean(pred_argmax.numpy() == YT)

    print('Training time for', name, 'was', t, 'seconds')
    print(name, 'test NLPD =', nlpd)
    print(name, 'test accuracy =', acc)
Optimizing model:  svgp
100%|██████████| 150/150 [01:52<00:00,  1.33it/s]
Training time for q-SVGP was 112.97154688835144 seconds
q-SVGP test NLPD = 0.2757943476980467
q-SVGP test accuracy = 0.9358
Optimizing model:  t_svgp
100%|██████████| 150/150 [00:47<00:00,  3.13it/s]
Training time for t-SVGP was 48.36657905578613 seconds
t-SVGP test NLPD = 0.26730028827055347
t-SVGP test accuracy = 0.939
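
As a usage sketch (assuming the trained models are still in memory; not part of the original run), per-class probabilities for individual test images can be read off predict_y, whose predictive mean under the Softmax likelihood approximates the class probabilities:

[ ]:

# Predict class probabilities for the first test image with the t-SVGP model
probs, _ = models[1].predict_y(XT[:1])
print("predicted class:", int(tf.argmax(probs, axis=1)[0]))
print("true class:     ", int(YT[0, 0]))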