Source code for pyro.contrib.gp.models.gpr

from __future__ import absolute_import, division, print_function

from torch.distributions import constraints
from torch.nn import Parameter

import pyro
import pyro.distributions as dist
from pyro.contrib.gp.models.model import GPModel
from pyro.contrib.gp.util import conditional
from pyro.params import param_with_module_name


class GPRegression(GPModel):
    r"""
    Gaussian Process Regression model.

    The core of a Gaussian Process is a covariance function :math:`k` which
    governs the similarity between input points. Given :math:`k`, we can
    establish a distribution over functions :math:`f` by a multivariate normal
    distribution

    .. math:: p(f(X)) = \mathcal{N}(0, k(X, X)),

    where :math:`X` is any set of input points and :math:`k(X, X)` is a
    covariance matrix whose entries are outputs :math:`k(x, z)` of :math:`k`
    over input pairs :math:`(x, z)`. This distribution is usually denoted by

    .. math:: f \sim \mathcal{GP}(0, k).

    .. note:: Generally, besides a covariance function :math:`k`, a Gaussian
        Process can also be specified by a mean function :math:`m` (which is a
        zero-value function by default). In that case, its distribution will be

        .. math:: p(f(X)) = \mathcal{N}(m(X), k(X, X)).

    Given inputs :math:`X` and their noisy observations :math:`y`, the Gaussian
    Process Regression model takes the form

    .. math::
        f &\sim \mathcal{GP}(0, k(X, X)),\\
        y &\sim f + \epsilon,

    where :math:`\epsilon` is Gaussian noise.

    .. note:: This model has :math:`\mathcal{O}(N^3)` complexity for training
        and :math:`\mathcal{O}(N^3)` complexity for testing. Here, :math:`N` is
        the number of train inputs.

    Reference:

    [1] `Gaussian Processes for Machine Learning`,
    Carl E. Rasmussen, Christopher K. I. Williams

    :param torch.Tensor X: Input data for training. Its first dimension is the
        number of data points.
    :param torch.Tensor y: Output data for training. Its last dimension is the
        number of data points.
    :param ~pyro.contrib.gp.kernels.kernel.Kernel kernel: A Pyro kernel object,
        which is the covariance function :math:`k`.
    :param torch.Tensor noise: Variance of the Gaussian noise of this model.
    :param callable mean_function: An optional mean function :math:`m` of this
        Gaussian process. By default, we use zero mean.
    :param float jitter: A small positive term which is added to the diagonal
        part of a covariance matrix to help stabilize its Cholesky
        decomposition.
    :param str name: Name of this model.
    """
    def __init__(self, X, y, kernel, noise=None, mean_function=None,
                 jitter=1e-6, name="GPR"):
        super(GPRegression, self).__init__(X, y, kernel, mean_function, jitter,
                                           name)

        noise = self.X.new_ones(()) if noise is None else noise
        self.noise = Parameter(noise)
        self.set_constraint("noise", constraints.greater_than(self.jitter))
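    # A minimal construction sketch (illustrative, not part of this module):
    # the toy data ``X``/``y`` and the choice of an RBF kernel below are
    # assumptions made for the example only.
    #
    #     >>> import torch
    #     >>> import pyro.contrib.gp as gp
    #     >>> X = torch.linspace(0, 5, 20)[:, None]     # 20 one-dimensional inputs
    #     >>> y = torch.sin(X).squeeze(-1) + 0.1 * torch.randn(20)
    #     >>> kernel = gp.kernels.RBF(input_dim=1)
    #     >>> gpr = GPRegression(X, y, kernel, noise=torch.tensor(0.1))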
    def model(self):
        self.set_mode("model")

        noise = self.get_param("noise")

        # Cholesky factor of the noisy covariance K(X, X) + noise * I
        Kff = self.kernel(self.X) + noise.expand(self.X.shape[0]).diag()
        Lff = Kff.potrf(upper=False)

        zero_loc = self.X.new_zeros(self.X.shape[0])
        f_loc = zero_loc + self.mean_function(self.X)
        if self.y is None:
            # prior prediction: since Kff = Lff @ Lff.t(), the marginal
            # variances diag(Kff) are the row-wise squared sums of Lff
            f_var = Lff.pow(2).sum(dim=-1)
            return f_loc, f_var
        else:
            y_name = param_with_module_name(self.name, "y")
            return pyro.sample(y_name,
                               dist.MultivariateNormal(f_loc, scale_tril=Lff)
                                   .expand_by(self.y.shape[:-1])
                                   .independent(self.y.dim() - 1),
                               obs=self.y)
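    # A training sketch that optimizes ``model``/``guide`` with SVI (a hedged
    # example continuing the construction sketch above; the learning rate and
    # step count are arbitrary assumptions):
    #
    #     >>> from pyro.infer import SVI, Trace_ELBO
    #     >>> from pyro.optim import Adam
    #     >>> svi = SVI(gpr.model, gpr.guide, Adam({"lr": 0.01}),
    #     ...           loss=Trace_ELBO())
    #     >>> losses = [svi.step() for _ in range(1000)]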
    def guide(self):
        self.set_mode("guide")

        noise = self.get_param("noise")

        return noise
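    # After training, the guide simply returns the (constrained) noise
    # parameter, so the learned value can be read back directly (continuing
    # the sketch above):
    #
    #     >>> noise = gpr.guide()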
    def forward(self, Xnew, full_cov=False, noiseless=True):
        r"""
        Computes the mean and covariance matrix (or variance) of the Gaussian
        Process posterior on test input data :math:`X_{new}`:

        .. math:: p(f^* \mid X_{new}, X, y, k, \epsilon) = \mathcal{N}(loc, cov).

        .. note:: The noise parameter ``noise`` (:math:`\epsilon`), together
            with the kernel's parameters, has been learned from a training
            procedure (MCMC or SVI).

        :param torch.Tensor Xnew: Input data for testing. Note that
            ``Xnew.shape[1:]`` must be the same as ``self.X.shape[1:]``.
        :param bool full_cov: A flag to decide if we want to predict the full
            covariance matrix or just the variance.
        :param bool noiseless: A flag to decide if we want to include noise in
            the prediction output or not.
        :returns: loc and covariance matrix (or variance) of
            :math:`p(f^*(X_{new}))`
        :rtype: tuple(torch.Tensor, torch.Tensor)
        """
        self._check_Xnew_shape(Xnew)
        noise = self.guide()

        Kff = self.kernel(self.X) + noise.expand(self.X.shape[0]).diag()
        Lff = Kff.potrf(upper=False)

        y_residual = self.y - self.mean_function(self.X)
        loc, cov = conditional(Xnew, self.X, self.kernel, y_residual, None,
                               Lff, full_cov, jitter=self.jitter)

        if full_cov and not noiseless:
            cov = cov + noise.expand(Xnew.shape[0]).diag()
        if not full_cov and not noiseless:
            cov = cov + noise.expand(Xnew.shape[0])

        return loc + self.mean_function(Xnew), cov
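# A prediction sketch (continuing the hedged example above; since ``GPModel``
# subclasses ``torch.nn.Module``, calling ``gpr(...)`` dispatches to
# ``forward``; the test grid ``Xnew`` is an assumption for illustration):
#
#     >>> Xnew = torch.linspace(0, 6, 50)[:, None]
#     >>> loc, var = gpr(Xnew, full_cov=False, noiseless=False)  # noisy mean/variance
#     >>> loc, cov = gpr(Xnew, full_cov=True)                    # noiseless full covariance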