Source code for pyro.distributions.testing.gof

# Copyright (c) 2014, Salesforce.com, Inc.  All rights reserved.
# Copyright (c) 2015, Gamelan Labs, Inc.
# Copyright (c) 2016, Google, Inc.
# Copyright (c) 2019, Gamalon, Inc.
# Copyright Contributors to the Pyro project.
# SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# - Neither the name of Salesforce.com nor the names of its contributors
#   may be used to endorse or promote products derived from this
#   software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
Goodness of Fit Testing
-----------------------

This module implements goodness of fit tests for checking agreement between
distributions' ``.sample()`` and ``.log_prob()`` methods. The main functions
return a goodness of fit p-value ``gof`` which for good data should be
``Uniform(0,1)`` distributed and for bad data should be close to zero. To use
this returned number in tests, set a global variable ``TEST_FAILURE_RATE`` to
something smaller than 1 / number of tests in your suite, then in each test
assert ``gof > TEST_FAILURE_RATE``. For example::

    TEST_FAILURE_RATE = 1 / 20  # For 1 in 20 chance of spurious failure.

    def test_my_distribution():
        d = MyDistribution()
        samples = d.sample([10000])
        probs = d.log_prob(samples).exp()
        gof = auto_goodness_of_fit(samples, probs)
        assert gof > TEST_FAILURE_RATE

This module is a port of the
`goftests <https://github.com/posterior/goftests>`_ library.
"""

import math
import warnings

import torch

from .special import chi2sf

HISTOGRAM_WIDTH = 60


class InvalidTest(ValueError):
    pass


def print_histogram(probs, counts):
    max_count = int(max(counts))
    print("{: >8} {: >8}".format("Prob", "Count"))
    for prob, count in sorted(zip(probs, counts), reverse=True):
        width = int(round(HISTOGRAM_WIDTH * int(count) / max_count))
        print("{: >8.3f} {: >8d} {}".format(prob, count, "-" * width))


[docs]@torch.no_grad() def multinomial_goodness_of_fit( probs, counts, *, total_count=None, plot=False, ): """ Pearson's chi^2 test, on possibly truncated data. https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test :param torch.Tensor probs: Vector of probabilities. :param torch.Tensor counts: Vector of counts. :param int total_count: Optional total count in case data is truncated, otherwise None. :param bool plot: Whether to print a histogram. Defaults to False. :returns: p-value of truncated multinomial sample. :rtype: float """ assert probs.dim() == 1 assert probs.shape == counts.shape if total_count is None: truncated = False total_count = int(counts.sum()) else: truncated = True assert total_count >= counts.sum() if plot: print_histogram(probs, counts) chi_squared = 0 dof = 0 for p, c in zip(probs.tolist(), counts.tolist()): if abs(p - 1) < 1e-8: return 1 if c == total_count else 0 assert p < 1, f"bad probability: {p:g}" if p > 0: mean = total_count * p variance = total_count * p * (1 - p) if not (variance > 1): raise InvalidTest("Goodness of fit is inaccurate; use more samples") chi_squared += (c - mean) ** 2 / variance dof += 1 else: warnings.warn("Zero probability in goodness-of-fit test") if c > 0: return math.inf if not truncated: dof -= 1 survival = chi2sf(chi_squared, dof) return survival
[docs]@torch.no_grad() def unif01_goodness_of_fit(samples, *, plot=False): """ Bin uniformly distributed samples and apply Pearson's chi^2 test. :param torch.Tensor samples: A vector of real-valued samples from a candidate distribution that should be Uniform(0, 1)-distributed. :param bool plot: Whether to print a histogram. Defaults to False. :returns: Goodness of fit, as a p-value. :rtype: float """ assert samples.min() >= 0 assert samples.max() <= 1 bin_count = int(round(len(samples) ** 0.333)) if bin_count < 7: raise InvalidTest("imprecise test, use more samples") probs = torch.ones(bin_count) / bin_count binned = samples.mul(bin_count).long().clamp(min=0, max=bin_count - 1) counts = torch.zeros(bin_count) counts.scatter_add_(0, binned, torch.ones(binned.shape)) return multinomial_goodness_of_fit(probs, counts, plot=plot)
[docs]@torch.no_grad() def exp_goodness_of_fit(samples, plot=False): """ Transform exponentially distribued samples to Uniform(0,1) distribution and assess goodness of fit via binned Pearson's chi^2 test. :param torch.Tensor samples: A vector of real-valued samples from a candidate distribution that should be Exponential(1)-distributed. :param bool plot: Whether to print a histogram. Defaults to False. :returns: Goodness of fit, as a p-value. :rtype: float """ unif01_samples = samples.neg().exp() return unif01_goodness_of_fit(unif01_samples, plot=plot)
[docs]@torch.no_grad() def density_goodness_of_fit(samples, probs, plot=False): """ Transform arbitrary continuous samples to Uniform(0,1) distribution and assess goodness of fit via binned Pearson's chi^2 test. :param torch.Tensor samples: A vector list of real-valued samples from a distribution. :param torch.Tensor probs: A vector of probability densities evaluated at those samples. :param bool plot: Whether to print a histogram. Defaults to False. :returns: Goodness of fit, as a p-value. :rtype: float """ assert samples.shape == probs.shape if len(samples) <= 100: raise InvalidTest("imprecision; use more samples") samples, index = samples.sort(0) probs = probs[index] gaps = samples[1:] - samples[:-1] sparsity = 1 / probs sparsity = 0.5 * (sparsity[1:] + sparsity[:-1]) density = len(samples) / sparsity exp_samples = density * gaps return exp_goodness_of_fit(exp_samples, plot=plot)
def volume_of_sphere(dim, radius): return radius**dim * math.pi ** (0.5 * dim) / math.gamma(0.5 * dim + 1) def get_nearest_neighbor_distances(samples): try: # This version scales as O(N log(N)). from scipy.spatial import cKDTree samples = samples.cpu().numpy() distances, indices = cKDTree(samples).query(samples, k=2) return torch.from_numpy(distances[:, 1]) except ImportError: # This version scales as O(N^2). distances, indices = torch.cdist(samples, samples).kthvalue(k=2) return distances
[docs]@torch.no_grad() def vector_density_goodness_of_fit(samples, probs, *, dim=None, plot=False): """ Transform arbitrary multivariate continuous samples to Univariate(0,1) distribution via nearest neighbor distribution [1,2,3] and assess goodness of fit via binned Pearson's chi^2 test. [1] Peter J. Bickel and Leo Breiman (1983) "Sums of Functions of Nearest Neighbor Distances, Moment Bounds, Limit Theorems and a Goodness of Fit Test" https://projecteuclid.org/download/pdf_1/euclid.aop/1176993668 [2] Mike Williams (2010) "How good are your fits? Unbinned multivariate goodness-of-fit tests in high energy physics." https://arxiv.org/abs/1006.3019 [3] Nearest Neighbour Distribution https://en.wikipedia.org/wiki/Nearest_neighbour_distribution :param torch.Tensor samples: A tensor of real-vector-valued samples from a distribution. :param torch.Tensor probs: A vector of probability densities evaluated at those samples. :param int dim: Optional dimension of the submanifold on which data lie. Defaults to ``samples.shape[-1]``. :param bool plot: Whether to print a histogram. Defaults to False. :returns: Goodness of fit, as a p-value. :rtype: float """ assert samples.shape and len(samples) assert probs.shape == samples.shape[:1] if dim is None: dim = samples.shape[-1] assert dim if len(samples) <= 1000 * dim: raise InvalidTest("imprecision; use more samples") radii = get_nearest_neighbor_distances(samples) density = len(samples) * probs volume = volume_of_sphere(dim, radii) exp_samples = density * volume return exp_goodness_of_fit(exp_samples, plot=plot)
[docs]@torch.no_grad() def auto_goodness_of_fit(samples, probs, *, dim=None, plot=False): """ Dispatch on sample dimension and delegate to either :func:`density_goodness_of_fit` or :func:`vector_density_goodness_of_fit`. :param torch.Tensor samples: A tensor of samples stacked on their leftmost dimension. :param torch.Tensor probs: A vector of probabilities evaluated at those samples. :param int dim: Optional manifold dimension, defaults to ``samples.shape[1:].numel()``. :param bool plot: Whether to print a histogram. Defaults to False. """ assert samples.shape and samples.shape[0] assert probs.shape == samples.shape[:1] samples = samples.reshape(samples.shape[0], -1) ambient_dim = samples.shape[1:].numel() if dim is None: dim = ambient_dim if ambient_dim == 0: return 1.0 if ambient_dim == 1: samples = samples.reshape(-1) return density_goodness_of_fit(samples, probs, plot=plot) return vector_density_goodness_of_fit(samples, probs, dim=dim, plot=plot)