# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""The Independent distribution class."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import distribution as distribution_lib
from tensorflow.python.ops.distributions import kullback_leibler
from tensorflow.python.util import deprecation


class Independent(distribution_lib.Distribution):
  """Independent distribution from batch of distributions.

  This distribution is useful for regarding a collection of independent,
  non-identical distributions as a single random variable. For example, the
  `Independent` distribution composed of a collection of `Bernoulli`
  distributions might define a distribution over an image (where each
  `Bernoulli` is a distribution over one pixel).

  More precisely, a collection of `B` (independent) `E`-variate random
  variables (rvs) `{X_1, ..., X_B}` can be regarded as a `[B, E]`-variate
  random variable `(X_1, ..., X_B)` with probability
  `p(x_1, ..., x_B) = p_1(x_1) * ... * p_B(x_B)`, where `p_b(x_b)` is the
  probability of the `b`-th rv. More generally, `B, E` can be arbitrary shapes.

  Similarly, the `Independent` distribution specifies a distribution over
  `[B, E]`-shaped events. It operates by reinterpreting the rightmost batch
  dims as part of the event dims. The `reinterpreted_batch_ndims` parameter
  controls the number of batch dims that are absorbed as event dims; it must
  satisfy `reinterpreted_batch_ndims <= len(batch_shape)`. For example, the
  `log_prob` function entails a `reduce_sum` over the rightmost
  `reinterpreted_batch_ndims` dims after calling the base distribution's
  `log_prob`. In other words, since the batch dimension(s) index independent
  distributions, the resultant multivariate will have independent components.

  #### Mathematical Details

  The probability function is,

  ```none
  prob(x; reinterpreted_batch_ndims) = tf.reduce_prod(
      dist.prob(x),
      axis=-1-range(reinterpreted_batch_ndims))
  ```
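
  For example, `reinterpreted_batch_ndims=2` yields `axis=[-1, -2]`; the
  product then runs over the two rightmost batch dims.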

  #### Examples

  ```python
  import tensorflow_probability as tfp
  tfd = tfp.distributions

  # Make independent distribution from a 2-batch Normal.
  ind = tfd.Independent(
      distribution=tfd.Normal(loc=[-1., 1], scale=[0.1, 0.5]),
      reinterpreted_batch_ndims=1)

  # All batch dims have been "absorbed" into event dims.
  ind.batch_shape  # ==> []
  ind.event_shape  # ==> [2]

  # Make independent distribution from a 2-batch bivariate Normal.
  ind = tfd.Independent(
      distribution=tfd.MultivariateNormalDiag(
          loc=[[-1., 1], [1, -1]],
          scale_identity_multiplier=[1., 0.5]),
      reinterpreted_batch_ndims=1)

  # All batch dims have been "absorbed" into event dims.
  ind.batch_shape  # ==> []
  ind.event_shape  # ==> [2, 2]
  ```
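
  The `log_prob` of an `Independent` is the base distribution's `log_prob`
  summed over the reinterpreted dims. A sketch using `ind` and `tfd` from the
  first example above, and assuming the usual `import tensorflow as tf`:

  ```python
  x = ind.sample()  # shape: [2]
  # Both expressions below evaluate to the same scalar:
  ind.log_prob(x)
  tf.reduce_sum(ind.distribution.log_prob(x), axis=-1)
  ```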

  """

  @deprecation.deprecated(
      "2018-10-01",
      "The TensorFlow Distributions library has moved to "
      "TensorFlow Probability "
      "(https://github.com/tensorflow/probability). You "
      "should update all references to use `tfp.distributions` "
      "instead of `tf.contrib.distributions`.",
      warn_once=True)
  def __init__(
      self, distribution, reinterpreted_batch_ndims=None,
      validate_args=False, name=None):
    110     """Construct a `Independent` distribution.
    111 
    112     Args:
    113       distribution: The base distribution instance to transform. Typically an
    114         instance of `Distribution`.
    115       reinterpreted_batch_ndims: Scalar, integer number of rightmost batch dims
    116         which will be regarded as event dims. When `None` all but the first
    117         batch axis (batch axis 0) will be transferred to event dimensions
    118         (analogous to `tf.layers.flatten`).
    119       validate_args: Python `bool`.  Whether to validate input with asserts.
    120         If `validate_args` is `False`, and the inputs are invalid,
    121         correct behavior is not guaranteed.
    122       name: The name for ops managed by the distribution.
    123         Default value: `Independent + distribution.name`.
    124 
    125     Raises:
    126       ValueError: if `reinterpreted_batch_ndims` exceeds
    127         `distribution.batch_ndims`
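
    For example (a sketch of the default behavior, with `tfd` and `tf` as in
    the class docstring examples):

    ```python
    # Base distribution has batch_shape [3, 2]; the default
    # reinterpreted_batch_ndims=None absorbs all but the leftmost batch dim.
    ind = tfd.Independent(tfd.Normal(loc=tf.zeros([3, 2]), scale=1.))
    ind.batch_shape  # ==> [3]
    ind.event_shape  # ==> [2]
    ```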
    """
    parameters = dict(locals())
    name = name or "Independent" + distribution.name
    self._distribution = distribution
    with ops.name_scope(name) as name:
      if reinterpreted_batch_ndims is None:
        reinterpreted_batch_ndims = self._get_default_reinterpreted_batch_ndims(
            distribution)
      reinterpreted_batch_ndims = ops.convert_to_tensor(
          reinterpreted_batch_ndims,
          dtype=dtypes.int32,
          name="reinterpreted_batch_ndims")
      self._reinterpreted_batch_ndims = reinterpreted_batch_ndims
      # Prefer the statically known value, when available, so downstream
      # shape logic can also remain static.
      self._static_reinterpreted_batch_ndims = tensor_util.constant_value(
          reinterpreted_batch_ndims)
      if self._static_reinterpreted_batch_ndims is not None:
        self._reinterpreted_batch_ndims = self._static_reinterpreted_batch_ndims
      super(Independent, self).__init__(
          dtype=self._distribution.dtype,
          reparameterization_type=self._distribution.reparameterization_type,
          validate_args=validate_args,
          allow_nan_stats=self._distribution.allow_nan_stats,
          parameters=parameters,
          graph_parents=(
              [reinterpreted_batch_ndims] +
              distribution._graph_parents),  # pylint: disable=protected-access
          name=name)
      self._runtime_assertions = self._make_runtime_assertions(
          distribution, reinterpreted_batch_ndims, validate_args)

  @property
  def distribution(self):
    return self._distribution

  @property
  def reinterpreted_batch_ndims(self):
    return self._reinterpreted_batch_ndims

  def _batch_shape_tensor(self):
    with ops.control_dependencies(self._runtime_assertions):
      batch_shape = self.distribution.batch_shape_tensor()
      # Use the statically known number of batch dims when available;
      # otherwise fall back to a dynamic shape op.
      dim0 = tensor_shape.dimension_value(
          batch_shape.shape.with_rank_at_least(1)[0])
      batch_ndims = (dim0
                     if dim0 is not None
                     else array_ops.shape(batch_shape)[0])
      return batch_shape[:batch_ndims - self.reinterpreted_batch_ndims]

  def _batch_shape(self):
    batch_shape = self.distribution.batch_shape
    if (self._static_reinterpreted_batch_ndims is None
        or batch_shape.ndims is None):
      return tensor_shape.TensorShape(None)
    d = batch_shape.ndims - self._static_reinterpreted_batch_ndims
    return batch_shape[:d]

  def _event_shape_tensor(self):
    with ops.control_dependencies(self._runtime_assertions):
      batch_shape = self.distribution.batch_shape_tensor()
      dim0 = tensor_shape.dimension_value(
          batch_shape.shape.with_rank_at_least(1)[0])
      batch_ndims = (dim0
                     if dim0 is not None
                     else array_ops.shape(batch_shape)[0])
      return array_ops.concat([
          batch_shape[batch_ndims - self.reinterpreted_batch_ndims:],
          self.distribution.event_shape_tensor(),
      ], axis=0)

  def _event_shape(self):
    batch_shape = self.distribution.batch_shape
    if (self._static_reinterpreted_batch_ndims is None
        or batch_shape.ndims is None):
      return tensor_shape.TensorShape(None)
    d = batch_shape.ndims - self._static_reinterpreted_batch_ndims
    return batch_shape[d:].concatenate(self.distribution.event_shape)

  def _sample_n(self, n, seed):
    with ops.control_dependencies(self._runtime_assertions):
      return self.distribution.sample(sample_shape=n, seed=seed)

  def _log_prob(self, x):
    with ops.control_dependencies(self._runtime_assertions):
      return self._reduce_sum(self.distribution.log_prob(x))

  def _entropy(self):
    with ops.control_dependencies(self._runtime_assertions):
      return self._reduce_sum(self.distribution.entropy())

  def _mean(self):
    with ops.control_dependencies(self._runtime_assertions):
      return self.distribution.mean()

  def _variance(self):
    with ops.control_dependencies(self._runtime_assertions):
      return self.distribution.variance()

  def _stddev(self):
    with ops.control_dependencies(self._runtime_assertions):
      return self.distribution.stddev()

  def _mode(self):
    with ops.control_dependencies(self._runtime_assertions):
      return self.distribution.mode()

  def _make_runtime_assertions(
      self, distribution, reinterpreted_batch_ndims, validate_args):
    assertions = []
    static_reinterpreted_batch_ndims = tensor_util.constant_value(
        reinterpreted_batch_ndims)
    batch_ndims = distribution.batch_shape.ndims
    if batch_ndims is not None and static_reinterpreted_batch_ndims is not None:
      if static_reinterpreted_batch_ndims > batch_ndims:
        raise ValueError("reinterpreted_batch_ndims({}) cannot exceed "
                         "distribution.batch_ndims({})".format(
                             static_reinterpreted_batch_ndims, batch_ndims))
    elif validate_args:
      batch_shape = distribution.batch_shape_tensor()
      dim0 = tensor_shape.dimension_value(
          batch_shape.shape.with_rank_at_least(1)[0])
      batch_ndims = (
          dim0
          if dim0 is not None
          else array_ops.shape(batch_shape)[0])
      assertions.append(check_ops.assert_less_equal(
          reinterpreted_batch_ndims, batch_ndims,
          message=("reinterpreted_batch_ndims cannot exceed "
                   "distribution.batch_ndims")))
    return assertions

  def _reduce_sum(self, stat):
    # Sum over the reinterpreted dims, i.e.
    # axis = [-1, -2, ..., -reinterpreted_batch_ndims].
    if self._static_reinterpreted_batch_ndims is None:
      range_ = math_ops.range(self._reinterpreted_batch_ndims)
    else:
      range_ = np.arange(self._static_reinterpreted_batch_ndims)
    return math_ops.reduce_sum(stat, axis=-1-range_)

  def _get_default_reinterpreted_batch_ndims(self, distribution):
    """Computes the default for the reinterpreted_batch_ndims __init__ arg."""
    ndims = distribution.batch_shape.ndims
    if ndims is None:
      which_maximum = math_ops.maximum
      ndims = array_ops.shape(distribution.batch_shape_tensor())[0]
    else:
      which_maximum = np.maximum
    return which_maximum(0, ndims - 1)


@kullback_leibler.RegisterKL(Independent, Independent)
@deprecation.deprecated(
    "2018-10-01",
    "The TensorFlow Distributions library has moved to "
    "TensorFlow Probability "
    "(https://github.com/tensorflow/probability). You "
    "should update all references to use `tfp.distributions` "
    "instead of `tf.contrib.distributions`.",
    warn_once=True)
def _kl_independent(a, b, name="kl_independent"):
    286   """Batched KL divergence `KL(a || b)` for Independent distributions.
    287 
    288   We can leverage the fact that
    289   ```
    290   KL(Independent(a) || Independent(b)) = sum(KL(a || b))
    291   ```
    292   where the sum is over the `reinterpreted_batch_ndims`.
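
  For instance (a sketch; `tfd` abbreviates `tfp.distributions`, as in the
  `Independent` class docstring):

  ```python
  a = tfd.Independent(tfd.Normal(loc=[0., 0.], scale=1.), 1)
  b = tfd.Independent(tfd.Normal(loc=[1., 1.], scale=1.), 1)
  # The result is a scalar: the sum of the two component Normal KLs.
  kl = tfd.kl_divergence(a, b)
  ```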

  Args:
    a: Instance of `Independent`.
    b: Instance of `Independent`.
    name: (optional) name to use for created ops. Default "kl_independent".

  Returns:
    Batchwise `KL(a || b)`.

  Raises:
    ValueError: If the event shapes of `a` and `b`, or of their underlying
      distributions, don't match.
  """
  p = a.distribution
  q = b.distribution

  # The KL between any two (non-)batched distributions is a scalar.
  # Given that the KL between two factored distributions is the sum, i.e.
  # KL(p1(x)p2(y) || q1(x)q2(y)) = KL(p1 || q1) + KL(p2 || q2), we compute
  # KL(p || q) and then `reduce_sum` over the reinterpreted batch dimensions.
  if a.event_shape.is_fully_defined() and b.event_shape.is_fully_defined():
    if a.event_shape == b.event_shape:
      if p.event_shape == q.event_shape:
        num_reduce_dims = a.event_shape.ndims - p.event_shape.ndims
        reduce_dims = [-i - 1 for i in range(0, num_reduce_dims)]

        return math_ops.reduce_sum(
            kullback_leibler.kl_divergence(p, q, name=name), axis=reduce_dims)
      else:
        raise NotImplementedError("KL between Independents with different "
                                  "event shapes not supported.")
    else:
      raise ValueError("Event shapes do not match.")
  else:
    with ops.control_dependencies([
        check_ops.assert_equal(a.event_shape_tensor(), b.event_shape_tensor()),
        check_ops.assert_equal(p.event_shape_tensor(), q.event_shape_tensor())
    ]):
      # Number of reinterpreted dims: rank(a.event_shape) - rank(p.event_shape).
      num_reduce_dims = (
          array_ops.shape(a.event_shape_tensor())[0] -
          array_ops.shape(p.event_shape_tensor())[0])
      # Sum over axes [-num_reduce_dims, ..., -1], matching the static branch.
      reduce_dims = math_ops.range(-num_reduce_dims, 0, 1)
      return math_ops.reduce_sum(
          kullback_leibler.kl_divergence(p, q, name=name), axis=reduce_dims)
    337