Home | History | Annotate | Download | only in state_space_models
      1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 r"""Multivariate autoregressive model (vector autoregression).
     16 
     17 Implements the following model (num_blocks = max(ar_order, ma_order + 1)):
     18 
     19   y(t, 1) = \sum_{i=1}^{ar_order} ar_coefs[i] * y(t - 1, i)
     20   y(t, i) = y(t - 1, i - 1) + ma_coefs[i - 1] * e(t) for 1 < i < num_blocks
     21   y(t, num_blocks) = y(t - 1, num_blocks - 1) + e(t)
     22 
     23 Where e(t) are Gaussian with zero mean and learned covariance.
     24 
     25 Each element of ar_coefs and ma_coefs is a [num_features x num_features]
     26 matrix. Each y(t, i) is a vector of length num_features. Indices in the above
     27 equations are one-based. Initial conditions y(0, i) come from prior state (which
     28 may either be learned or left as a constant with high prior covariance).
     29 
     30 If ar_order > ma_order, the observation model is:
     31   y(t, 1) + observation_noise(t)
     32 
     33 If ma_order >= ar_order, it is (to observe the moving average component):
     34   y(t, 1) + y(t, num_blocks) + observation_noise(t)
     35 
     36 Where observation_noise(t) are Gaussian with zero mean and learned covariance.
     37 
     38 This implementation uses a formulation which puts all of the autoregressive
     39 coefficients in the transition equation for the observed component, which
     40 enables learning using truncated backpropagation. Noise is not applied directly
     41 to the observed component (with the exception of standard observation noise),
     42 which further aids learning of the autoregressive coefficients when VARMA is in
     43 an ensemble with other models (in which case having an observation noise term is
     44 usually unavoidable).
     45 """
     46 
     47 from __future__ import absolute_import
     48 from __future__ import division
     49 from __future__ import print_function
     50 
     51 from tensorflow.contrib.timeseries.python.timeseries import math_utils
     52 from tensorflow.contrib.timeseries.python.timeseries.state_space_models import state_space_model
     53 
     54 from tensorflow.python.framework import ops
     55 from tensorflow.python.ops import array_ops
     56 from tensorflow.python.ops import init_ops
     57 from tensorflow.python.ops import linalg_ops
     58 from tensorflow.python.ops import math_ops
     59 from tensorflow.python.ops import variable_scope
     60 
     61 
class VARMA(state_space_model.StateSpaceModel):
  """A VARMA model implementation as a special case of the state space model."""

  def __init__(self,
               autoregressive_order,
               moving_average_order,
               # NOTE(review): this default is evaluated once at class
               # definition time and shared by every VARMA constructed without
               # an explicit configuration; assumed the configuration object is
               # treated as immutable downstream -- confirm.
               configuration=state_space_model.StateSpaceModelConfiguration()):
    """Construct a VARMA model.

    The size of the latent state for this model is:
      num_features * max(autoregressive_order, moving_average_order + 1)
    Square matrices of this size are constructed and multiplied.

    Args:
      autoregressive_order: The maximum autoregressive lag.
      moving_average_order: The maximum moving average lag, after which
        transient deviations are expected to return to their long-term mean.
      configuration: A StateSpaceModelConfiguration object.
    """
    self.ar_order = autoregressive_order
    self.ma_order = moving_average_order
    # One latent block per AR lag, with at least one extra block beyond the MA
    # order so accumulated moving average errors have somewhere to land before
    # being observed (see the module docstring's definition of num_blocks).
    self.state_num_blocks = max(autoregressive_order, moving_average_order + 1)
    super(VARMA, self).__init__(configuration=configuration)
    # self.num_features is established by the superclass constructor above.
    self.state_dimension = self.state_num_blocks * self.num_features

  def _define_parameters(self, observation_transition_tradeoff_log=None):
    """Create AR/MA coefficient variables, then delegate to the superclass.

    Args:
      observation_transition_tradeoff_log: Passed through unchanged to
        state_space_model.StateSpaceModel._define_parameters.
    """
    with variable_scope.variable_scope(self._variable_scope):
      # TODO(allenl): Evaluate parameter transformations for AR/MA coefficients
      # which improve interpretability/stability.
      # AR coefficients are zero-initialized: one [num_features x num_features]
      # matrix per autoregressive lag, stacked along the last dimension.
      self.ar_coefs = variable_scope.get_variable(
          name="ar_coefs",
          shape=[self.num_features, self.num_features, self.ar_order],
          dtype=self.dtype,
          initializer=init_ops.zeros_initializer())
      # MA coefficients start as one identity matrix per moving average lag,
      # stacked along the first dimension (shape inferred from initializer).
      self.ma_coefs = variable_scope.get_variable(
          name="ma_coefs",
          initializer=array_ops.tile(
              linalg_ops.eye(self.num_features, dtype=self.dtype)[None, :, :],
              [self.ma_order, 1, 1]),
          dtype=self.dtype)
    super(VARMA, self)._define_parameters(
        observation_transition_tradeoff_log=observation_transition_tradeoff_log)

  def get_state_transition(self):
    """Construct state transition matrix from VARMA parameters.

    Returns:
      the state transition matrix. It has shape
        [self.state_dimension, self.state_dimension].
    """
    # Pad any unused AR blocks with zeros. The extra state is necessary if
    # ma_order >= ar_order.
    # After padding to state_num_blocks lags, flatten the
    # [num_features, num_features, state_num_blocks] coefficient tensor into
    # the first num_features rows of the transition matrix.
    ar_coefs_padded = array_ops.reshape(
        array_ops.pad(self.ar_coefs,
                      [[0, 0], [0, 0],
                       [0, self.state_num_blocks - self.ar_order]]),
        [self.num_features, self.state_dimension])
    # The remaining rows shift each latent block down by one position
    # (identity on the first state_num_blocks - 1 blocks, zeros in the last
    # num_features columns), implementing y(t, i) = y(t - 1, i - 1).
    shift_matrix = array_ops.pad(
        linalg_ops.eye(
            (self.state_num_blocks - 1) * self.num_features, dtype=self.dtype),
        [[0, 0], [0, self.num_features]])
    return array_ops.concat([ar_coefs_padded, shift_matrix], axis=0)

  def get_noise_transform(self):
    """Construct state noise transform matrix from VARMA parameters.

    Returns:
      the state noise transform matrix. It has shape
        [self.state_dimension, self.num_features].
    """
    # Noise is broadcast, through the moving average coefficients, to
    # un-observed parts of the latent state.
    # Zero-pad above the MA coefficients so they align with the newest
    # blocks of the state, then flatten to a 2-D transform.
    ma_coefs_padded = array_ops.reshape(
        array_ops.pad(self.ma_coefs,
                      [[self.state_num_blocks - 1 - self.ma_order, 0], [0, 0],
                       [0, 0]]),
        [(self.state_num_blocks - 1) * self.num_features, self.num_features],
        name="noise_transform")
    # Deterministically apply noise to the oldest component.
    return array_ops.concat(
        [ma_coefs_padded,
         linalg_ops.eye(self.num_features, dtype=self.dtype)],
        axis=0)

  def get_observation_model(self, times):
    """Construct observation model matrix from VARMA parameters.

    Args:
      times: A [batch size] vector indicating the times observation models are
          requested for. Unused.
    Returns:
      the observation model matrix. It has shape
        [self.num_features, self.state_dimension].
    """
    del times  # StateSpaceModel will broadcast along the batch dimension
    if self.ar_order > self.ma_order or self.state_num_blocks < 2:
      # Observe only the first block: identity on the first num_features
      # columns, zeros elsewhere.
      return array_ops.pad(
          linalg_ops.eye(self.num_features, dtype=self.dtype),
          [[0, 0], [0, self.num_features * (self.state_num_blocks - 1)]],
          name="observation_model")
    else:
      # Add a second observed component which "catches" the accumulated moving
      # average errors as they reach the end of the state. If ar_order >
      # ma_order, this is unnecessary, since accumulated errors cycle naturally.
      return array_ops.concat(
          [
              array_ops.pad(
                  linalg_ops.eye(self.num_features, dtype=self.dtype),
                  [[0, 0], [0,
                            self.num_features * (self.state_num_blocks - 2)]]),
              linalg_ops.eye(self.num_features, dtype=self.dtype)
          ],
          axis=1,
          name="observation_model")

  def get_state_transition_noise_covariance(
      self, minimum_initial_variance=1e-5):
    """Create the learned transition noise covariance variable.

    Args:
      minimum_initial_variance: Floor applied to the data-derived variance
        before taking its log, guarding against log(0).
    Returns:
      A covariance matrix variable (via math_utils.variable_covariance_matrix)
      for the state transition noise, sized to the noise transform's second
      dimension.
    """
    # Most state space models use only an explicit observation noise term to
    # model deviations from expectations, and so a low initial transition noise
    # parameter is helpful there. Since deviations from expectations are also
    # modeled as transition noise in VARMA, we set its initial value based on a
    # slight over-estimate of the empirical observation noise.
    if self._input_statistics is not None:
      feature_variance = self._scale_variance(
          self._input_statistics.series_start_moments.variance)
      initial_transition_noise_scale = math_ops.log(
          math_ops.maximum(
              math_ops.reduce_mean(feature_variance), minimum_initial_variance))
    else:
      # No input statistics available; start from a neutral log-scale of 0
      # (i.e. unit variance).
      initial_transition_noise_scale = 0.
    state_noise_transform = ops.convert_to_tensor(
        self.get_noise_transform(), dtype=self.dtype)
    # The noise covariance is square with the transform's column count
    # (num_features), read from the static shape (TF1-style .value access).
    state_noise_dimension = state_noise_transform.get_shape()[1].value
    return math_utils.variable_covariance_matrix(
        state_noise_dimension, "state_transition_noise",
        dtype=self.dtype,
        initial_overall_scale_log=initial_transition_noise_scale)
    199