# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Multivariate autoregressive model (vector autoregression).

Implements the following model (num_blocks = max(ar_order, ma_order + 1)):

  y(t, 1) = \sum_{i=1}^{ar_order} ar_coefs[i] * y(t - 1, i)
  y(t, i) = y(t - 1, i - 1) + ma_coefs[i - 1] * e(t) for 1 < i < num_blocks
  y(t, num_blocks) = y(t - 1, num_blocks - 1) + e(t)

Where e(t) are Gaussian with zero mean and learned covariance.

Each element of ar_coefs and ma_coefs is a [num_features x num_features]
matrix. Each y(t, i) is a vector of length num_features. Indices in the above
equations are one-based. Initial conditions y(0, i) come from prior state (which
may either be learned or left as a constant with high prior covariance).

If ar_order > ma_order, the observation model is:
  y(t, 1) + observation_noise(t)

If ma_order >= ar_order, it is (to observe the moving average component):
  y(t, 1) + y(t, num_blocks) + observation_noise(t)

Where observation_noise(t) are Gaussian with zero mean and learned covariance.

This implementation uses a formulation which puts all of the autoregressive
coefficients in the transition equation for the observed component, which
enables learning using truncated backpropagation. Noise is not applied directly
to the observed component (with the exception of standard observation noise),
which further aids learning of the autoregressive coefficients when VARMA is in
an ensemble with other models (in which case having an observation noise term is
usually unavoidable).
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.timeseries.python.timeseries import math_utils
from tensorflow.contrib.timeseries.python.timeseries.state_space_models import state_space_model

from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope


class VARMA(state_space_model.StateSpaceModel):
  """A VARMA model implementation as a special case of the state space model."""

  # NOTE(review): the default `configuration` object below is constructed once
  # at function-definition time and shared by every call that relies on the
  # default — confirm StateSpaceModelConfiguration is treated as immutable.
  def __init__(self,
               autoregressive_order,
               moving_average_order,
               configuration=state_space_model.StateSpaceModelConfiguration()):
    """Construct a VARMA model.

    The size of the latent state for this model is:
      num_features * max(autoregressive_order, moving_average_order + 1)
    Square matrices of this size are constructed and multiplied.

    Args:
      autoregressive_order: The maximum autoregressive lag.
      moving_average_order: The maximum moving average lag, after which
        transient deviations are expected to return to their long-term mean.
      configuration: A StateSpaceModelConfiguration object.
    """
    self.ar_order = autoregressive_order
    self.ma_order = moving_average_order
    # One extra block beyond ma_order so accumulated moving average errors have
    # somewhere to land (see the module docstring's model equations).
    self.state_num_blocks = max(autoregressive_order, moving_average_order + 1)
    # super().__init__ sets self.num_features, which state_dimension needs.
    super(VARMA, self).__init__(configuration=configuration)
    self.state_dimension = self.state_num_blocks * self.num_features

  def _define_parameters(self, observation_transition_tradeoff_log=None):
    """Create the AR/MA coefficient variables, then defer to the base class.

    Args:
      observation_transition_tradeoff_log: Passed through to the parent's
        _define_parameters.
    """
    with variable_scope.variable_scope(self._variable_scope):
      # TODO(allenl): Evaluate parameter transformations for AR/MA coefficients
      # which improve interpretability/stability.
      self.ar_coefs = variable_scope.get_variable(
          name="ar_coefs",
          shape=[self.num_features, self.num_features, self.ar_order],
          dtype=self.dtype,
          initializer=init_ops.zeros_initializer())
      # MA coefficients start as identity matrices (one per lag).
      self.ma_coefs = variable_scope.get_variable(
          name="ma_coefs",
          initializer=array_ops.tile(
              linalg_ops.eye(self.num_features, dtype=self.dtype)[None, :, :],
              [self.ma_order, 1, 1]),
          dtype=self.dtype)
    super(VARMA, self)._define_parameters(
        observation_transition_tradeoff_log=observation_transition_tradeoff_log)

  def get_state_transition(self):
    """Construct state transition matrix from VARMA parameters.

    Returns:
      the state transition matrix. It has shape
      [self.state_dimension, self.state_dimension].
    """
    # Pad any unused AR blocks with zeros. The extra state is necessary if
    # ma_order >= ar_order.
    ar_coefs_padded = array_ops.reshape(
        array_ops.pad(self.ar_coefs,
                      [[0, 0], [0, 0],
                       [0, self.state_num_blocks - self.ar_order]]),
        [self.num_features, self.state_dimension])
    # Identity sub-blocks shift each y(t - 1, i - 1) into y(t, i).
    shift_matrix = array_ops.pad(
        linalg_ops.eye(
            (self.state_num_blocks - 1) * self.num_features, dtype=self.dtype),
        [[0, 0], [0, self.num_features]])
    return array_ops.concat([ar_coefs_padded, shift_matrix], axis=0)

  def get_noise_transform(self):
    """Construct state noise transform matrix from VARMA parameters.

    Returns:
      the state noise transform matrix. It has shape
      [self.state_dimension, self.num_features].
    """
    # Noise is broadcast, through the moving average coefficients, to
    # un-observed parts of the latent state.
    ma_coefs_padded = array_ops.reshape(
        array_ops.pad(self.ma_coefs,
                      [[self.state_num_blocks - 1 - self.ma_order, 0], [0, 0],
                       [0, 0]]),
        [(self.state_num_blocks - 1) * self.num_features, self.num_features],
        name="noise_transform")
    # Deterministically apply noise to the oldest component.
    return array_ops.concat(
        [ma_coefs_padded,
         linalg_ops.eye(self.num_features, dtype=self.dtype)],
        axis=0)

  def get_observation_model(self, times):
    """Construct observation model matrix from VARMA parameters.

    Args:
      times: A [batch size] vector indicating the times observation models are
          requested for. Unused.
    Returns:
      the observation model matrix. It has shape
      [self.num_features, self.state_dimension].
    """
    del times  # StateSpaceModel will broadcast along the batch dimension
    if self.ar_order > self.ma_order or self.state_num_blocks < 2:
      return array_ops.pad(
          linalg_ops.eye(self.num_features, dtype=self.dtype),
          [[0, 0], [0, self.num_features * (self.state_num_blocks - 1)]],
          name="observation_model")
    else:
      # Add a second observed component which "catches" the accumulated moving
      # average errors as they reach the end of the state. If ar_order >
      # ma_order, this is unnecessary, since accumulated errors cycle naturally.
      return array_ops.concat(
          [
              array_ops.pad(
                  linalg_ops.eye(self.num_features, dtype=self.dtype),
                  [[0, 0], [0,
                            self.num_features * (self.state_num_blocks - 2)]]),
              linalg_ops.eye(self.num_features, dtype=self.dtype)
          ],
          axis=1,
          name="observation_model")

  def get_state_transition_noise_covariance(
      self, minimum_initial_variance=1e-5):
    """Construct the transition noise covariance parameter.

    Args:
      minimum_initial_variance: A floor on the data-derived initial scale, to
          avoid log(0) or degenerate tiny initializations.
    Returns:
      A learnable covariance matrix variable for the state transition noise,
      sized by the noise transform's column count.
    """
    # Most state space models use only an explicit observation noise term to
    # model deviations from expectations, and so a low initial transition noise
    # parameter is helpful there. Since deviations from expectations are also
    # modeled as transition noise in VARMA, we set its initial value based on a
    # slight over-estimate of the empirical observation noise.
    if self._input_statistics is not None:
      feature_variance = self._scale_variance(
          self._input_statistics.series_start_moments.variance)
      initial_transition_noise_scale = math_ops.log(
          math_ops.maximum(
              math_ops.reduce_mean(feature_variance), minimum_initial_variance))
    else:
      initial_transition_noise_scale = 0.
    state_noise_transform = ops.convert_to_tensor(
        self.get_noise_transform(), dtype=self.dtype)
    state_noise_dimension = state_noise_transform.get_shape()[1].value
    return math_utils.variable_covariance_matrix(
        state_noise_dimension, "state_transition_noise",
        dtype=self.dtype,
        initial_overall_scale_log=initial_transition_noise_scale)