1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Approximate kernel mapper for RBF kernel based on Random Fourier Features.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import math 22 23 import numpy as np 24 25 from tensorflow.contrib.kernel_methods.python.mappers import dense_kernel_mapper as dkm 26 from tensorflow.python.framework import constant_op 27 from tensorflow.python.framework import dtypes 28 from tensorflow.python.ops import math_ops 29 30 31 # TODO(sibyl-vie3Poto,felixyu): add an option to control whether the parameters in the 32 # kernel map are trainable. 33 class RandomFourierFeatureMapper(dkm.DenseKernelMapper): 34 r"""Class that implements Random Fourier Feature Mapping (RFFM) in TensorFlow. 35 36 The RFFM mapping is used to approximate the Gaussian (RBF) kernel: 37 ``` 38 exp(-||x-y||_2^2 / (2 * sigma^2)) 39 ``` 40 41 The implementation of RFFM is based on the following paper: 42 "Random Features for Large-Scale Kernel Machines" by Ali Rahimi and Ben Recht. 43 (link: https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf) 44 45 The mapping uses a matrix `Omega \in R^{d x D}` and a bias vector `b \in R^D` 46 where `d` is the input dimension (number of dense input features) and `D` is 47 the output dimension (i.e., dimension of the feature space the input is mapped 48 to). Each entry of `Omega` is sampled i.i.d. from a (scaled) Gaussian 49 distribution and each entry of `b` is sampled independently and uniformly from 50 [0, 2 * pi]. 51 52 For a single input feature vector x in R^d, its RFFM is defined as: 53 ``` 54 sqrt(2/D) * cos(x * Omega + b) 55 ``` 56 where `cos` is the element-wise cosine function and `x, b` are represented as 57 row vectors. The aforementioned paper shows that the linear kernel of 58 RFFM-mapped vectors approximates the Gaussian kernel of the initial vectors. 59 60 """ 61 62 def __init__(self, input_dim, output_dim, stddev=1.0, seed=1, name=None): 63 """Constructs a RandomFourierFeatureMapper instance. 64 65 Args: 66 input_dim: The dimension (number of features) of the tensors to be mapped. 67 output_dim: The output dimension of the mapping. 68 stddev: The standard deviation of the Gaussian kernel to be approximated. 69 The error of the classifier trained using this approximation is very 70 sensitive to this parameter. 71 seed: An integer used to initialize the parameters (`Omega` and `b`) of 72 the mapper. For repeatable sequences across different invocations of the 73 mapper object (for instance, to ensure consistent mapping both at 74 training and eval/inference if these happen in different invocations), 75 set this to the same integer. 76 name: name for the mapper object. 77 """ 78 # TODO(sibyl-vie3Poto): Maybe infer input_dim and/or output_dim (if not explicitly 79 # provided). input_dim can be inferred lazily, the first time map is called. 80 # output_dim can be inferred from input_dim using heuristics on the error of 81 # the approximation (and, by extension, the error of the classification 82 # based on the approximation). 83 self._input_dim = input_dim 84 self._output_dim = output_dim 85 self._stddev = stddev 86 self._seed = seed 87 self._name = name 88 89 @property 90 def name(self): 91 """Returns a name for the `RandomFourierFeatureMapper` instance. 92 93 If the name provided in the constructor is `None`, then the object's unique 94 id is returned. 95 96 Returns: 97 A name for the `RandomFourierFeatureMapper` instance. 98 """ 99 return self._name or str(id(self)) 100 101 @property 102 def input_dim(self): 103 return self._input_dim 104 105 @property 106 def output_dim(self): 107 return self._output_dim 108 109 def map(self, input_tensor): 110 """Maps each row of input_tensor using random Fourier features. 111 112 Args: 113 input_tensor: a `Tensor` containing input features. It's shape is 114 [batch_size, self._input_dim]. 115 116 Returns: 117 A `Tensor` of shape [batch_size, self._output_dim] containing RFFM-mapped 118 features. 119 120 Raises: 121 InvalidShapeError: if the shape of the `input_tensor` is inconsistent with 122 expected input dimension. 123 """ 124 input_tensor_shape = input_tensor.get_shape() 125 if len(input_tensor_shape) != 2: 126 raise dkm.InvalidShapeError( 127 'The shape of the tensor should be 2. Got %d instead.' % 128 len(input_tensor_shape)) 129 130 features_dim = input_tensor_shape[1] 131 if features_dim != self._input_dim: 132 raise dkm.InvalidShapeError( 133 'Invalid dimension: expected %d input features, got %d instead.' % 134 (self._input_dim, features_dim)) 135 136 # Add ops that compute (deterministically) omega_matrix and bias based on 137 # the provided seed. 138 # TODO(sibyl-vie3Poto): Storing the mapper's parameters (omega_matrix and bias) as 139 # constants incurs no RPC calls to the parameter server during distributed 140 # training. However, if the parameters grow too large (for instance if they 141 # don't fit into memory or if they blow up the size of the GraphDef proto), 142 # stroring them as constants is no longer an option. In this case, we should 143 # have a heuristic to choose out of one of the following alternatives: 144 # a) store them as variables (in the parameter server) 145 # b) store them as worker local variables 146 # c) generating on the fly the omega matrix at each step 147 np.random.seed(self._seed) 148 omega_matrix_shape = [self._input_dim, self._output_dim] 149 bias_shape = [self._output_dim] 150 151 omega_matrix = constant_op.constant( 152 np.random.normal( 153 scale=1.0 / self._stddev, size=omega_matrix_shape), 154 dtype=dtypes.float32) 155 bias = constant_op.constant( 156 np.random.uniform( 157 low=0.0, high=2 * np.pi, size=bias_shape), 158 dtype=dtypes.float32) 159 160 x_omega_plus_bias = math_ops.add( 161 math_ops.matmul(input_tensor, omega_matrix), bias) 162 return math.sqrt(2.0 / self._output_dim) * math_ops.cos(x_omega_plus_bias) 163