# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Approximate kernel mapper for RBF kernel based on Random Fourier Features."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np

from tensorflow.contrib.kernel_methods.python.mappers import dense_kernel_mapper as dkm
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import math_ops


# TODO(sibyl-vie3Poto,felixyu): add an option to control whether the parameters in the
# kernel map are trainable.
class RandomFourierFeatureMapper(dkm.DenseKernelMapper):
  r"""Class that implements Random Fourier Feature Mapping (RFFM) in TensorFlow.

  The RFFM mapping is used to approximate the Gaussian (RBF) kernel:
  ```
  exp(-||x-y||_2^2 / (2 * sigma^2))
  ```

  The implementation of RFFM is based on the following paper:
  "Random Features for Large-Scale Kernel Machines" by Ali Rahimi and Ben Recht.
  (link: https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)

  The mapping uses a matrix `Omega \in R^{d x D}` and a bias vector `b \in R^D`
  where `d` is the input dimension (number of dense input features) and `D` is
  the output dimension (i.e., dimension of the feature space the input is mapped
  to). Each entry of `Omega` is sampled i.i.d. from a (scaled) Gaussian
  distribution and each entry of `b` is sampled independently and uniformly from
  [0, 2 * pi].

  For a single input feature vector x in R^d, its RFFM is defined as:
  ```
      sqrt(2/D) * cos(x * Omega + b)
  ```
  where `cos` is the element-wise cosine function and `x, b` are represented as
  row vectors. The aforementioned paper shows that the linear kernel of
  RFFM-mapped vectors approximates the Gaussian kernel of the initial vectors.

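  A minimal usage sketch (here `dense_input` stands for any dense float32
  `Tensor` of shape `[batch_size, 10]`; the sizes are illustrative):
  ```
  mapper = RandomFourierFeatureMapper(input_dim=10, output_dim=2000, stddev=2.0)
  mapped_features = mapper.map(dense_input)  # shape [batch_size, 2000]
  ```
  For two such inputs `x` and `y`, the dot product of their mapped images
  approximates `exp(-||x-y||_2^2 / (2 * 2.0^2))`, with the approximation
  improving as `output_dim` grows.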
  """

  def __init__(self, input_dim, output_dim, stddev=1.0, seed=1, name=None):
    """Constructs a RandomFourierFeatureMapper instance.

    Args:
      input_dim: The dimension (number of features) of the tensors to be mapped.
      output_dim: The output dimension of the mapping.
      stddev: The standard deviation of the Gaussian kernel to be approximated.
        The error of the classifier trained using this approximation is very
        sensitive to this parameter.
      seed: An integer used to initialize the parameters (`Omega` and `b`) of
        the mapper. For repeatable sequences across different invocations of the
        mapper object (for instance, to ensure consistent mapping both at
        training and eval/inference if these happen in different invocations),
        set this to the same integer.
      name: Name for the mapper object.
    """
    # TODO(sibyl-vie3Poto): Maybe infer input_dim and/or output_dim (if not explicitly
    # provided). input_dim can be inferred lazily, the first time map is called.
    # output_dim can be inferred from input_dim using heuristics on the error of
    # the approximation (and, by extension, the error of the classification
    # based on the approximation).
    self._input_dim = input_dim
    self._output_dim = output_dim
    self._stddev = stddev
    self._seed = seed
    self._name = name

  @property
  def name(self):
    """Returns a name for the `RandomFourierFeatureMapper` instance.

    If the name provided in the constructor is `None`, then the object's unique
    id is returned.

    Returns:
      A name for the `RandomFourierFeatureMapper` instance.
    """
    return self._name or str(id(self))

  @property
  def input_dim(self):
    return self._input_dim

  @property
  def output_dim(self):
    return self._output_dim

  def map(self, input_tensor):
    """Maps each row of input_tensor using random Fourier features.

    Args:
      input_tensor: A `Tensor` containing input features. Its shape is
        `[batch_size, self._input_dim]`.

    Returns:
      A `Tensor` of shape [batch_size, self._output_dim] containing RFFM-mapped
      features.

    Raises:
      InvalidShapeError: if the shape of the `input_tensor` is inconsistent with
        expected input dimension.
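
    As a shape-only sketch (assuming a mapper constructed with `input_dim=5`
    and `output_dim=100`), an `input_tensor` of shape `[32, 5]` is mapped to a
    `Tensor` of shape `[32, 100]`.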
    """
    input_tensor_shape = input_tensor.get_shape()
    if len(input_tensor_shape) != 2:
      raise dkm.InvalidShapeError(
          'The rank of the input tensor should be 2. Got %d instead.' %
          len(input_tensor_shape))

    features_dim = input_tensor_shape[1]
    if features_dim != self._input_dim:
      raise dkm.InvalidShapeError(
          'Invalid dimension: expected %d input features, got %d instead.' %
          (self._input_dim, features_dim))

    # Add ops that compute (deterministically) omega_matrix and bias based on
    # the provided seed.
    # TODO(sibyl-vie3Poto): Storing the mapper's parameters (omega_matrix and bias) as
    # constants incurs no RPC calls to the parameter server during distributed
    # training. However, if the parameters grow too large (for instance if they
    # don't fit into memory or if they blow up the size of the GraphDef proto),
    # storing them as constants is no longer an option. In this case, we should
    # have a heuristic to choose one of the following alternatives:
    # a) store them as variables (in the parameter server)
    # b) store them as worker local variables
    # c) generate the omega matrix on the fly at each step
    np.random.seed(self._seed)
    omega_matrix_shape = [self._input_dim, self._output_dim]
    bias_shape = [self._output_dim]

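    # Each entry of omega_matrix is drawn from N(0, 1/stddev^2). This is (up to
    # normalization) the spectral distribution of the RBF kernel
    # exp(-||x-y||_2^2 / (2 * stddev^2)), so the cosine features built from it
    # approximate that kernel.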
    omega_matrix = constant_op.constant(
        np.random.normal(
            scale=1.0 / self._stddev, size=omega_matrix_shape),
        dtype=dtypes.float32)
    bias = constant_op.constant(
        np.random.uniform(
            low=0.0, high=2 * np.pi, size=bias_shape),
        dtype=dtypes.float32)

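    # The sqrt(2 / output_dim) scaling normalizes the cosine features so that
    # the expected dot product of two mapped vectors equals the kernel value
    # being approximated.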
    x_omega_plus_bias = math_ops.add(
        math_ops.matmul(input_tensor, omega_matrix), bias)
    return math.sqrt(2.0 / self._output_dim) * math_ops.cos(x_omega_plus_bias)
    163