Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
     17 #define TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
     18 
     19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
     20 #include "tensorflow/core/kernels/eigen_volume_patch.h"
     21 
     22 namespace Eigen {
     23 
     24 /** SpatialMaxPooling
     25  * \ingroup CXX11_NeuralNetworks_Module
     26  *
     27  * \brief Applies a max-pooling over a multichannel input image.
     28  *
 * The input parameter is expected to be a tensor with a rank of 4 (channels,
     30  * width, others in col-major, and the reverse of that in row-major).
     31  *
     32  * The result can be assigned to a tensor of rank equal to the rank of the
     33  * input. The dimensions of the result will be channels, height, width, and
     34  * others (in col-major, and the reverse of that if the input was row-major).
     35  *
     36  * The order of the width and height dimensions can be swapped if needed.
     37  *
     38  */
// The return type is the expression template for
// input.extract_image_patches(...).maximum(dims).reshape(dims).
// Two declarations are needed because the compile-time IndexList reduction
// axes are only available when EIGEN_HAS_INDEX_LIST is defined (nvcc
// historically lacked the required C++11 support); otherwise a runtime
// Eigen::array holds the reduction axes.
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial pooling only makes sense on rank-4 inputs (see doc comment above).
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef lets us query the input's dimensions without evaluating the
  // (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extent once in-patch (dilation) strides are applied: a
  // patch of size k with input stride s spans k + (k - 1) * (s - 1) pixels.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  // Height/width dimension indices are mirrored in row-major layout.
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID: only windows fully inside the input contribute to the output.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // SAME: output size depends only on input size and stride.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif

  // Padding cells are filled with -highest() so they can never win the max.
  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          -Eigen::NumTraits<typename internal::remove_const<
              typename internal::traits<Input>::Scalar>::type>::highest())
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
    139 
    140 /** CuboidMaxPooling
    141  * \ingroup CXX11_NeuralNetworks_Module
    142  *
    143  * \brief Applies a max-pooling over a multichannel input volume.
    144  *
    145  * The input parameter is expected to be a tensor with a rank of 5 (channels,
    146  * depth, height, width, others in col-major, and the reverse of that in
    147  * row-major).
    148  *
    149  * The result can be assigned to a tensor of rank equal to the rank of the
    150  * input. The dimensions of the result will be channels, depth, height, width,
    151  * and others (in col-major, and the reverse of that if the input was
    152  * row-major).
    153  *
    154  * The order of the depth, width and height dimensions can be swapped if
    155  * needed.
    156  *
    157  */
// The return type reshapes a max-reduction over flattened cuboid patches:
// reshape(maximum(reshape(extract_volume_patches(...)))).
// NOTE(review): unlike SpatialMaxPooling, the reducer and the padding
// sentinel here are hard-coded to float even though Input's Scalar is
// generic — presumably only float inputs are instantiated; verify callers.
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid pooling requires a rank-5 input (see doc comment above).
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef lets us query the input's dimensions without evaluating the
  // (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Plane/column dimension indices are mirrored in row-major layout; rows
  // sit in the middle either way.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID: only windows fully inside the input contribute to the output.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // SAME: output size depends only on input size and stride.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Collapse every extracted patch into the middle dimension so a single
  // reduction over dim 1 yields the per-patch maximum. In col-major the
  // shape is [channels, patch_size, everything else]; reversed in row-major.
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  // Padding cells are filled with -highest() so they can never win the max.
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
    257 
    258 /** SpatialAvgPooling
    259  * \ingroup CXX11_NeuralNetworks_Module
    260  *
    261  * \brief Applies an average pooling over a multichannel input image.
    262  *
    263  * The input parameter is expected to be a tensor with a rank of 4 (channels,
    264  * height, width, others in col-major, and the reverse of that in row-major).
    265  *
    266  * The result can be assigned to a tensor of rank equal to the rank of the
    267  * input. The dimensions of the result will be channels, height, width, and
    268  * others (in col-major, and the reverse of that if the input was row-major).
    269  *
    270  * The order of the width and height dimensions can be swapped if needed.
    271  *
    272  */
    273 namespace internal {
    274 
// Reducer that averages only the valid coefficients of each pool region.
// The patch-extraction above encodes padding cells as the sentinel value
// -NumTraits<T>::highest(); those cells are excluded from both the running
// sum and the element count, so border windows are averaged only over real
// input values.
template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
  // We only support packet access for floats.
  static const bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  // Stateful: the reducer carries the count of accumulated coefficients.
  static const bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
    packetCount_ = pset1<Packet>(T(0.0));
  }

  // Adds t to *accum unless it is the padding sentinel; counts it if added.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  // Divides by the number of valid coefficients seen by reduce().
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / T(scalarCount_);
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
// pequal(a, b): per-lane mask, all-ones where a == b and zeros elsewhere.
// psel(a, b, false_mask): per-lane select, lane = false_mask ? b : a.
#ifdef EIGEN_VECTORIZE_AVX512
#define pequal(a, b)   \
  _mm512_castsi512_ps( \
      _mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ), -1))

  // The ternarylogic function immediate determines the values in the result
  // In the case below, 0xd8 implies (false_mask) ? (b) : (a)
  // For details, refer to the vpternlogd instruction table at
  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf

#define psel(a, b, false_mask)                        \
  _mm512_castsi512_ps(_mm512_ternarylogic_epi32(      \
      _mm512_castps_si512(a), _mm512_castps_si512(b), \
      _mm512_castps_si512(false_mask), 0xd8))
#elif defined EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  // Vectorized reduce: lanes equal to the padding sentinel contribute 0 to
  // the sum and 0 to the per-lane count; all other lanes add their value and
  // 1. The leading (T) argument only disambiguates the scalar type.
  template <typename Packet>
  void reducePacketWithType(T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }
  // Combines the scalar and vector partial sums/counts into the final mean.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  // Number of valid coefficients accumulated through the scalar reduce() path.
  int scalarCount_;
  // Per-lane counts of valid coefficients accumulated through the packet path.
  Packet packetCount_;
};
    367 
// Traits for the average-pooling reducer: unit reduction cost, and packet
// (vectorized) evaluation enabled only on x86 host builds, matching the
// PacketAccess flag inside AvgPoolMeanReducer.
template <typename Device>
struct reducer_traits<AvgPoolMeanReducer<float>, Device> {
  enum {
    Cost = 1,
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
    // We only support packet access for floats.
    PacketAccess = true
#else
    PacketAccess = false
#endif
  };
};
    380 
// On GPU the reducer always falls back to scalar evaluation (no packet path).
template <>
struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
  enum { Cost = 1, PacketAccess = false };
};
    385 
    386 }  // namespace internal
    387 
// The return type is the expression template for
// input.extract_image_patches(...).reduce(dims, mean).reshape(dims).
// Two declarations are needed because the compile-time IndexList reduction
// axes are only available when EIGEN_HAS_INDEX_LIST is defined (nvcc
// historically lacked the required C++11 support).
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial pooling only makes sense on rank-4 inputs (see doc comment above).
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef lets us query the input's dimensions without evaluating the
  // (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extent once in-patch (dilation) strides are applied: a
  // patch of size k with input stride s spans k + (k - 1) * (s - 1) pixels.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  // Height/width dimension indices are mirrored in row-major layout.
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID: only windows fully inside the input contribute to the output.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // SAME: output size depends only on input size and stride.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  // Reducer that ignores the -highest() padding sentinel written below, so
  // border windows are averaged only over real input values.
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif
  // Padding cells are filled with the sentinel the reducer skips.
  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          -Eigen::NumTraits<typename internal::remove_const<
              typename internal::traits<Input>::Scalar>::type>::highest())
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
    491 
    492 /** CuboidAvgPooling
    493  * \ingroup CXX11_NeuralNetworks_Module
    494  *
    495  * \brief Applies an average pooling over a multichannel input volume.
    496  *
    497  * The input parameter is expected to be a tensor with a rank of 5 (channels,
 * depth, height, width, others in col-major, and the reverse of that in
    499  *
    500  * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, depth, height, width,
    502  * others (in col-major, and the reverse of that if the input was row-major).
    503  *
    504  * The order of the depth, width and height dimensions can be swapped if
    505  * needed.
    506  *
    507  */
// The return type reshapes a mean-reduction over flattened cuboid patches:
// reshape(reduce(reshape(extract_volume_patches(...)))).
// NOTE(review): the reducer and padding sentinel are hard-coded to float even
// though Input's Scalar is generic — presumably only float inputs are
// instantiated; verify callers.
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid pooling requires a rank-5 input (see doc comment above).
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef lets us query the input's dimensions without evaluating the
  // (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Plane/column dimension indices are mirrored in row-major layout; rows
  // sit in the middle either way.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;
  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID: only windows fully inside the input contribute to the output.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // SAME: output size depends only on input size and stride.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Collapse every extracted patch into the middle dimension so a single
  // reduction over dim 1 yields the per-patch mean. In col-major the shape
  // is [channels, patch_size, everything else]; reversed in row-major.
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  // Reducer that ignores the -highest() padding sentinel written below, so
  // border windows are averaged only over real input values.
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  // Padding cells are filled with the sentinel the reducer skips.
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
    610 
    611 }  // end namespace Eigen
    612 
    613 #endif  // TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
    614