Home | History | Annotate | Download | only in lib
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_
     17 #define TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_
     18 
     19 #include <algorithm>
     20 #include <cmath>
     21 #include <limits>
     22 #include <type_traits>
     23 #include <vector>
     24 
     25 #include "tensorflow/stream_executor/platform/logging.h"
     26 #include "tensorflow/stream_executor/platform/port.h"
     27 
     28 namespace perftools {
     29 namespace gputools {
     30 namespace port {
     31 
     32 class MathUtil {
     33  public:
     34   template <typename IntegralType>
     35   static IntegralType CeilOfRatio(IntegralType numerator,
     36                                   IntegralType denominator) {
     37     return CeilOrFloorOfRatio<IntegralType, true>(numerator, denominator);
     38   }
     39   template <typename IntegralType>
     40   static IntegralType FloorOfRatio(IntegralType numerator,
     41                                    IntegralType denominator) {
     42     return CeilOrFloorOfRatio<IntegralType, false>(numerator, denominator);
     43   }
     44   template <typename IntegralType, bool ceil>
     45   static IntegralType CeilOrFloorOfRatio(IntegralType numerator,
     46                                          IntegralType denominator);
     47 };
     48 
     49 // ---- CeilOrFloorOfRatio ----
     50 // This is a branching-free, cast-to-double-free implementation.
     51 //
     52 // Casting to double is in general incorrect because of loss of precision
     53 // when casting an int64 into a double.
     54 //
     55 // There's a bunch of 'recipes' to compute a integer ceil (or floor) on the web,
     56 // and most of them are incorrect.
     57 template<typename IntegralType, bool ceil>
     58 IntegralType MathUtil::CeilOrFloorOfRatio(IntegralType numerator,
     59                                           IntegralType denominator) {
     60   static_assert(std::is_integral<IntegralType>::value,
     61                  "CeilOfRatio_is_only_defined_for_integral_types");
     62   assert(denominator != 0);
     63   // Dividing the smallest signed integer by -1 is not supported: it would
     64   // SIGFPE
     65   assert(!std::is_signed<IntegralType>::value ||
     66          numerator != std::numeric_limits<IntegralType>::min() ||
     67          denominator != -1);
     68 
     69   const IntegralType rounded_toward_zero = numerator / denominator;
     70   const IntegralType intermediate_product = rounded_toward_zero * denominator;
     71 
     72   if (ceil) {  // Compile-time condition: not an actual branching
     73     // When rounded_toward_zero is negative, then an adjustment is never needed:
     74     // the real ratio is negative, and so rounded toward zero is the ceil.
     75     // When rounded_toward_zero is non-negative, an adjustment is needed if the
     76     // sign of the difference numerator - intermediate_product is the same as
     77     // the sign of the denominator.
     78     //
     79     // Using a bool and then a static_cast to IntegralType is not strictly
     80     // necessary, but it makes the code clear, and anyway the compiler should
     81     // get rid of it.
     82     const bool needs_adjustment = (rounded_toward_zero >= 0) &&
     83         ((denominator > 0 && numerator > intermediate_product) ||
     84             (denominator < 0 && numerator < intermediate_product));
     85     const IntegralType adjustment = static_cast<IntegralType>(needs_adjustment);
     86     const IntegralType ceil_of_ratio = rounded_toward_zero + adjustment;
     87     return ceil_of_ratio;
     88   } else {
     89     // Floor case: symmetrical to the previous one
     90     const bool needs_adjustment = (rounded_toward_zero <= 0) &&
     91         ((denominator > 0 && numerator < intermediate_product) ||
     92          (denominator < 0 && numerator > intermediate_product));
     93     const IntegralType adjustment = static_cast<IntegralType>(needs_adjustment);
     94     const IntegralType floor_of_ratio = rounded_toward_zero - adjustment;
     95     return floor_of_ratio;
     96   }
     97 }
     98 
     99 }  // namespace port
    100 }  // namespace gputools
    101 }  // namespace perftools
    102 
    103 #endif  // TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_
    104