Home | History | Annotate | Download | only in public
      1 // Copyright 2015 Google Inc. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
// bit_depth.h: defines the BitDepth struct, the RoundingMode enum,
// the rounding strategies and the default bit depth params.
     16 
     17 #ifndef GEMMLOWP_PUBLIC_BIT_DEPTH_H_
     18 #define GEMMLOWP_PUBLIC_BIT_DEPTH_H_
     19 
     20 namespace gemmlowp {
     21 
     22 // A specific bit depth to requantize an operand (Lhs or Rhs) to.
     23 // The case tBits==8 means no requantization, since at the moment
     24 // we only accept 8-bit input data.
     25 template <int tBits>
     26 struct BitDepth {
     27   static const int kBits = tBits;
     28   static_assert(kBits >= 1 && kBits <= 8, "bad bit depth");
     29 };
     30 
     31 // A rounding mode to use when requantizing an operand.
     32 // The requantizing operation is:
     33 //   dst = (src * maxval + rounding_offset) / 255;
     34 // Where dst and src are uint8, maxval is 2^(dstbits)-1,
     35 // and the intermediate values are computed as uint16s
     36 // so no overflow occurs.
     37 // The rounding_offset in the above formula is a value
     38 // in [0..254] determined by the RoundingMode as follows:
     39 enum class RoundingMode {
     40   Exact,                  // No rounding, do nothing. Use with bit_depth == 8.
     41   Nearest,                // rounding_offset = 127
     42   ProbabilisticXorshift,  // rounding_offset given by 8-bit Xorshift PRNG
     43   ProbabilisticAddmod     // rounding_offset given by 8-bit add/mod LDSG
     44 };
     45 
     46 // A rounding strategy is a heuristic for choosing a rounding mode.
     47 // When the bit depth is 8 bit like the source, there is no
     48 // quantization to be done, so this is moot. In this case, we use
     49 // the following "no-op" "strategy",
     50 struct ExactRoundingStrategyFor8Bit {
     51   static const RoundingMode kRoundingModeForSmallSizes = RoundingMode::Exact;
     52   static const RoundingMode kRoundingModeForLargeSizes = RoundingMode::Exact;
     53   static const int kRoundingModeSizeThreshold = 0;
     54 };
     55 
     56 // Default rounding strategy when actually requantizing to less than 8 bit.
     57 // Round-to-nearest tends to give the best results for small enough
     58 // accumulation sizes (i.e. accumulation depth, but we refrain from using
     59 // the word "depth" here as it gets confusing with "bit depth").
     60 // Some flavor of probabilistic tends to perform better for larger sizes.
     61 // See doc/less-than-8-bit.txt for details.
     62 struct DefaultRoundingStrategyForLessThan8Bit {
     63   static const RoundingMode kRoundingModeForSmallSizes = RoundingMode::Nearest;
     64   static const RoundingMode kRoundingModeForLargeSizes =
     65       RoundingMode::ProbabilisticAddmod;
     66 
     67   // The threshold on the depth dimension at which we switch to
     68   // probabilistic rounding instead of rounding-to-nearest when
     69   // requantizing input data. Indeed, both statistical theory and
     70   // empirical measurements show that for given input data and bit depth,
     71   // probabilistic rounding gives more accurate results for large enough
     72   // depth, while rounding-to-nearest does for smaller depth. This threshold
     73   // is naively determined from some experiments with Inception at 7bit/5bit
     74   // on a set of 10,000 images with 8-bit Xorshift probabilistic rounding:
     75   //
     76   //   7 bit weights, 5 bit activations, switch at 64:   59.82% top-1 accuracy
     77   //   7 bit weights, 5 bit activations, switch at 128:  59.58% top-1 accuracy
     78   //   7 bit weights, 5 bit activations, switch at 192:  63.37% top-1 accuracy
     79   //   7 bit weights, 5 bit activations, switch at 256:  63.47% top-1 accuracy
     80   //   7 bit weights, 5 bit activations, switch at 320:  63.71% top-1 accuracy
     81   //   7 bit weights, 5 bit activations, switch at 384:  63.71% top-1 accuracy
     82   //   7 bit weights, 5 bit activations, switch at 448:  63.58% top-1 accuracy
     83   //   7 bit weights, 5 bit activations, switch at 512:  64.10% top-1 accuracy
     84   //   7 bit weights, 5 bit activations, switch at 640:  62.49% top-1 accuracy
     85   //   7 bit weights, 5 bit activations, switch at 768:  62.49% top-1 accuracy
     86   //   7 bit weights, 5 bit activations, switch at 1024: 58.96% top-1 accuracy
     87   //
     88   // So here, 384 looks comfortably in the middle of a plateau of good values,
     89   // and it's a roundish number (3/2 * 256) so let's stick with that for now.
     90   // It would be nice to work out the theory of this, and understand how this
     91   // should depend on the distribution of inputs and the bit depth.
     92   //
     93   // Repeating the same evaluation with AddMod:
     94   //   7 bit weights, 5 bit activations, switch at 64:   62.65% top-1 accuracy
     95   //   7 bit weights, 5 bit activations, switch at 128:  62.65% top-1 accuracy
     96   //   7 bit weights, 5 bit activations, switch at 192:  63.81% top-1 accuracy
     97   //   7 bit weights, 5 bit activations, switch at 256:  64.23% top-1 accuracy
     98   //   7 bit weights, 5 bit activations, switch at 320:  64.16% top-1 accuracy
     99   //   7 bit weights, 5 bit activations, switch at 384:  64.16% top-1 accuracy
    100   //   7 bit weights, 5 bit activations, switch at 448:  64.16% top-1 accuracy
    101   //   7 bit weights, 5 bit activations, switch at 512:  64.52% top-1 accuracy
    102   //   7 bit weights, 5 bit activations, switch at 640:  62.74% top-1 accuracy
    103   //   7 bit weights, 5 bit activations, switch at 768:  62.74% top-1 accuracy
    104   //   7 bit weights, 5 bit activations, switch at 1024: 59.74% top-1 accuracy
    105   //
    106   // The behavior is similar, so 384 remains a good choice.
    107 
    108   static const int kRoundingModeSizeThreshold = 384;
    109 };
    110 
    111 struct DefaultL8R8BitDepthParams {
    112   typedef BitDepth<8> LhsBitDepth;
    113   typedef BitDepth<8> RhsBitDepth;
    114   typedef ExactRoundingStrategyFor8Bit RoundingStrategy;
    115 };
    116 
    117 struct DefaultL7R5BitDepthParams {
    118   typedef BitDepth<7> LhsBitDepth;
    119   typedef BitDepth<5> RhsBitDepth;
    120   typedef DefaultRoundingStrategyForLessThan8Bit RoundingStrategy;
    121 };
    122 
    123 }  // namespace gemmlowp
    124 
    125 #endif  // GEMMLOWP_PUBLIC_BIT_DEPTH_H_
    126