// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// bit_depth.h: defines the BitDepth template, the RoundingMode enum,
// the rounding strategies, and the default bit depth parameter sets.

#ifndef GEMMLOWP_PUBLIC_BIT_DEPTH_H_
#define GEMMLOWP_PUBLIC_BIT_DEPTH_H_

namespace gemmlowp {

// A specific bit depth to requantize an operand (Lhs or Rhs) to.
// The case tBits==8 means no requantization, since at the moment
// we only accept 8-bit input data.
// Instantiating with tBits outside of [1..8] is a compile-time error.
template <int tBits>
struct BitDepth {
  static constexpr int kBits = tBits;
  static_assert(kBits >= 1 && kBits <= 8, "bad bit depth");
};

// A rounding mode to use when requantizing an operand.
// The requantizing operation is:
//   dst = (src * maxval + rounding_offset) / 255;
// Where dst and src are uint8, maxval is 2^(dstbits)-1,
// and the intermediate values are computed as uint16s
// so no overflow occurs.
// The rounding_offset in the above formula is a value
// in [0..254] determined by the RoundingMode as follows:
enum class RoundingMode {
  Exact,                  // No rounding, do nothing. Use with bit_depth == 8.
  Nearest,                // rounding_offset = 127
  ProbabilisticXorshift,  // rounding_offset given by 8-bit Xorshift PRNG
  ProbabilisticAddmod     // rounding_offset given by 8-bit add/mod LDSG
};

// A rounding strategy is a heuristic for choosing a rounding mode.
// It provides one rounding mode for small sizes, one for large sizes,
// and the size threshold separating the two.
//
// When the bit depth is 8 bit like the source, there is no
// quantization to be done, so this is moot. In this case, we use
// the following "no-op" "strategy":
struct ExactRoundingStrategyFor8Bit {
  static constexpr RoundingMode kRoundingModeForSmallSizes =
      RoundingMode::Exact;
  static constexpr RoundingMode kRoundingModeForLargeSizes =
      RoundingMode::Exact;
  // Both modes are Exact, so the threshold value is irrelevant here.
  static constexpr int kRoundingModeSizeThreshold = 0;
};

// Default rounding strategy when actually requantizing to less than 8 bit.
// Round-to-nearest tends to give the best results for small enough
// accumulation sizes (i.e. accumulation depth, but we refrain from using
// the word "depth" here as it gets confusing with "bit depth").
// Some flavor of probabilistic tends to perform better for larger sizes.
// See doc/less-than-8-bit.txt for details.
struct DefaultRoundingStrategyForLessThan8Bit {
  static constexpr RoundingMode kRoundingModeForSmallSizes =
      RoundingMode::Nearest;
  static constexpr RoundingMode kRoundingModeForLargeSizes =
      RoundingMode::ProbabilisticAddmod;

  // The threshold on the depth dimension at which we switch to
  // probabilistic rounding instead of rounding-to-nearest when
  // requantizing input data. Indeed, both statistical theory and
  // empirical measurements show that for given input data and bit depth,
  // probabilistic rounding gives more accurate results for large enough
  // depth, while rounding-to-nearest does for smaller depth. This threshold
  // is naively determined from some experiments with Inception at 7bit/5bit
  // on a set of 10,000 images with 8-bit Xorshift probabilistic rounding:
  //
  //   7 bit weights, 5 bit activations, switch at 64:   59.82% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 128:  59.58% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 192:  63.37% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 256:  63.47% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 320:  63.71% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 384:  63.71% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 448:  63.58% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 512:  64.10% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 640:  62.49% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 768:  62.49% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 1024: 58.96% top-1 accuracy
  //
  // So here, 384 looks comfortably in the middle of a plateau of good values,
  // and it's a roundish number (3/2 * 256) so let's stick with that for now.
  // It would be nice to work out the theory of this, and understand how this
  // should depend on the distribution of inputs and the bit depth.
  //
  // Repeating the same evaluation with AddMod:
  //   7 bit weights, 5 bit activations, switch at 64:   62.65% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 128:  62.65% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 192:  63.81% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 256:  64.23% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 320:  64.16% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 384:  64.16% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 448:  64.16% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 512:  64.52% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 640:  62.74% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 768:  62.74% top-1 accuracy
  //   7 bit weights, 5 bit activations, switch at 1024: 59.74% top-1 accuracy
  //
  // The behavior is similar, so 384 remains a good choice.

  static constexpr int kRoundingModeSizeThreshold = 384;
};

// Bit depth parameters keeping both Lhs and Rhs at 8 bit, i.e. no
// requantization, paired with the exact (no-op) rounding strategy.
struct DefaultL8R8BitDepthParams {
  using LhsBitDepth = BitDepth<8>;
  using RhsBitDepth = BitDepth<8>;
  using RoundingStrategy = ExactRoundingStrategyFor8Bit;
};

// Bit depth parameters requantizing Lhs to 7 bit and Rhs to 5 bit,
// paired with the default less-than-8-bit rounding strategy.
struct DefaultL7R5BitDepthParams {
  using LhsBitDepth = BitDepth<7>;
  using RhsBitDepth = BitDepth<5>;
  using RoundingStrategy = DefaultRoundingStrategyForLessThan8Bit;
};

}  // namespace gemmlowp

#endif  // GEMMLOWP_PUBLIC_BIT_DEPTH_H_