/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <functional>
#include <memory>
#include <vector>

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"

namespace tensorflow {

class QuantizedMatMulTest : public OpsTestBase {
 protected:
};

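// For reference: QuantizedMatMul takes six inputs (the two quint8 matrices,
// followed by float scalars giving the min/max quantization range of each)
// and produces three outputs (the qint32 product plus the float min/max
// describing its range). The tests below exercise that contract end to end.
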
// Runs two small matrices through the operator, and leaves all the parameters
// at their default values.
TEST_F(QuantizedMatMulTest, Small_NoParams) {
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  1 |  2 |  3 |
  // |  4 |  5 |  6 |
  AddInputFromArray<quint8>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6});
  // The B matrix is:
  // |  7 |  8 |  9 | 10 |
  // | 11 | 12 | 13 | 14 |
  // | 15 | 16 | 17 | 18 |
  AddInputFromArray<quint8>(TensorShape({3, 4}),
                            {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
  AddInputFromArray<float>(TensorShape({1}), {0.0f});
  AddInputFromArray<float>(TensorShape({1}), {255.0f});
  AddInputFromArray<float>(TensorShape({1}), {0.0f});
  AddInputFromArray<float>(TensorShape({1}), {255.0f});
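  // Note: with both ranges set to [0.0f, 255.0f] the quantization step is
  // (255 - 0) / 255 = 1.0 with zero offset, so each quint8 value represents
  // exactly the same float value. That is why the integer hand calculations
  // below match the op's output exactly.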

  TF_ASSERT_OK(RunOpKernel());
  // Here are the results we expect, from hand calculations:
  // (1 * 7) + (2 * 11) + (3 * 15) = 74
  // (1 * 8) + (2 * 12) + (3 * 16) = 80
  // (1 * 9) + (2 * 13) + (3 * 17) = 86
  // (1 * 10) + (2 * 14) + (3 * 18) = 92
  // (4 * 7) + (5 * 11) + (6 * 15) = 173
  // (4 * 8) + (5 * 12) + (6 * 16) = 188
  // (4 * 9) + (5 * 13) + (6 * 17) = 203
  // (4 * 10) + (5 * 14) + (6 * 18) = 218
  Tensor expected(allocator(), DT_QINT32, TensorShape({2, 4}));
  test::FillValues<qint32>(&expected, {74, 80, 86, 92, 173, 188, 203, 218});
  test::ExpectTensorEqual<qint32>(expected, *GetOutput(0));
}

// This test multiplies two 1x1 8-bit matrices, and compares the
// results with hand-calculated expectations.
TEST_F(QuantizedMatMulTest, VerySmall_WithParams) {
  // These parameters reflect a typical production usage of eight-bit matmuls
  // in an Inception-style network.
  const bool transpose_a = true;
  const int a_rows = 1;
  const int a_cols = 1;
  const int b_rows = 1;
  const int b_cols = 1;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  -1 |
  // The input array only contains unsigned bytes, so we specify the actual
  // values as n + a_offset, where a_offset is 12 (the negation of the -12.0f
  // minimum supplied below). For example, that means -1 is represented as
  // -1 + 12, or 11.
  // We have set the transpose_a flag to true, so the matrix is transposed, and
  // for filling the values the in-memory storage order is effectively
  // column major, rather than the default row-major.
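  // Worked dequantization for this input, assuming the linear mapping
  // float = min + quantized * (max - min) / 255 used for quint8 values:
  //   step = (243.0 - (-12.0)) / 255 = 1.0
  //   11 -> -12.0 + 11 * 1.0 = -1.0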
  AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11});

  // The B matrix is:
  // |   1 |
  AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0});
  AddInputFromArray<float>(TensorShape({1}), {-12.0f});
  AddInputFromArray<float>(TensorShape({1}), {243.0f});
  AddInputFromArray<float>(TensorShape({1}), {1.0f});
  AddInputFromArray<float>(TensorShape({1}), {256.0f});
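  // B's range is [1.0f, 256.0f], so its step is likewise (256 - 1) / 255 =
  // 1.0, and the quantized value 0 dequantizes to 1.0 + 0 * 1.0 = 1.0,
  // matching the "| 1 |" matrix described above.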
  TF_ASSERT_OK(RunOpKernel());
  // We're requesting C = A.transposed() * B,
  // so we expect to get these results:
  // 1 * -1 = -1
  // | -1 |
  Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols}));
  test::FillValues<qint32>(&expected, {-1});
  test::ExpectTensorEqual<qint32>(expected, *GetOutput(0));
}

// This test multiplies two 1x1 8-bit matrices, but sets an invalid
// quantization range, so we expect to get an error.
TEST_F(QuantizedMatMulTest, VerySmall_BadRange) {
  // These parameters reflect a typical production usage of eight-bit matmuls
  // in an Inception-style network.
  const bool transpose_a = true;
  const int a_rows = 1;
  const int a_cols = 1;
  const int b_rows = 1;
  const int b_cols = 1;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  -1 |
  AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11});

  // The B matrix is:
  // |   1 |
  AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0});
  AddInputFromArray<float>(TensorShape({1}), {-12.0f});
  AddInputFromArray<float>(TensorShape({1}), {243.0f});
  // Here we set the range so that the min and max are equal, so we expect to
  // see an error when we run.
  AddInputFromArray<float>(TensorShape({1}), {1.0f});
  AddInputFromArray<float>(TensorShape({1}), {1.0f});
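  // A zero-width range (min == max) cannot define a quantization step, so
  // the kernel is expected to reject it with an InvalidArgument error rather
  // than attempt to compute a scale from it.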
  EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code());
}

// This test multiplies a couple of small 8-bit matrices, and compares the
// results with hand-calculated expectations. It uses offset quantization
// ranges to control how the 8-bit values map onto the float results.
TEST_F(QuantizedMatMulTest, Small_WithParams) {
  // These parameters reflect a typical production usage of eight-bit matmuls
  // in an Inception-style network.
  const bool transpose_a = true;
  const int a_rows = 3;
  const int a_cols = 4;
  const int b_rows = 3;
  const int b_cols = 2;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  -1 |  -5 |  -9 |
  // |  -2 |  -6 | -10 |
  // |  -3 |  -7 | -11 |
  // |  -4 |  -8 | -12 |
  // The input array only contains unsigned bytes, so we specify the actual
  // values as n + a_offset, where a_offset is 12 (the negation of the -12.0f
  // minimum supplied below). For example, that means -1 is represented as
  // -1 + 12, or 11.
  // We have set the transpose_a flag to true, so the matrix is transposed, and
  // for filling the values the in-memory storage order is effectively
  // column major, rather than the default row-major.
  AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}),
                            {11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});

  // The B matrix is:
  // |   1 |   4 |
  // |   2 |   5 |
  // |   3 |   6 |
  AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {1, 4, 2, 5, 3, 6});
  AddInputFromArray<float>(TensorShape({1}), {-12.0f});
  AddInputFromArray<float>(TensorShape({1}), {243.0f});
  AddInputFromArray<float>(TensorShape({1}), {0.0f});
  AddInputFromArray<float>(TensorShape({1}), {255.0f});
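  // With A's range [-12.0f, 243.0f] the step is again (243 - (-12)) / 255 =
  // 1.0 (offset 12), and B's range [0.0f, 255.0f] maps each quint8 value to
  // itself, so the hand calculations below hold exactly.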
  TF_ASSERT_OK(RunOpKernel());
  // We're requesting C = A.transposed() * B,
  // so we expect to get these results:
  // 1*-1 + 2*-5 + 3*-9 = -38
  // 4*-1 + 5*-5 + 6*-9 = -83
  // 1*-2 + 2*-6 + 3*-10 = -44
  // 4*-2 + 5*-6 + 6*-10 = -98
  // 1*-3 + 2*-7 + 3*-11 = -50
  // 4*-3 + 5*-7 + 6*-11 = -113
  // 1*-4 + 2*-8 + 3*-12 = -56
  // 4*-4 + 5*-8 + 6*-12 = -128
  // |  -38 |  -83 |
  // |  -44 |  -98 |
  // |  -50 | -113 |
  // |  -56 | -128 |
  Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols}));
  test::FillValues<qint32>(&expected,
                           {-38, -83, -44, -98, -50, -113, -56, -128});
  test::ExpectTensorEqual<qint32>(expected, *GetOutput(0));
}

// This test multiplies a couple of medium-sized 8-bit matrices, and tests the
// results against what we saw from running a float MatMul with equivalent
// inputs.
TEST_F(QuantizedMatMulTest, Medium_WithParams) {
  const bool transpose_a = true;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());

  const int a_rows = 8;
  const int a_cols = 8;
  const float a_min = -2164.25f;
  const float a_max = 2006.27f;
  Tensor a_float(DT_FLOAT, {a_rows, a_cols});
  test::FillValues<float>(
      &a_float,
      {-1014.12, -157.382, -810.17,  1435.28,  1016.37,  219.684,  -316.054,
       -2164.25, 2006.27,  -547.444, 857.376,  404.376,  9.72115,  332.588,
       194.385,  -286.57,  26.062,   23.1125,  110.436,  247.055,  -127.683,
       -376.275, -124.81,  -846.826, -77.1507, 305.581,  -202.747, 12.9528,
       9.64886,  872.686,  40.9069,  197.816,  44.16,    -306.768, -1457.52,
       -368.939, -1049.42, -486.353, 1745.87,  95.7695,  395.773,  -254.333,
       -404.27,  787.16,   -2.44114, 199.37,   -1024.08, 784.901,  235.055,
       -42.7295, 241.498,  -245.365, 470.763,  186.159,  186.579,  -220.163,
       1304.58,  386.272,  -358.853, -755.996, 360.109,  -866.007, 55.2828,
       -508.801});
  Tensor a_quantized = FloatTensorToQuantized<quint8>(a_float, a_min, a_max);

  const int b_rows = 8;
  const int b_cols = 8;
  const float b_min = -0.739539f;
  const float b_max = 0.641057f;
  Tensor b_float(DT_FLOAT, {b_rows, b_cols});
  test::FillValues<float>(
      &b_float,
      {-0.294619, -0.0670519, 0.261507,   -0.126274, 0.127229,   -0.176945,
       -0.251223, 0.231086,   0.453694,   0.415666,  -0.288733,  0.508717,
       0.211551,  0.0435907,  -0.582383,  -0.308779, 0.0696883,  -0.438122,
       0.114,     0.433964,   0.109883,   0.284931,  -0.149661,  0.108657,
       0.458333,  -0.130231,  -0.35805,   -0.123206, -0.437968,  0.0282411,
       0.628818,  -0.0522173, -0.0233403, 0.124863,  0.217165,   0.262294,
       -0.171005, -0.254693,  -0.200433,  -0.287354, 0.488166,   -0.0354688,
       -0.118091, -0.590444,  0.491537,   -0.739539, 0.083117,   0.282482,
       0.275269,  -0.36574,   0.107476,   0.0511428, -0.136887,  -0.0149852,
       -0.259694, 0.641057,   0.264054,   -0.295126, -0.0218791, 0.361211,
       0.012448,  0.0709718,  -0.392394,  -0.434215});
  Tensor b_quantized = FloatTensorToQuantized<quint8>(b_float, b_min, b_max);
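  // FloatTensorToQuantized rounds each value to one of 256 levels within the
  // supplied range, so both quantized inputs carry rounding error; this is
  // why the final check uses ExpectTensorNear rather than exact equality.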

  AddInputFromArray<quint8>(a_quantized.shape(), a_quantized.flat<quint8>());
  AddInputFromArray<quint8>(b_quantized.shape(), b_quantized.flat<quint8>());
  AddInputFromArray<float>(TensorShape({1}), {a_min});
  AddInputFromArray<float>(TensorShape({1}), {a_max});
  AddInputFromArray<float>(TensorShape({1}), {b_min});
  AddInputFromArray<float>(TensorShape({1}), {b_max});
  TF_ASSERT_OK(RunOpKernel());

  Tensor expected_float(DT_FLOAT, {a_cols, b_cols});
  test::FillValues<float>(
      &expected_float,
      {1776.82f,  421.058f,  -854.308f, 1430.65f,  503.105f,  57.2744f,
       -1514.97f, -1163.66f, -87.0979f, -394.577f, -39.4983f, -79.1938f,
       -329.029f, 313.475f,  446.929f,  -59.5855f, 350.837f,  238.655f,
       -609.21f,  350.499f,  192.238f,  847.576f,  -103.177f, 185.886f,
       -90.5335f, 200.787f,  99.1981f,  -717.076f, 763.815f,  -703.726f,
       -125.164f, 732.325f,  -51.5303f, -418.826f, 60.0783f,  -299.658f,
       231.41f,   72.0622f,  -289.244f, 663.776f,  391.177f,  294.415f,
       -484.148f, -677.932f, -180.342f, -194.764f, 761.715f,  553.061f,
       -283.355f, 321.109f,  351.269f,  1171.7f,   -857.497f, 343.804f,
       -494.599f, -844.119f, 725.237f,  586.052f,  -735.013f, -897.723f,
       -122.434f, -502.907f, 1264.6f,   -239.991f});

  const Tensor& output_quantized = *GetOutput(0);
  const float output_min = GetOutput(1)->flat<float>()(0);
  const float output_max = GetOutput(2)->flat<float>()(0);
  Tensor output_float =
      QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max);
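  // A's quantization step is (2006.27 - (-2164.25)) / 255, roughly 16.4, so
  // each element of A may be off by up to ~8.2 even before the multiply; the
  // 15.0 absolute tolerance below is sized to absorb that accumulated error.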
  test::ExpectTensorNear<float>(expected_float, output_float, 15.0);
}

}  // namespace tensorflow