      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/framework/fake_input.h"
     17 #include "tensorflow/core/framework/node_def_builder.h"
     18 #include "tensorflow/core/framework/tensor.h"
     19 #include "tensorflow/core/framework/tensor_testutil.h"
     20 #include "tensorflow/core/kernels/ops_testutil.h"
     21 
     22 namespace tensorflow {
     23 
     24 using tensorflow::AllocatorAttributes;
     25 using tensorflow::DT_FLOAT;
     26 using tensorflow::NodeDefBuilder;
     27 using tensorflow::OpsTestBase;
     28 using tensorflow::Tensor;
     29 using tensorflow::TensorShape;
     30 using tensorflow::test::ExpectClose;
     31 using tensorflow::test::FillValues;
     32 
     33 class QuantOpsTest : public OpsTestBase {
     34  protected:
     35   void AddRandomInput(const TensorShape& shape) {
     36     CHECK_GT(input_types_.size(), inputs_.size())
     37         << "Adding more inputs than types; perhaps you need to call MakeOp";
     38     Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
     39                                DT_FLOAT, shape);
     40     input->flat<float>().setRandom();
     41     tensors_.push_back(input);
     42     bool is_ref = IsRefType(input_types_[inputs_.size()]);
     43     if (is_ref) {
     44       CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), DT_FLOAT);
     45       inputs_.push_back({&lock_for_refs_, input});
     46     } else {
     47       CHECK_EQ(input_types_[inputs_.size()], DT_FLOAT);
     48       inputs_.push_back({nullptr, input});
     49     }
     50   }
     51 
     52   void RunTestFakeQuantWithMinMaxArgs(const int num_bits,
     53                                       const bool narrow_range, const float min,
     54                                       const float max, const TensorShape& shape,
     55                                       const gtl::ArraySlice<float>& data,
     56                                       gtl::ArraySlice<float> expected_data) {
     57     TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs")
     58                      .Input(FakeInput(DT_FLOAT))  // inputs
     59                      .Attr("min", min)
     60                      .Attr("max", max)
     61                      .Attr("num_bits", num_bits)
     62                      .Attr("narrow_range", narrow_range)
     63                      .Finalize(node_def()));
     64     TF_EXPECT_OK(InitOp());
     65     // Downstream inputs.
     66     AddInputFromArray<float>(shape, data);
     67 
     68     // Tested code.
     69     TF_ASSERT_OK(RunOpKernel());
     70 
     71     Tensor* output = GetOutput(0);
     72     Tensor expected(allocator(), DT_FLOAT, shape);
     73     FillValues<float>(&expected, expected_data);
     74     ExpectClose(expected, *output);
     75   }
     76 
     77   void RunTestFakeQuantWithMinMaxVars(const int num_bits,
     78                                       const bool narrow_range, const float min,
     79                                       const float max, const TensorShape& shape,
     80                                       const gtl::ArraySlice<float>& data,
     81                                       gtl::ArraySlice<float> expected_data) {
     82     TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars")
     83                      .Input(FakeInput(DT_FLOAT))  // inputs
     84                      .Input(FakeInput(DT_FLOAT))  // min
     85                      .Input(FakeInput(DT_FLOAT))  // max
     86                      .Attr("num_bits", num_bits)
     87                      .Attr("narrow_range", narrow_range)
     88                      .Finalize(node_def()));
     89     TF_EXPECT_OK(InitOp());
     90     // Downstream inputs.
     91     AddInputFromArray<float>(shape, data);
     92     // Min.
     93     AddInputFromArray<float>(TensorShape({}), {min});
     94     // Max.
     95     AddInputFromArray<float>(TensorShape({}), {max});
     96 
     97     // Tested code.
     98     TF_ASSERT_OK(RunOpKernel());
     99 
    100     Tensor* output = GetOutput(0);
     101     Tensor expected(allocator(), DT_FLOAT, shape);
    102     FillValues<float>(&expected, expected_data);
    103     ExpectClose(expected, *output);
    104   }
    105 
    106   void RunTestFakeQuantWithMinMaxVarsPerChannel(
    107       const int num_bits, const bool narrow_range,
    108       const TensorShape& minmax_shape, const gtl::ArraySlice<float>& min,
    109       const gtl::ArraySlice<float>& max, const TensorShape& shape,
    110       const gtl::ArraySlice<float>& data,
    111       gtl::ArraySlice<float> expected_data) {
    112     TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
    113                      .Input(FakeInput(DT_FLOAT))  // inputs
    114                      .Input(FakeInput(DT_FLOAT))  // min
    115                      .Input(FakeInput(DT_FLOAT))  // max
    116                      .Attr("num_bits", num_bits)
    117                      .Attr("narrow_range", narrow_range)
    118                      .Finalize(node_def()));
    119     TF_EXPECT_OK(InitOp());
    120     // Downstream inputs.
    121     AddInputFromArray<float>(shape, data);
    122     // Min.
    123     AddInputFromArray<float>(minmax_shape, min);
    124     // Max.
    125     AddInputFromArray<float>(minmax_shape, max);
    126 
    127     // Tested code.
    128     TF_ASSERT_OK(RunOpKernel());
    129 
    130     Tensor* output = GetOutput(0);
    131     Tensor expected(allocator(), DT_FLOAT, shape);
    132     FillValues<float>(&expected, expected_data);
    133     ExpectClose(expected, *output);
    134   }
    135 };
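
         // The per-test comments below describe quantization ranges in terms of
         // "nudging": the requested [min, max] interval is snapped so that the zero
         // point lands exactly on the quantized grid.  What follows is a minimal
         // sketch of that arithmetic, assuming the scheme the comments describe; the
         // names NudgedRange and NudgeMinMax are local, purely illustrative helpers
         // and are not the kernel's implementation.
         namespace {

         struct NudgedRange {
           float nudged_min;
           float nudged_max;
           float scale;
         };

         NudgedRange NudgeMinMax(float min, float max, int num_bits,
                                 bool narrow_range) {
           // Quantized domain: [0, 2^num_bits - 1], or [1, 2^num_bits - 1] when
           // narrow_range is set.
           const float quant_min = narrow_range ? 1.0f : 0.0f;
           const float quant_max = static_cast<float>((1 << num_bits) - 1);
           const float scale = (max - min) / (quant_max - quant_min);
           // Position of float 0.0 on the quantized axis.  Assumes min <= 0 <= max
           // (true for every test in this file), so the value is non-negative and
           // truncating after adding 0.5 rounds to the nearest integer.
           const float zero_point_from_min = quant_min - min / scale;
           float nudged_zero_point =
               static_cast<float>(static_cast<int>(zero_point_from_min + 0.5f));
           if (nudged_zero_point < quant_min) nudged_zero_point = quant_min;
           if (nudged_zero_point > quant_max) nudged_zero_point = quant_max;
           // Shift min/max so the zero point is exactly representable; the scale is
           // unchanged.
           return {(quant_min - nudged_zero_point) * scale,
                   (quant_max - nudged_zero_point) * scale, scale};
         }

         }  // namespace

         // Worked example matching WithArgsNudgedUp_RegularRange below: the requested
         // range [-0.1275, 63.6225] has scale 1/4 and a fractional zero point of
         // 0.51, which nudges to 1, giving the nudged range [-0.25, 63.5].
         TEST_F(QuantOpsTest, IllustrativeNudgingArithmetic) {
           const NudgedRange r = NudgeMinMax(-0.1275f, 63.6225f, 8, false);
           EXPECT_NEAR(0.25f, r.scale, 1e-6f);
           EXPECT_NEAR(-0.25f, r.nudged_min, 1e-6f);
           EXPECT_NEAR(63.5f, r.nudged_max, 1e-6f);
         }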
    136 
    137 TEST_F(QuantOpsTest, WithArgsNoNudging_RegularRange) {
    138   // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
    139   // Original zero point: 40, no nudging necessary.
    140   // Expected quantized values: -10.0, -9.75, ..., 53.75.
    141   RunTestFakeQuantWithMinMaxArgs(
    142       8, false, -10.0f, 53.75f, TensorShape({2, 3}),
    143       {-10.1f, -10.0f, -9.9f, -9.75f, 53.75f, 53.8f},
    144       {-10.0f, -10.0f, -10.0f, -9.75f, 53.75f, 53.75f});
    145 }
    146 
    147 TEST_F(QuantOpsTest, WithArgsNoNudging_NarrowRange) {
    148   // Original quantization range: [-10 + 0 / 4, -10 + 254 / 4], scale: 1/4.
    149   // Original zero point: 41, no nudging necessary.
    150   // Expected quantized values: -10.0, -9.75, ..., 53.5.
    151   RunTestFakeQuantWithMinMaxArgs(
    152       8, true, -10.0f, 53.5f, TensorShape({2, 3}),
    153       {-10.1f, -10.0f, -9.9f, -9.75f, 53.5f, 53.6f},
    154       {-10.0f, -10.0f, -10.0f, -9.75f, 53.5f, 53.5f});
    155 }
    156 
    157 TEST_F(QuantOpsTest, WithArgsNudgedDown_RegularRange) {
    158   // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
    159   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
    160   // Nudged range: [0.0; 63.75].
    161   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
    162   RunTestFakeQuantWithMinMaxArgs(8, false, -0.1f, 63.65f, TensorShape({2, 3}),
    163                                  {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.8f},
    164                                  {0.0f, 0.0f, 0.0f, 0.25f, 63.75f, 63.75f});
    165 }
    166 
    167 TEST_F(QuantOpsTest, WithArgsNudgedDown_NarrowRange) {
    168   // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
    169   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
    170   // Nudged range: [0.0; 63.5].
    171   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
    172   RunTestFakeQuantWithMinMaxArgs(8, true, -0.1f, 63.4f, TensorShape({2, 3}),
    173                                  {-0.1f, 0.0f, 0.1f, 0.25f, 63.5f, 63.6f},
    174                                  {0.0f, 0.0f, 0.0f, 0.25f, 63.5f, 63.5f});
    175 }
    176 
    177 TEST_F(QuantOpsTest, WithArgsNudgedUp_RegularRange) {
    178   // Original quantization range: [-0.51 / 4 + 0 / 4, -0.51 / 4 + 255 / 4].
    179   // Scale: 1/4,  original zero point: 0.51, nudged to 1.
    180   // Nudged range: [-0.25; 63.5].
    181   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
    182   RunTestFakeQuantWithMinMaxArgs(8, false, -0.1275f, 63.6225f,
    183                                  TensorShape({2, 3}),
    184                                  {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f},
    185                                  {-0.25f, -0.25f, -0.25f, 0.0f, 63.5f, 63.5f});
    186 }
    187 
    188 TEST_F(QuantOpsTest, WithArgsNudgedUp_NarrowRange) {
    189   // Original quantization range: [-0.51 / 4 + 0 / 4, -0.51 / 4 + 254 / 4].
    190   // Scale: 1/4,  original zero point: 1.51, nudged to 2.
    191   // Nudged range: [-0.25; 63.25].
    192   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
    193   RunTestFakeQuantWithMinMaxArgs(
    194       8, true, -0.1275f, 63.3725f, TensorShape({2, 3}),
    195       {-0.26f, -0.25f, -0.24f, 0.0f, 63.25f, 63.3f},
    196       {-0.25f, -0.25f, -0.25f, 0.0f, 63.25f, 63.25f});
    197 }
    198 
    199 TEST_F(QuantOpsTest, WithArgsNudgedZeroIs255_RegularRange) {
    200   // Original quantization range: [0.4 / 4 - 255 / 4, 0.4 / 4 + 0 / 4].
    201   // Scale: 1/4,  original zero point: 254.6, nudged to 255.
    202   // Nudged range: [-63.75; 0.0].
    203   // Expected quantized values: -63.75, -63.5, -63.25, ..., 0.0.
    204   RunTestFakeQuantWithMinMaxArgs(
    205       8, false, -63.65f, 0.1f, TensorShape({2, 3}),
    206       {-63.8f, -63.75f, -63.7f, -63.5f, 0.0f, 0.1f},
    207       {-63.75f, -63.75f, -63.75f, -63.5f, 0.0f, 0.0f});
    208 }
    209 
    210 TEST_F(QuantOpsTest, WithArgsNudgedZeroIs255_NarrowRange) {
    211   // Original quantization range: [0.4 / 4 - 254 / 4, 0.4 / 4 + 0 / 4].
    212   // Scale: 1/4,  original zero point: 254.6, nudged to 255.
    213   // Nudged range: [-63.5; 0.0].
    214   // Expected quantized values: -63.5, -63.25, -63.0, ..., 0.0.
    215   RunTestFakeQuantWithMinMaxArgs(8, true, -63.4f, 0.1f, TensorShape({2, 3}),
    216                                  {-63.6f, -63.5f, -63.4f, -63.25f, 0.0f, 0.1f},
    217                                  {-63.5f, -63.5f, -63.5f, -63.25f, 0.0f, 0.0f});
    218 }
    219 
    220 TEST_F(QuantOpsTest, WithArgsNoNudging_4Bits_RegularRange) {
    221   // Original quantization range: [-6 + 0 / 2, -6 + 15 / 2], scale: 1/2.
    222   // Original zero point: 12, no nudging necessary.
    223   // Expected quantized values: -6, -5.5, ..., 1.5.
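           // With num_bits = 4 the quantized domain is [0, 15], so the scale here is
           // (1.5 - (-6.0)) / 15 = 1/2 and the integer zero point 12 needs no nudge.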
    224   RunTestFakeQuantWithMinMaxArgs(4, false, -6.0f, 1.5f, TensorShape({2, 3}),
    225                                  {-6.1f, -6.0f, -5.9f, -5.5f, 1.5f, 1.6f},
    226                                  {-6.0f, -6.0f, -6.0f, -5.5f, 1.5f, 1.5f});
    227 }
    228 
    229 TEST_F(QuantOpsTest, WithArgsNoNudging_4Bits_NarrowRange) {
    230   // Original quantization range: [-6 + 0 / 2, -6 + 14 / 2], scale: 1/2.
    231   // Original zero point: 13, no nudging necessary.
    232   // Expected quantized values: -6, -5.5, ..., 1.0.
    233   RunTestFakeQuantWithMinMaxArgs(4, true, -6.0f, 1.0f, TensorShape({2, 3}),
    234                                  {-6.1f, -6.0f, -5.9f, -5.5f, 1.0f, 1.1f},
    235                                  {-6.0f, -6.0f, -6.0f, -5.5f, 1.0f, 1.0f});
    236 }
    237 
    238 TEST_F(QuantOpsTest, WithArgsNudgedDown_4Bits_RegularRange) {
    239   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
    240   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
    241   // Nudged range: [0.0; 7.5].
    242   // Expected quantized values: 0.0, 0.5, ..., 7.5.
    243   RunTestFakeQuantWithMinMaxArgs(4, false, -0.1f, 7.4f, TensorShape({2, 3}),
    244                                  {-0.1f, 0.0f, 0.1f, 0.5f, 7.5f, 7.6f},
    245                                  {0.0f, 0.0f, 0.0f, 0.5f, 7.5f, 7.5f});
    246 }
    247 
    248 TEST_F(QuantOpsTest, WithArgsNudgedDown_4Bits_NarrowRange) {
    249   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
    250   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
    251   // Nudged range: [0.0; 7.0].
    252   // Expected quantized values: 0.0, 0.5, ..., 7.0.
    253   RunTestFakeQuantWithMinMaxArgs(4, true, -0.1f, 6.9f, TensorShape({2, 3}),
    254                                  {-0.1f, 0.0f, 0.1f, 0.5f, 7.0f, 7.1f},
    255                                  {0.0f, 0.0f, 0.0f, 0.5f, 7.0f, 7.0f});
    256 }
    257 
    258 TEST_F(QuantOpsTest, WithArgsNudgedUp_4Bits_RegularRange) {
    259   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
    260   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
    261   // Nudged range: [-0.5; 7.0].
    262   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
    263   RunTestFakeQuantWithMinMaxArgs(4, false, -0.4f, 7.1f, TensorShape({2, 3}),
    264                                  {-0.6f, -0.5f, -0.24f, 0.0f, 7.0f, 7.1f},
    265                                  {-0.5f, -0.5f, -0.00f, 0.0f, 7.0f, 7.0f});
    266 }
    267 
    268 TEST_F(QuantOpsTest, WithArgsNudgedUp_4Bits_NarrowRange) {
    269   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
    270   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
    271   // Nudged range: [-0.5; 6.5].
    272   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
    273   RunTestFakeQuantWithMinMaxArgs(4, true, -0.4f, 6.6f, TensorShape({2, 3}),
    274                                  {-0.6f, -0.5f, -0.24f, 0.0f, 6.5f, 6.6f},
    275                                  {-0.5f, -0.5f, 0.0f, 0.0f, 6.5f, 6.5f});
    276 }
    277 
    278 TEST_F(QuantOpsTest, WithArgsNudgedZeroIs15_4Bits_RegularRange) {
    279   // Original quantization range: [0.4 / 2 - 15 / 2, 0.4 / 2 + 0 / 2].
    280   // Scale: 1/2,  original zero point: 14.6, nudged to 15.
    281   // Nudged range: [-7.5; 0.0].
    282   // Expected quantized values: -7.5, -7.0, ..., 0.0.
    283   RunTestFakeQuantWithMinMaxArgs(4, false, -7.3f, 0.2f, TensorShape({2, 3}),
    284                                  {-7.6f, -7.5f, -7.4f, -7.2f, 0.0f, 0.1f},
    285                                  {-7.5f, -7.5f, -7.5f, -7.0f, 0.0f, 0.0f});
    286 }
    287 
    288 TEST_F(QuantOpsTest, WithArgsNudgedZeroIs15_4Bits_NarrowRange) {
    289   // Original quantization range: [0.4 / 2 - 14 / 2, 0.4 / 2 + 0 / 2].
    290   // Scale: 1/2,  original zero point: 14.6, nudged to 15.
    291   // Nudged range: [-7.0; 0.0].
    292   // Expected quantized values: -7.0, -6.5, ..., 0.0.
    293   RunTestFakeQuantWithMinMaxArgs(4, true, -6.8f, 0.2f, TensorShape({2, 3}),
    294                                  {-7.1f, -7.0f, -6.9f, -6.7f, 0.0f, 0.1f},
    295                                  {-7.0f, -7.0f, -7.0f, -6.5f, 0.0f, 0.0f});
    296 }
    297 
    298 TEST_F(QuantOpsTest, WithArgsNoNudging_2Bits_RegularRange) {
    299   // Original quantization range: [-1 + 0 / 2, -1 + 3 / 2], scale: 1/2.
    300   // Original zero point: 2, no nudging necessary.
    301   // Expected quantized values: -1.0, -0.5, 0.0, 0.5.
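           // Likewise, num_bits = 2 gives the quantized domain [0, 3]: the scale is
           // (0.5 - (-1.0)) / 3 = 1/2 and the zero point of 2 needs no nudging.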
    302   RunTestFakeQuantWithMinMaxArgs(2, false, -1.0f, 0.5f, TensorShape({2, 3}),
    303                                  {-1.1f, -1.0f, -0.9f, -0.3f, 0.1f, 0.6f},
    304                                  {-1.0f, -1.0f, -1.0f, -0.5f, 0.0f, 0.5f});
    305 }
    306 
    307 TEST_F(QuantOpsTest, WithArgsNoNudging_2Bits_NarrowRange) {
    308   // Original quantization range: [-1 + 0 / 2, -1 + 2 / 2], scale: 1/2.
    309   // Original zero point: 3, no nudging necessary.
    310   // Expected quantized values: -1.0, -0.5, 0.0.
    311   RunTestFakeQuantWithMinMaxArgs(2, true, -1.0f, 0.0f, TensorShape({2, 3}),
    312                                  {-1.1f, -1.0f, -0.9f, -0.3f, 0.0f, 0.1f},
    313                                  {-1.0f, -1.0f, -1.0f, -0.5f, 0.0f, 0.0f});
    314 }
    315 
    316 TEST_F(QuantOpsTest, WithArgsNudgedDown_2Bits_RegularRange) {
    317   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 3 / 2].
    318   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
    319   // Nudged range: [0.0; 1.5].
    320   // Expected quantized values: 0.0, 0.5, 1.0, 1.5.
    321   RunTestFakeQuantWithMinMaxArgs(2, false, -0.1f, 1.4f, TensorShape({2, 3}),
    322                                  {-0.2f, 0.1f, 0.7f, 1.0f, 1.3f, 1.6f},
    323                                  {0.0f, 0.0f, 0.5f, 1.0f, 1.5f, 1.5f});
    324 }
    325 
    326 TEST_F(QuantOpsTest, WithArgsNudgedDown_2Bits_NarrowRange) {
    327   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 2 / 2].
    328   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
    329   // Nudged range: [0.0; 1.0].
    330   // Expected quantized values: 0.0, 0.5, 1.0.
    331   RunTestFakeQuantWithMinMaxArgs(2, true, -0.1f, 0.9f, TensorShape({2, 3}),
    332                                  {-0.1f, 0.1f, 0.7f, 0.9f, 1.0f, 1.1f},
    333                                  {-0.0f, 0.0f, 0.5f, 1.0f, 1.0f, 1.0f});
    334 }
    335 
    336 TEST_F(QuantOpsTest, WithArgsNudgedUp_2Bits_RegularRange) {
    337   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 3 / 2].
    338   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
    339   // Nudged range: [-0.5; 1.0].
    340   // Expected quantized values: -0.5, 0.0, 0.5, 1.0.
    341   RunTestFakeQuantWithMinMaxArgs(2, false, -0.4f, 1.1f, TensorShape({2, 3}),
    342                                  {-0.6f, -0.5f, -0.24f, 0.0f, 1.0f, 1.1f},
    343                                  {-0.5f, -0.5f, 0.0f, 0.0f, 1.0f, 1.0f});
    344 }
    345 
    346 TEST_F(QuantOpsTest, WithArgsNudgedUp_2Bits_NarrowRange) {
    347   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 2 / 2].
    348   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
    349   // Nudged range: [-0.5; 0.5].
    350   // Expected quantized values: -0.5, 0.0, 0.5.
    351   RunTestFakeQuantWithMinMaxArgs(2, true, -0.4f, 0.6f, TensorShape({2, 3}),
    352                                  {-0.6f, -0.5f, -0.24f, 0.0f, 0.5f, 0.6f},
    353                                  {-0.5f, -0.5f, -0.00f, 0.0f, 0.5f, 0.5f});
    354 }
    355 
    356 TEST_F(QuantOpsTest, WithArgsGradient_RegularRange) {
    357   // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
    358   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
    359   // Nudged range: [-0.25; 63.5].
    360   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
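           // The gradient op passes upstream gradients through for inputs inside the
           // nudged range and zeroes them for inputs outside it; here -0.26 and 63.6
           // fall outside [-0.25, 63.5], hence the zeros at positions 0 and 5 of the
           // expected output below.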
    361   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgsGradient")
    362                    .Input(FakeInput(DT_FLOAT))  // gradient
    363                    .Input(FakeInput(DT_FLOAT))  // inputs
    364                    .Attr("min", -0.125f)
    365                    .Attr("max", 63.625f)
    366                    .Attr("narrow_range", false)
    367                    .Finalize(node_def()));
    368   TF_EXPECT_OK(InitOp());
    369   // Upstream gradients.
    370   AddRandomInput(TensorShape({2, 3}));
    371   // Downstream inputs.
    372   AddInputFromArray<float>(TensorShape({2, 3}),
    373                            {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f});
    374 
    375   // Tested code.
    376   TF_ASSERT_OK(RunOpKernel());
    377 
    378   Tensor* output = GetOutput(0);
    379   auto input_flat = GetInput(0).flat<float>();
    380   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
    381   FillValues<float>(&expected, {0.0f, input_flat(1), input_flat(2),
    382                                 input_flat(3), input_flat(4), 0.0f});
    383   ExpectClose(expected, *output);
    384 }
    385 
    386 TEST_F(QuantOpsTest, WithArgsGradient_NarrowRange) {
    387   // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
    388   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
    389   // Nudged range: [-0.25; 63.25].
     390   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
    391   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgsGradient")
    392                    .Input(FakeInput(DT_FLOAT))  // gradient
    393                    .Input(FakeInput(DT_FLOAT))  // inputs
    394                    .Attr("min", -0.125f)
    395                    .Attr("max", 63.375f)
    396                    .Attr("narrow_range", true)
    397                    .Finalize(node_def()));
    398   TF_EXPECT_OK(InitOp());
    399   // Upstream gradients.
    400   AddRandomInput(TensorShape({2, 3}));
    401   // Downstream inputs.
    402   AddInputFromArray<float>(TensorShape({2, 3}),
    403                            {-0.26f, -0.25f, -0.24f, 0.0f, 63.25f, 63.3f});
    404 
    405   // Tested code.
    406   TF_ASSERT_OK(RunOpKernel());
    407 
    408   Tensor* output = GetOutput(0);
    409   auto input_flat = GetInput(0).flat<float>();
    410   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
    411   FillValues<float>(&expected, {0.0f, input_flat(1), input_flat(2),
    412                                 input_flat(3), input_flat(4), 0.0f});
    413   ExpectClose(expected, *output);
    414 }
    415 
    416 TEST_F(QuantOpsTest, WithArgsGradient_4Bits_RegularRange) {
    417   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
    418   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
    419   // Nudged range: [-0.5; 7.0].
    420   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
    421   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgsGradient")
    422                    .Input(FakeInput(DT_FLOAT))  // gradient
    423                    .Input(FakeInput(DT_FLOAT))  // inputs
    424                    .Attr("min", -0.4f)
    425                    .Attr("max", 7.1f)
    426                    .Attr("num_bits", 4)
    427                    .Attr("narrow_range", false)
    428                    .Finalize(node_def()));
    429   TF_EXPECT_OK(InitOp());
    430   // Upstream gradients.
    431   AddRandomInput(TensorShape({2, 3}));
    432   // Downstream inputs.
    433   AddInputFromArray<float>(TensorShape({2, 3}),
    434                            {-0.6f, -0.5f, -0.4f, 0.0f, 7.0f, 7.1f});
    435 
    436   // Tested code.
    437   TF_ASSERT_OK(RunOpKernel());
    438 
    439   Tensor* output = GetOutput(0);
    440   auto input_flat = GetInput(0).flat<float>();
    441   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
    442   FillValues<float>(&expected, {0.0f, input_flat(1), input_flat(2),
    443                                 input_flat(3), input_flat(4), 0.0f});
    444   ExpectClose(expected, *output);
    445 }
    446 
    447 TEST_F(QuantOpsTest, WithArgsGradient_4Bits_NarrowRange) {
    448   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
    449   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
    450   // Nudged range: [-0.5; 6.5].
    451   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
    452   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgsGradient")
    453                    .Input(FakeInput(DT_FLOAT))  // gradient
    454                    .Input(FakeInput(DT_FLOAT))  // inputs
    455                    .Attr("min", -0.4f)
    456                    .Attr("max", 6.6f)
    457                    .Attr("num_bits", 4)
    458                    .Attr("narrow_range", true)
    459                    .Finalize(node_def()));
    460   TF_EXPECT_OK(InitOp());
    461   // Upstream gradients.
    462   AddRandomInput(TensorShape({2, 3}));
    463   // Downstream inputs.
    464   AddInputFromArray<float>(TensorShape({2, 3}),
    465                            {-0.6f, -0.5f, -0.4f, 0.0f, 6.5f, 6.6f});
    466 
    467   // Tested code.
    468   TF_ASSERT_OK(RunOpKernel());
    469 
    470   Tensor* output = GetOutput(0);
    471   auto input_flat = GetInput(0).flat<float>();
    472   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
    473   FillValues<float>(&expected, {0.0f, input_flat(1), input_flat(2),
    474                                 input_flat(3), input_flat(4), 0.0f});
    475   ExpectClose(expected, *output);
    476 }
    477 
    478 TEST_F(QuantOpsTest, WithVars_ZeroMinAndMax) {
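           // min == max == 0.0 is the degenerate case of an untrained range; the op
           // is still expected to run and map zero inputs to zero outputs.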
    479   RunTestFakeQuantWithMinMaxVars(8, false, 0.0f, 0.0f, TensorShape({2, 3}),
    480                                  {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
    481                                  {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
    482 }
    483 
    484 TEST_F(QuantOpsTest, WithVarsNoNudging_RegularRange) {
    485   // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
    486   // Original zero point: 40, no nudging necessary.
     487   // Expected quantized values: -10.0, -9.75, ..., 53.75.
    488   RunTestFakeQuantWithMinMaxVars(
    489       8, false, -10.0f, 53.75f, TensorShape({2, 3}),
    490       {-10.1f, -10.0f, -9.9f, -9.75f, 53.75f, 53.8f},
    491       {-10.0f, -10.0f, -10.0f, -9.75f, 53.75f, 53.75f});
    492 }
    493 
    494 TEST_F(QuantOpsTest, WithVarsNoNudging_NarrowRange) {
    495   // Original quantization range: [-10 + 0 / 4, -10 + 254 / 4], scale: 1/4.
    496   // Original zero point: 41, no nudging necessary.
     497   // Expected quantized values: -10.0, -9.75, ..., 53.5.
    498   RunTestFakeQuantWithMinMaxVars(
    499       8, true, -10.0f, 53.5f, TensorShape({2, 3}),
    500       {-10.1f, -10.0f, -9.90f, -9.75f, 53.5f, 53.6f},
    501       {-10.0f, -10.0f, -10.0f, -9.75f, 53.5f, 53.5f});
    502 }
    503 
    504 TEST_F(QuantOpsTest, WithVarsNudgedDown_RegularRange) {
    505   // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
    506   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
    507   // Nudged range: [0.0; 63.75].
    508   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
    509   RunTestFakeQuantWithMinMaxVars(8, false, -0.1f, 63.65f, TensorShape({2, 3}),
    510                                  {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.8f},
    511                                  {-0.0f, 0.0f, 0.0f, 0.25f, 63.75f, 63.75f});
    512 }
    513 
    514 TEST_F(QuantOpsTest, WithVarsNudgedDown_NarrowRange) {
    515   // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
    516   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
    517   // Nudged range: [0.0; 63.5].
    518   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
    519   RunTestFakeQuantWithMinMaxVars(8, true, -0.1f, 63.4f, TensorShape({2, 3}),
    520                                  {-0.1f, 0.0f, 0.1f, 0.25f, 63.5f, 63.6f},
    521                                  {-0.0f, 0.0f, 0.0f, 0.25f, 63.5f, 63.5f});
    522 }
    523 
    524 TEST_F(QuantOpsTest, WithVarsNudgedUp_RegularRange) {
    525   // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
    526   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
    527   // Nudged range: [-0.25; 63.5].
    528   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
    529   RunTestFakeQuantWithMinMaxVars(8, false, -0.125f, 63.625f,
    530                                  TensorShape({2, 3}),
    531                                  {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f},
    532                                  {-0.25f, -0.25f, -0.25f, 0.0f, 63.5f, 63.5f});
    533 }
    534 
    535 TEST_F(QuantOpsTest, WithVarsNudgedUp_NarrowRange) {
    536   // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
    537   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
    538   // Nudged range: [-0.25; 63.25].
    539   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
    540   RunTestFakeQuantWithMinMaxVars(
    541       8, true, -0.125f, 63.375f, TensorShape({2, 3}),
    542       {-0.26f, -0.25f, -0.24f, 0.0f, 63.25f, 63.3f},
    543       {-0.25f, -0.25f, -0.25f, 0.0f, 63.25f, 63.25f});
    544 }
    545 
    546 TEST_F(QuantOpsTest, WithVarsNudgedZeroIs255_RegularRange) {
    547   // Original quantization range: [0.4 / 4 - 255 / 4, 0.4 / 4 + 0 / 4].
    548   // Scale: 1/4,  original zero point: 254.6, nudged to 255.
    549   // Nudged range: [-63.75; 0.0].
    550   // Expected quantized values: -63.75, -63.5, -63.25, ..., 0.0.
    551   RunTestFakeQuantWithMinMaxVars(
    552       8, false, -63.65f, 0.1f, TensorShape({2, 3}),
    553       {-63.80f, -63.75f, -63.70f, -63.5f, 0.0f, 0.1f},
    554       {-63.75f, -63.75f, -63.75f, -63.5f, 0.0f, 0.0f});
    555 }
    556 
    557 TEST_F(QuantOpsTest, WithVarsNudgedZeroIs255_NarrowRange) {
    558   // Original quantization range: [0.4 / 4 - 254 / 4, 0.4 / 4 + 0 / 4].
    559   // Scale: 1/4,  original zero point: 254.6, nudged to 255.
    560   // Nudged range: [-63.5; 0.0].
    561   // Expected quantized values: -63.5, -63.25, -63.0, ..., 0.0.
    562   RunTestFakeQuantWithMinMaxVars(8, true, -63.4f, 0.1f, TensorShape({2, 3}),
    563                                  {-63.6f, -63.5f, -63.4f, -63.25f, 0.0f, 0.1f},
    564                                  {-63.5f, -63.5f, -63.5f, -63.25f, 0.0f, 0.0f});
    565 }
    566 
    567 TEST_F(QuantOpsTest, WithVarsNoNudging_4Bits_RegularRange) {
    568   // Original quantization range: [-6 + 0 / 2, -6 + 15 / 2], scale: 1/2.
    569   // Original zero point: 12, no nudging necessary.
    570   // Expected quantized values: -6, -5.5, ..., 1.5.
    571   RunTestFakeQuantWithMinMaxVars(4, false, -6.0f, 1.5f, TensorShape({2, 3}),
    572                                  {-6.1f, -6.0f, -5.9f, -5.5f, 1.5f, 1.6f},
    573                                  {-6.0f, -6.0f, -6.0f, -5.5f, 1.5f, 1.5f});
    574 }
    575 
    576 TEST_F(QuantOpsTest, WithVarsNoNudging_4Bits_NarrowRange) {
    577   // Original quantization range: [-6 + 0 / 2, -6 + 14 / 2], scale: 1/2.
    578   // Original zero point: 13, no nudging necessary.
    579   // Expected quantized values: -6, -5.5, ..., 1.0.
    580   RunTestFakeQuantWithMinMaxVars(4, true, -6.0f, 1.0f, TensorShape({2, 3}),
    581                                  {-6.1f, -6.0f, -5.9f, -5.5f, 1.0f, 1.1f},
    582                                  {-6.0f, -6.0f, -6.0f, -5.5f, 1.0f, 1.0f});
    583 }
    584 
    585 TEST_F(QuantOpsTest, WithVarsNudgedDown_4Bits_RegularRange) {
    586   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
    587   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
    588   // Nudged range: [0.0; 7.5].
    589   // Expected quantized values: 0.0, 0.5, ..., 7.5.
    590   RunTestFakeQuantWithMinMaxVars(4, false, -0.1f, 7.4f, TensorShape({2, 3}),
    591                                  {-0.1f, 0.0f, 0.1f, 0.5f, 7.5f, 7.6f},
    592                                  {-0.0f, 0.0f, 0.0f, 0.5f, 7.5f, 7.5f});
    593 }
    594 
    595 TEST_F(QuantOpsTest, WithVarsNudgedDown_4Bits_NarrowRange) {
    596   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
    597   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
    598   // Nudged range: [0.0; 7.0].
    599   // Expected quantized values: 0.0, 0.5, ..., 7.0.
    600   RunTestFakeQuantWithMinMaxVars(4, true, -0.1f, 6.9f, TensorShape({2, 3}),
    601                                  {-0.1f, 0.0f, 0.1f, 0.5f, 7.0f, 7.1f},
    602                                  {-0.0f, 0.0f, 0.0f, 0.5f, 7.0f, 7.0f});
    603 }
    604 
    605 TEST_F(QuantOpsTest, WithVarsNudgedUp_4Bits_RegularRange) {
    606   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
    607   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
    608   // Nudged range: [-0.5; 7.0].
    609   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
    610   RunTestFakeQuantWithMinMaxVars(4, false, -0.4f, 7.1f, TensorShape({2, 3}),
    611                                  {-0.6f, -0.5f, -0.24f, 0.0f, 7.0f, 7.1f},
    612                                  {-0.5f, -0.5f, -0.00f, 0.0f, 7.0f, 7.0f});
    613 }
    614 
    615 TEST_F(QuantOpsTest, WithVarsNudgedUp_4Bits_NarrowRange) {
    616   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
    617   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
    618   // Nudged range: [-0.5; 6.5].
    619   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
    620   RunTestFakeQuantWithMinMaxVars(4, true, -0.4f, 6.6f, TensorShape({2, 3}),
    621                                  {-0.6f, -0.5f, -0.24f, 0.0f, 6.5f, 6.6f},
    622                                  {-0.5f, -0.5f, -0.00f, 0.0f, 6.5f, 6.5f});
    623 }
    624 
    625 TEST_F(QuantOpsTest, WithVarsNudgedZero15_4Bits_RegularRange) {
    626   // Original quantization range: [0.4 / 2 - 15 / 2, 0.4 / 2 + 0 / 2].
    627   // Scale: 1/2,  original zero point: 14.6, nudged to 15.
    628   // Nudged range: [-7.5; 0.0].
    629   // Expected quantized values: -7.5, -7.0, ..., 0.0.
    630   RunTestFakeQuantWithMinMaxVars(4, false, -7.3f, 0.2f, TensorShape({2, 3}),
    631                                  {-7.6f, -7.5f, -7.4f, -7.2f, 0.0f, 0.1f},
    632                                  {-7.5f, -7.5f, -7.5f, -7.0f, 0.0f, 0.0f});
    633 }
    634 
    635 TEST_F(QuantOpsTest, WithVarsNudgedZero15_4Bits_NarrowRange) {
    636   // Original quantization range: [0.4 / 2 - 14 / 2, 0.4 / 2 + 0 / 2].
    637   // Scale: 1/2,  original zero point: 14.6, nudged to 15.
    638   // Nudged range: [-7.0; 0.0].
    639   // Expected quantized values: -7.0, -6.5, ..., 0.0.
    640   RunTestFakeQuantWithMinMaxVars(4, true, -6.8f, 0.2f, TensorShape({2, 3}),
    641                                  {-7.1f, -7.0f, -6.9f, -6.5f, 0.0f, 0.1f},
    642                                  {-7.0f, -7.0f, -7.0f, -6.5f, 0.0f, 0.0f});
    643 }
    644 
    645 TEST_F(QuantOpsTest, WithVarsGradient_ZeroMinAndMax) {
    646   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsGradient")
    647                    .Attr("narrow_range", false)
    648                    .Input(FakeInput(DT_FLOAT))  // gradients
    649                    .Input(FakeInput(DT_FLOAT))  // inputs
    650                    .Input(FakeInput(DT_FLOAT))  // min
    651                    .Input(FakeInput(DT_FLOAT))  // max
    652                    .Finalize(node_def()));
    653   TF_EXPECT_OK(InitOp());
    654   // Upstream gradients.
    655   AddRandomInput(TensorShape({2, 3}));
    656   // Downstream inputs.
    657   AddInputFromArray<float>(TensorShape({2, 3}),
    658                            {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
    659   // Min.
    660   AddInputFromArray<float>(TensorShape({}), {0.0f});
    661   // Max.
    662   AddInputFromArray<float>(TensorShape({}), {0.0f});
    663 
    664   // Tested code.
    665   TF_ASSERT_OK(RunOpKernel());
    666 
    667   Tensor* output_bprop_wrt_input = GetOutput(0);
    668   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
    669   auto in_flat = GetInput(0).flat<float>();
    670   FillValues<float>(
    671       &expected_bprop_wrt_input,
    672       {in_flat(0), in_flat(1), in_flat(2), in_flat(3), in_flat(4), in_flat(5)});
    673   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
    674 
    675   Tensor* output_bprop_wrt_min = GetOutput(1);
    676   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({}));
    677   expected_bprop_wrt_min.flat<float>()(0) = 0.0f;
    678   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
    679 
    680   Tensor* output_bprop_wrt_max = GetOutput(2);
    681   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({}));
    682   expected_bprop_wrt_max.flat<float>()(0) = 0.0f;
    683   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
    684 }
    685 
    686 TEST_F(QuantOpsTest, WithVarsGradient_RegularRange) {
    687   // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
    688   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
    689   // Nudged range: [-0.25; 63.5].
    690   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
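           // FakeQuantWithMinMaxVarsGradient has three outputs: gradients w.r.t. the
           // inputs (passed through inside the nudged range, zeroed outside it) and
           // scalar gradients w.r.t. min and max.  Here -0.26 lies below the nudged
           // min and 63.6 above the nudged max, so those upstream gradients are
           // routed to the min and max outputs, as the expectations below verify.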
    691   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsGradient")
    692                    .Attr("narrow_range", false)
    693                    .Input(FakeInput(DT_FLOAT))  // gradients
    694                    .Input(FakeInput(DT_FLOAT))  // inputs
    695                    .Input(FakeInput(DT_FLOAT))  // min
    696                    .Input(FakeInput(DT_FLOAT))  // max
    697                    .Finalize(node_def()));
    698   TF_EXPECT_OK(InitOp());
    699   // Upstream gradients.
    700   AddRandomInput(TensorShape({2, 3}));
    701   // Downstream inputs.
    702   AddInputFromArray<float>(TensorShape({2, 3}),
    703                            {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f});
    704   // Min.
    705   AddInputFromArray<float>(TensorShape({}), {-0.125f});
    706   // Max.
    707   AddInputFromArray<float>(TensorShape({}), {63.625f});
    708 
    709   // Tested code.
    710   TF_ASSERT_OK(RunOpKernel());
    711 
    712   Tensor* output_bprop_wrt_input = GetOutput(0);
    713   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
    714   auto in_flat = GetInput(0).flat<float>();
    715   FillValues<float>(&expected_bprop_wrt_input, {0.0f, in_flat(1), in_flat(2),
    716                                                 in_flat(3), in_flat(4), 0.0f});
    717   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
    718 
    719   Tensor* output_bprop_wrt_min = GetOutput(1);
    720   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({}));
    721   expected_bprop_wrt_min.flat<float>()(0) = in_flat(0);
    722   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
    723 
    724   Tensor* output_bprop_wrt_max = GetOutput(2);
    725   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({}));
    726   expected_bprop_wrt_max.flat<float>()(0) = in_flat(5);
    727   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
    728 }
    729 
    730 TEST_F(QuantOpsTest, WithVarsGradient_NarrowRange) {
    731   // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
    732   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
    733   // Nudged range: [-0.25; 63.25].
    734   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
    735   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsGradient")
    736                    .Attr("narrow_range", true)
    737                    .Input(FakeInput(DT_FLOAT))  // gradients
    738                    .Input(FakeInput(DT_FLOAT))  // inputs
    739                    .Input(FakeInput(DT_FLOAT))  // min
    740                    .Input(FakeInput(DT_FLOAT))  // max
    741                    .Finalize(node_def()));
    742   TF_EXPECT_OK(InitOp());
    743   // Upstream gradients.
    744   AddRandomInput(TensorShape({2, 3}));
    745   // Downstream inputs.
    746   AddInputFromArray<float>(TensorShape({2, 3}),
    747                            {-0.26f, -0.25f, -0.24f, 0.0f, 63.25f, 63.3f});
    748   // Min.
    749   AddInputFromArray<float>(TensorShape({}), {-0.125f});
    750   // Max.
    751   AddInputFromArray<float>(TensorShape({}), {63.375f});
    752 
    753   // Tested code.
    754   TF_ASSERT_OK(RunOpKernel());
    755 
    756   Tensor* output_bprop_wrt_input = GetOutput(0);
    757   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
    758   auto in_flat = GetInput(0).flat<float>();
    759   FillValues<float>(&expected_bprop_wrt_input, {0.0f, in_flat(1), in_flat(2),
    760                                                 in_flat(3), in_flat(4), 0.0f});
    761   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
    762 
    763   Tensor* output_bprop_wrt_min = GetOutput(1);
    764   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({}));
    765   expected_bprop_wrt_min.flat<float>()(0) = in_flat(0);
    766   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
    767 
    768   Tensor* output_bprop_wrt_max = GetOutput(2);
    769   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({}));
    770   expected_bprop_wrt_max.flat<float>()(0) = in_flat(5);
    771   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
    772 }
    773 
    774 TEST_F(QuantOpsTest, WithVarsGradient_4Bits_RegularRange) {
    775   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
    776   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
    777   // Nudged range: [-0.5; 7.0].
    778   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
    779   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsGradient")
    780                    .Attr("num_bits", 4)
    781                    .Attr("narrow_range", false)
    782                    .Input(FakeInput(DT_FLOAT))  // gradients
    783                    .Input(FakeInput(DT_FLOAT))  // inputs
    784                    .Input(FakeInput(DT_FLOAT))  // min
    785                    .Input(FakeInput(DT_FLOAT))  // max
    786                    .Finalize(node_def()));
    787   TF_EXPECT_OK(InitOp());
    788   // Upstream gradients.
    789   AddRandomInput(TensorShape({2, 3}));
    790   // Downstream inputs.
    791   AddInputFromArray<float>(TensorShape({2, 3}),
    792                            {-0.6f, -0.5f, -0.4f, 0.0f, 7.0f, 7.1f});
    793   // Min.
    794   AddInputFromArray<float>(TensorShape({}), {-0.4f});
    795   // Max.
    796   AddInputFromArray<float>(TensorShape({}), {7.1f});
    797 
    798   // Tested code.
    799   TF_ASSERT_OK(RunOpKernel());
    800 
    801   Tensor* output_bprop_wrt_input = GetOutput(0);
    802   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
    803   auto in_flat = GetInput(0).flat<float>();
    804   FillValues<float>(&expected_bprop_wrt_input, {0.0f, in_flat(1), in_flat(2),
    805                                                 in_flat(3), in_flat(4), 0.0f});
    806   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
    807 
    808   Tensor* output_bprop_wrt_min = GetOutput(1);
    809   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({}));
    810   expected_bprop_wrt_min.flat<float>()(0) = in_flat(0);
    811   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
    812 
    813   Tensor* output_bprop_wrt_max = GetOutput(2);
    814   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({}));
    815   expected_bprop_wrt_max.flat<float>()(0) = in_flat(5);
    816   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
    817 }
    818 
    819 TEST_F(QuantOpsTest, WithVarsGradient_4Bits_NarrowRange) {
    820   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
    821   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
    822   // Nudged range: [-0.5; 6.5].
    823   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
    824   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsGradient")
    825                    .Attr("num_bits", 4)
    826                    .Attr("narrow_range", true)
    827                    .Input(FakeInput(DT_FLOAT))  // gradients
    828                    .Input(FakeInput(DT_FLOAT))  // inputs
    829                    .Input(FakeInput(DT_FLOAT))  // min
    830                    .Input(FakeInput(DT_FLOAT))  // max
    831                    .Finalize(node_def()));
    832   TF_EXPECT_OK(InitOp());
    833   // Upstream gradients.
    834   AddRandomInput(TensorShape({2, 3}));
    835   // Downstream inputs.
    836   AddInputFromArray<float>(TensorShape({2, 3}),
    837                            {-0.6f, -0.5f, -0.4f, 0.0f, 6.5f, 6.6f});
    838   // Min.
    839   AddInputFromArray<float>(TensorShape({}), {-0.4f});
    840   // Max.
    841   AddInputFromArray<float>(TensorShape({}), {6.6f});
    842 
    843   // Tested code.
    844   TF_ASSERT_OK(RunOpKernel());
    845 
    846   Tensor* output_bprop_wrt_input = GetOutput(0);
    847   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
    848   auto in_flat = GetInput(0).flat<float>();
    849   FillValues<float>(&expected_bprop_wrt_input, {0.0f, in_flat(1), in_flat(2),
    850                                                 in_flat(3), in_flat(4), 0.0f});
    851   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
    852 
    853   Tensor* output_bprop_wrt_min = GetOutput(1);
    854   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({}));
    855   expected_bprop_wrt_min.flat<float>()(0) = in_flat(0);
    856   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
    857 
    858   Tensor* output_bprop_wrt_max = GetOutput(2);
    859   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({}));
    860   expected_bprop_wrt_max.flat<float>()(0) = in_flat(5);
    861   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
    862 }
    863 
    864 TEST_F(QuantOpsTest, WithVarsPerChannel_ZeroMinAndMax) {
    865   RunTestFakeQuantWithMinMaxVarsPerChannel(
    866       8, false, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
    867       {0.0f, 0.0f, 0.0f, 0.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
    868       {0.0f, 0.0f, 0.0f, 0.0f});
    869 }
    870 
    871 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_RegularRange) {
    872   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
    873   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
    874   // Nudged ranges: [0.0; 63.75].
    875   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
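           // In the per-channel variant, min and max are vectors applied along the
           // last dimension of the input; every channel here uses the same
           // [-0.1, 63.65] range, so each element quantizes exactly as in the
           // single-range test above.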
    876   RunTestFakeQuantWithMinMaxVarsPerChannel(
    877       8, false, TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f},
    878       {63.65f, 63.65f, 63.65f, 63.65f}, TensorShape({4}),
    879       {-0.1f, 0.0f, 63.75f, 63.8f}, {0.0f, 0.0f, 63.75f, 63.75f});
    880 }
    881 
    882 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_NarrowRange) {
    883   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
    884   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
    885   // Nudged ranges: [0.0; 63.5].
    886   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
    887   RunTestFakeQuantWithMinMaxVarsPerChannel(
    888       8, true, TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f},
    889       {63.4f, 63.4f, 63.4f, 63.4f}, TensorShape({4}),
    890       {-0.1f, 0.0f, 63.5f, 63.6f}, {0.0f, 0.0f, 63.5f, 63.5f});
    891 }
    892 
    893 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedUp_RegularRange) {
    894   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
    895   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
    896   // Nudged ranges: [-0.25; 63.5].
    897   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
    898   RunTestFakeQuantWithMinMaxVarsPerChannel(
    899       8, false, TensorShape({4}), {-0.125f, -0.125f, -0.125f, -0.125f},
    900       {63.625f, 63.625f, 63.625f, 63.625f}, TensorShape({4}),
    901       {-0.26f, -0.25f, -0.24f, 63.6f}, {-0.25f, -0.25f, -0.25f, 63.5f});
    902 }
    903 
    904 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedUp_NarrowRange) {
    905   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
    906   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
    907   // Nudged ranges: [-0.25; 63.25].
    908   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
    909   RunTestFakeQuantWithMinMaxVarsPerChannel(
    910       8, true, TensorShape({4}), {-0.125f, -0.125f, -0.125f, -0.125f},
    911       {63.375f, 63.375f, 63.375f, 63.375f}, TensorShape({4}),
    912       {-0.26f, -0.25f, -0.24f, 63.3f}, {-0.25f, -0.25f, -0.25f, 63.25f});
    913 }
    914 
    915 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedDown_RegularRange) {
    916   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
    917   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
    918   // Nudged ranges: [0.0; 63.75].
    919   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
    920   RunTestFakeQuantWithMinMaxVarsPerChannel(
    921       8, false, TensorShape({3}), {-0.1f, -0.1f, -0.1f},
    922       {63.65f, 63.65f, 63.65f}, TensorShape({2, 3}),
    923       {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.80f},
    924       {-0.0f, 0.0f, 0.0f, 0.25f, 63.75f, 63.75f});
    925 }
    926 
    927 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedDown_NarrowRange) {
    928   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
    929   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
    930   // Nudged ranges: [0.0; 63.5].
    931   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
    932   RunTestFakeQuantWithMinMaxVarsPerChannel(
    933       8, true, TensorShape({3}), {-0.1f, -0.1f, -0.1f}, {63.4f, 63.4f, 63.4f},
    934       TensorShape({2, 3}), {-0.1f, 0.0f, 0.1f, 0.25f, 63.5f, 63.6f},
    935       {0.0f, 0.0f, 0.0f, 0.25f, 63.5f, 63.5f});
    936 }
    937 
    938 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedUp_RegularRange) {
    939   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
    940   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
    941   // Nudged ranges: [-0.25; 63.5].
    942   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
    943   RunTestFakeQuantWithMinMaxVarsPerChannel(
    944       8, false, TensorShape({3}), {-0.125f, -0.125f, -0.125f},
    945       {63.625f, 63.625f, 63.625f}, TensorShape({2, 3}),
    946       {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f},
    947       {-0.25f, -0.25f, -0.25f, 0.0f, 63.5f, 63.5f});
    948 }
    949 
    950 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedUp_NarrowRange) {
    951   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
    952   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
    953   // Nudged ranges: [-0.25; 63.25].
    954   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
    955   RunTestFakeQuantWithMinMaxVarsPerChannel(
    956       8, true, TensorShape({3}), {-0.125f, -0.125f, -0.125f},
    957       {63.375f, 63.375f, 63.375f}, TensorShape({2, 3}),
    958       {-0.26f, -0.25f, -0.24f, 0.0f, 63.25f, 63.3f},
    959       {-0.25f, -0.25f, -0.25f, 0.0f, 63.25f, 63.25f});
    960 }
    961 
    962 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedDown_RegularRange) {
    963   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
    964   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
    965   // Nudged ranges: [0.0; 63.75].
    966   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
    967   // clang-format off
    968   RunTestFakeQuantWithMinMaxVarsPerChannel(
    969       8, false,
    970       TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f},
    971       {63.65f, 63.65f, 63.65f, 63.65f},
    972       TensorShape({1, 2, 3, 4}),
    973       {-0.1f,   0.0f,   0.1f,   0.25f,  0.5f,    0.75f,
    974         1.0f,   1.25f,  1.5f,   1.75f,  2.0f,    2.25f,
    975        63.0f,  63.25f, 63.5f,  63.7f,  63.75f,  63.8f,
    976        63.9f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
    977       { 0.0f,   0.0f,   0.0f,   0.25f,  0.5f,    0.75f,
    978         1.0f,   1.25f,  1.5f,   1.75f,  2.0f,    2.25f,
    979        63.0f,  63.25f, 63.5f,  63.75f, 63.75f,  63.75f,
    980        63.75f, 63.75f, 63.75f, 63.75f, 63.75f,  63.75f});
    981   // clang-format on
    982 }
    983 
    984 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedDown_NarrowRange) {
    985   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
    986   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
    987   // Nudged ranges: [0.0; 63.5].
    988   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
    989   // clang-format off
    990   RunTestFakeQuantWithMinMaxVarsPerChannel(
    991       8, true,
    992       TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f},
    993       {63.4f, 63.4f, 63.4f, 63.4f},
    994       TensorShape({1, 2, 3, 4}),
    995       {-0.1f,   0.0f,   0.1f,   0.25f,  0.5f,    0.75f,
    996         1.0f,   1.25f,  1.5f,   1.75f,  2.0f,    2.25f,
    997        63.0f,  63.25f, 63.3f,  63.4f,  63.5f,   63.6f,
    998        63.7f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
    999       { 0.0f,   0.0f,   0.0f,   0.25f,  0.5f,    0.75f,
   1000         1.0f,   1.25f,  1.5f,   1.75f,  2.0f,    2.25f,
   1001        63.0f,  63.25f, 63.25f, 63.5f,  63.5f,   63.5f,
   1002        63.5f,  63.5f,  63.5f,  63.5f,  63.5f,   63.5f});
   1003   // clang-format on
   1004 }
   1005 
   1006 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedUp_RegularRange) {
   1007   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
   1008   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
   1009   // Nudged ranges: [-0.25; 63.5].
   1010   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
   1011   // clang-format off
   1012   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1013       8, false,
   1014       TensorShape({4}), {-0.125f, -0.125f, -0.125f, -0.125f},
   1015       {63.625f, 63.625f, 63.625f, 63.625f},
   1016       TensorShape({1, 2, 3, 4}),
   1017       { -0.3f,  -0.25f, -0.2f,   0.0f,    0.25f,  0.5f,
   1018          0.75f,  1.0f,   1.25f,  1.5f,    1.75f,  2.0f,
   1019         63.0f,  63.25f, 63.4f,  63.5f,   63.6f,  63.7f,
   1020        100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
   1021       {-0.25f,  -0.25f, -0.25f,  0.0f,   0.25f,   0.5f,
   1022         0.75f,   1.0f,   1.25f,  1.5f,   1.75f,   2.0f,
   1023         63.0f,  63.25f, 63.5f,  63.5f,  63.5f,   63.5f,
   1024         63.5f,  63.5f,  63.5f,  63.5f,  63.5f,   63.5f});
   1025   // clang-format on
   1026 }
   1027 
   1028 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedUp_NarrowRange) {
   1029   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
   1030   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
   1031   // Nudged ranges: [-0.25; 63.25].
   1032   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
   1033   // clang-format off
   1034   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1035       8, true,
   1036       TensorShape({4}), {-0.125f, -0.125f, -0.125f, -0.125f},
   1037       {63.375f, 63.375f, 63.375f, 63.375f},
   1038       TensorShape({1, 2, 3, 4}),
   1039       { -0.3f,  -0.25f, -0.2f,   0.0f,   0.25f,   0.5f,
   1040          0.75f,  1.0f,   1.25f,  1.5f,   1.75f,   2.0f,
   1041         63.0f,  63.2f,  63.25f, 63.3f,  63.4f,   63.5f,
   1042        100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
   1043       { -0.25f, -0.25f, -0.25f,  0.0f,   0.25f,   0.5f,
   1044          0.75f,  1.0f,   1.25f,  1.5f,   1.75f,   2.0f,
   1045         63.0f,  63.25f, 63.25f, 63.25f, 63.25f,  63.25f,
   1046         63.25f, 63.25f, 63.25f, 63.25f, 63.25f,  63.25f});
   1047   // clang-format on
   1048 }
   1049 
   1050 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_4Bits_RegularRange) {
   1051   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
   1052   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
   1053   // Nudged range: [0.0; 7.5].
   1054   // Expected quantized values: 0.0, 0.5, ..., 7.5.
   1055   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1056       4, false, TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f},
   1057       {7.4f, 7.4f, 7.4f, 7.4f}, TensorShape({4}), {-0.1f, 0.0f, 7.5f, 7.6f},
   1058       {0.0f, 0.0f, 7.5f, 7.5f});
   1059 }
   1060 
   1061 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_4Bits_NarrowRange) {
   1062   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
   1063   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
   1064   // Nudged range: [0.0; 7.0].
   1065   // Expected quantized values: 0.0, 0.5, ..., 7.0.
   1066   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1067       4, true, TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f},
   1068       {6.9f, 6.9f, 6.9f, 6.9f}, TensorShape({4}), {-0.1f, 0.0f, 7.0f, 7.1f},
   1069       {0.0f, 0.0f, 7.0f, 7.0f});
   1070 }
   1071 
   1072 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedUp_4Bits_RegularRange) {
   1073   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
   1074   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
   1075   // Nudged range: [-0.5; 7.0].
   1076   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
   1077   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1078       4, false, TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f},
   1079       {7.1f, 7.1f, 7.1f, 7.1f}, TensorShape({4}), {-0.6f, -0.5f, 7.0f, 7.1f},
   1080       {-0.5f, -0.5f, 7.0f, 7.0f});
   1081 }
   1082 
   1083 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedUp_4Bits_NarrowRange) {
   1084   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
   1085   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
   1086   // Nudged range: [-0.5; 6.5].
   1087   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
   1088   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1089       4, true, TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f},
   1090       {6.6f, 6.6f, 6.6f, 6.6f}, TensorShape({4}), {-0.6f, -0.5f, 6.5f, 6.6f},
   1091       {-0.5f, -0.5f, 6.5f, 6.5f});
   1092 }
   1093 
   1094 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedDown_4Bits_RegularRange) {
   1095   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
   1096   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
   1097   // Nudged range: [0.0; 7.5].
   1098   // Expected quantized values: 0.0, 0.5, ..., 7.5.
   1099   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1100       4, false, TensorShape({3}), {-0.1f, -0.1f, -0.1f}, {7.4f, 7.4f, 7.4f},
   1101       TensorShape({2, 3}), {-0.1f, 0.0f, 0.1f, 0.5f, 7.5f, 7.6f},
   1102       {0.0f, 0.0f, 0.0f, 0.5f, 7.5f, 7.5f});
   1103 }
   1104 
   1105 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedDown_4Bits_NarrowRange) {
   1106   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
   1107   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
   1108   // Nudged range: [0.0; 7.0].
   1109   // Expected quantized values: 0.0, 0.5, ..., 7.0.
   1110   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1111       4, true, TensorShape({3}), {-0.1f, -0.1f, -0.1f}, {6.9f, 6.9f, 6.9f},
   1112       TensorShape({2, 3}), {-0.1f, 0.0f, 0.1f, 0.5f, 7.0f, 7.1f},
   1113       {0.0f, 0.0f, 0.0f, 0.5f, 7.0f, 7.0f});
   1114 }
   1115 
   1116 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedUp_4Bits_RegularRange) {
   1117   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
   1118   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
   1119   // Nudged range: [-0.5; 7.0].
   1120   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
   1121   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1122       4, false, TensorShape({3}), {-0.4f, -0.4f, -0.4f}, {7.1f, 7.1f, 7.1f},
   1123       TensorShape({2, 3}), {-0.51f, -0.5f, -0.24f, 0.0f, 7.0f, 7.1f},
   1124       {-0.5f, -0.5f, 0.0f, 0.0f, 7.0f, 7.0f});
   1125 }
   1126 
   1127 TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedUp_4Bits_NarrowRange) {
   1128   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
   1129   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
   1130   // Nudged range: [-0.5; 6.5].
   1131   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
   1132   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1133       4, true, TensorShape({3}), {-0.4f, -0.4f, -0.4f}, {6.6f, 6.6f, 6.6f},
   1134       TensorShape({2, 3}), {-0.6f, -0.5f, -0.24f, 0.0f, 6.5f, 6.6f},
   1135       {-0.5f, -0.5f, 0.0f, 0.0f, 6.5f, 6.5f});
   1136 }
   1137 
   1138 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedDown_4Bits_RegularRange) {
   1139   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
   1140   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
   1141   // Nudged range: [0.0; 7.5].
   1142   // Expected quantized values: 0.0, 0.5, ..., 7.5.
   1143   // clang-format off
   1144   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1145       4, false,
   1146       TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f}, {7.4f, 7.4f, 7.4f, 7.4f},
   1147       TensorShape({1, 2, 3, 4}),
   1148       {-0.1f,   0.0f,   0.1f,   0.5f,   1.0f,    1.5f,
   1149         1.5f,   2.0f,   2.5f,   3.0f,   3.5f,    4.0f,
   1150         6.0f,   6.5f,   7.0f,   7.4f,   7.5f,    7.7f,
   1151         7.8f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
   1152       { 0.0f,   0.0f,   0.0f,   0.5f,   1.0f,    1.5f,
   1153         1.5f,   2.0f,   2.5f,   3.0f,   3.5f,    4.0f,
   1154         6.0f,   6.5f,   7.0f,   7.5f,   7.5f,    7.5f,
   1155         7.5f,   7.5f,   7.5f,   7.5f,   7.5f,    7.5f});
   1156   // clang-format on
   1157 }
   1158 
   1159 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedDown_4Bits_NarrowRange) {
   1160   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
   1161   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
   1162   // Nudged range: [0.0; 7.0].
   1163   // Expected quantized values: 0.0, 0.5, ..., 7.0.
   1164   // clang-format off
   1165   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1166       4, true,
   1167       TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f}, {6.9f, 6.9f, 6.9f, 6.9f},
   1168       TensorShape({1, 2, 3, 4}),
   1169       {-0.1f,   0.0f,   0.1f,   0.5f,   1.0f,    1.5f,
   1170         1.5f,   2.0f,   2.5f,   3.0f,   3.5f,    4.0f,
   1171         6.0f,   6.5f,   6.8f,   6.9f,   7.0f,    7.1f,
   1172         7.2f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
   1173       { 0.0f,   0.0f,   0.0f,   0.5f,   1.0f,    1.5f,
   1174         1.5f,   2.0f,   2.5f,   3.0f,   3.5f,    4.0f,
   1175         6.0f,   6.5f,   7.0f,   7.0f,   7.0f,    7.0f,
   1176         7.0f,   7.0f,   7.0f,   7.0f,   7.0f,    7.0f});
   1177   // clang-format on
   1178 }
   1179 
   1180 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedUp_4Bits_RegularRange) {
   1181   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
   1182   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
   1183   // Nudged range: [-0.5; 7.0].
   1184   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
   1185   // clang-format off
   1186   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1187       4, false,
   1188       TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f}, {7.1f, 7.1f, 7.1f, 7.1f},
   1189       TensorShape({1, 2, 3, 4}),
   1190       { -0.6f,  -0.5f,  -0.4f,   0.0f,   0.5f,    1.0f,
   1191          1.5f,   2.0f,   2.5f,   3.0f,   3.5f,    4.0f,
   1192          6.0f,   6.5f,   6.9f,   7.0f,   7.1f,    7.7f,
   1193        100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
   1194       { -0.5f, -0.5f,   -0.5f,   0.0f,   0.5f,    1.0f,
   1195          1.5f,  2.0f,    2.5f,   3.0f,   3.5f,    4.0f,
   1196          6.0f,  6.5f,    7.0f,   7.0f,   7.0f,    7.0f,
   1197          7.0f,  7.0f,    7.0f,   7.0f,   7.0f,    7.0f});
   1198   // clang-format on
   1199 }
   1200 
   1201 TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedUp_4Bits_NarrowRange) {
   1202   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
   1203   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
   1204   // Nudged range: [-0.5; 6.5].
   1205   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
   1206   // clang-format off
   1207   RunTestFakeQuantWithMinMaxVarsPerChannel(
   1208       4, true,
   1209       TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f}, {6.6f, 6.6f, 6.6f, 6.6f},
   1210       TensorShape({1, 2, 3, 4}),
   1211       { -0.6f,  -0.5f,  -0.4f,   0.0f,   0.5f,    1.0f,
   1212          1.5f,   2.0f,   2.5f,   3.0f,   3.5f,    4.0f,
   1213          5.5f,   6.0f,   6.4f,   6.5f,   6.6f,    6.7f,
   1214        100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 1000.0f},
   1215       { -0.5f,  -0.5f,  -0.5f,   0.0f,   0.5f,    1.0f,
   1216          1.5f,   2.0f,   2.5f,   3.0f,   3.5f,    4.0f,
   1217          5.5f,   6.0f,   6.5f,   6.5f,   6.5f,    6.5f,
   1218          6.5f,   6.5f,   6.5f,   6.5f,   6.5f,    6.5f});
   1219   // clang-format on
   1220 }
   1221 
   1222 TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedDown_ZeroMinAndMax) {
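          // Degenerate case: min == max == 0 for every channel, so all inputs are
          // treated as in range. The upstream gradient should pass straight through
          // to the input, and the per-channel min/max gradients should be zero.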
   1223   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1224                    .Attr("narrow_range", false)
   1225                    .Input(FakeInput(DT_FLOAT))  // gradients
   1226                    .Input(FakeInput(DT_FLOAT))  // inputs
   1227                    .Input(FakeInput(DT_FLOAT))  // min
   1228                    .Input(FakeInput(DT_FLOAT))  // max
   1229                    .Finalize(node_def()));
   1230   TF_EXPECT_OK(InitOp());
   1231   // Upstream gradients.
   1232   AddRandomInput(TensorShape({4}));
   1233   // Downstream inputs.
   1234   AddInputFromArray<float>(TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f});
   1235   // Min.
   1236   AddInputFromArray<float>(TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f});
   1237   // Max.
   1238   AddInputFromArray<float>(TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f});
   1239 
   1240   // Tested code.
   1241   TF_ASSERT_OK(RunOpKernel());
   1242 
   1243   Tensor* output_bprop_wrt_input = GetOutput(0);
   1244   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1245   auto grad_flat = GetInput(0).flat<float>();
   1246   FillValues<float>(&expected_bprop_wrt_input,
   1247                     {grad_flat(0), grad_flat(1), grad_flat(2), grad_flat(3)});
   1248   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1249 
   1250   Tensor* output_bprop_wrt_min = GetOutput(1);
   1251   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1252   FillValues<float>(&expected_bprop_wrt_min, {0.0f, 0.0f, 0.0f, 0.0f});
   1253   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1254 
   1255   Tensor* output_bprop_wrt_max = GetOutput(2);
   1256   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1257   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, 0.0f});
   1258   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1259 }
   1260 
   1261 TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedDown_RegularRange) {
   1262   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
   1263   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
   1264   // Nudged ranges: [0.0; 63.75].
   1265   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
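          // Gradient routing checked below: inputs at or inside the nudged range
          // [0.0; 63.75] pass the upstream gradient through to the input gradient,
          // inputs below the nudged min contribute to the min gradient, and inputs
          // above the nudged max contribute to the max gradient.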
   1266   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1267                    .Attr("narrow_range", false)
   1268                    .Input(FakeInput(DT_FLOAT))  // gradients
   1269                    .Input(FakeInput(DT_FLOAT))  // inputs
   1270                    .Input(FakeInput(DT_FLOAT))  // min
   1271                    .Input(FakeInput(DT_FLOAT))  // max
   1272                    .Finalize(node_def()));
   1273   TF_EXPECT_OK(InitOp());
   1274   // Upstream gradients.
   1275   AddRandomInput(TensorShape({4}));
   1276   // Downstream inputs.
   1277   AddInputFromArray<float>(TensorShape({4}), {-0.1f, 0.0f, 63.75f, 63.8f});
   1278   // Min.
   1279   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   1280   // Max.
   1281   AddInputFromArray<float>(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f});
   1282 
   1283   // Tested code.
   1284   TF_ASSERT_OK(RunOpKernel());
   1285 
   1286   Tensor* output_bprop_wrt_input = GetOutput(0);
   1287   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1288   auto grad_flat = GetInput(0).flat<float>();
   1289   FillValues<float>(&expected_bprop_wrt_input,
   1290                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   1291   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1292 
   1293   Tensor* output_bprop_wrt_min = GetOutput(1);
   1294   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1295   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   1296   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1297 
   1298   Tensor* output_bprop_wrt_max = GetOutput(2);
   1299   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1300   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   1301   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1302 }
   1303 
   1304 TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedDown_NarrowRange) {
   1305   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
   1306   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
   1307   // Nudged ranges: [0.0; 63.5].
   1308   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
   1309   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1310                    .Attr("narrow_range", true)
   1311                    .Input(FakeInput(DT_FLOAT))  // gradients
   1312                    .Input(FakeInput(DT_FLOAT))  // inputs
   1313                    .Input(FakeInput(DT_FLOAT))  // min
   1314                    .Input(FakeInput(DT_FLOAT))  // max
   1315                    .Finalize(node_def()));
   1316   TF_EXPECT_OK(InitOp());
   1317   // Upstream gradients.
   1318   AddRandomInput(TensorShape({4}));
   1319   // Downstream inputs.
   1320   AddInputFromArray<float>(TensorShape({4}), {-0.1f, 0.0f, 63.5f, 63.6f});
   1321   // Min.
   1322   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   1323   // Max.
   1324   AddInputFromArray<float>(TensorShape({4}), {63.4f, 63.4f, 63.4f, 63.4f});
   1325 
   1326   // Tested code.
   1327   TF_ASSERT_OK(RunOpKernel());
   1328 
   1329   Tensor* output_bprop_wrt_input = GetOutput(0);
   1330   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1331   auto grad_flat = GetInput(0).flat<float>();
   1332   FillValues<float>(&expected_bprop_wrt_input,
   1333                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   1334   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1335 
   1336   Tensor* output_bprop_wrt_min = GetOutput(1);
   1337   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1338   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   1339   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1340 
   1341   Tensor* output_bprop_wrt_max = GetOutput(2);
   1342   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1343   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   1344   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1345 }
   1346 
   1347 TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedUp_RegularRange) {
   1348   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
   1349   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
   1350   // Nudged ranges: [-0.25; 63.5].
   1351   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
   1352   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1353                    .Attr("narrow_range", false)
   1354                    .Input(FakeInput(DT_FLOAT))  // gradients
   1355                    .Input(FakeInput(DT_FLOAT))  // inputs
   1356                    .Input(FakeInput(DT_FLOAT))  // min
   1357                    .Input(FakeInput(DT_FLOAT))  // max
   1358                    .Finalize(node_def()));
   1359   TF_EXPECT_OK(InitOp());
   1360   // Upstream gradients.
   1361   AddRandomInput(TensorShape({4}));
   1362   // Downstream inputs.
   1363   AddInputFromArray<float>(TensorShape({4}), {-0.3f, -0.25f, 63.5f, 63.6f});
   1364   // Min.
   1365   AddInputFromArray<float>(TensorShape({4}),
   1366                            {-0.125f, -0.125f, -0.125f, -0.125f});
   1367   // Max.
   1368   AddInputFromArray<float>(TensorShape({4}),
   1369                            {63.625f, 63.625f, 63.625f, 63.625f});
   1370 
   1371   // Tested code.
   1372   TF_ASSERT_OK(RunOpKernel());
   1373 
   1374   Tensor* output_bprop_wrt_input = GetOutput(0);
   1375   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1376   auto grad_flat = GetInput(0).flat<float>();
   1377   FillValues<float>(&expected_bprop_wrt_input,
   1378                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   1379   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1380 
   1381   Tensor* output_bprop_wrt_min = GetOutput(1);
   1382   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1383   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   1384   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1385 
   1386   Tensor* output_bprop_wrt_max = GetOutput(2);
   1387   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1388   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   1389   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1390 }
   1391 
   1392 TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedUp_NarrowRange) {
   1393   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
   1394   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
   1395   // Nudged ranges: [-0.25; 63.25].
   1396   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
   1397   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1398                    .Attr("narrow_range", true)
   1399                    .Input(FakeInput(DT_FLOAT))  // gradients
   1400                    .Input(FakeInput(DT_FLOAT))  // inputs
   1401                    .Input(FakeInput(DT_FLOAT))  // min
   1402                    .Input(FakeInput(DT_FLOAT))  // max
   1403                    .Finalize(node_def()));
   1404   TF_EXPECT_OK(InitOp());
   1405   // Upstream gradients.
   1406   AddRandomInput(TensorShape({4}));
   1407   // Downstream inputs.
   1408   AddInputFromArray<float>(TensorShape({4}), {-0.3f, -0.25f, 63.25f, 63.3f});
   1409   // Min.
   1410   AddInputFromArray<float>(TensorShape({4}),
   1411                            {-0.125f, -0.125f, -0.125f, -0.125f});
   1412   // Max.
   1413   AddInputFromArray<float>(TensorShape({4}),
   1414                            {63.375f, 63.375f, 63.375f, 63.375f});
   1415 
   1416   // Tested code.
   1417   TF_ASSERT_OK(RunOpKernel());
   1418 
   1419   Tensor* output_bprop_wrt_input = GetOutput(0);
   1420   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1421   auto grad_flat = GetInput(0).flat<float>();
   1422   FillValues<float>(&expected_bprop_wrt_input,
   1423                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   1424   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1425 
   1426   Tensor* output_bprop_wrt_min = GetOutput(1);
   1427   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1428   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   1429   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1430 
   1431   Tensor* output_bprop_wrt_max = GetOutput(2);
   1432   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1433   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   1434   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1435 }
   1436 
   1437 TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedDown_RegularRange) {
   1438   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
   1439   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
   1440   // Nudged ranges: [0.0; 63.75].
   1441   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
   1442   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1443                    .Attr("narrow_range", false)
   1444                    .Input(FakeInput(DT_FLOAT))  // gradients
   1445                    .Input(FakeInput(DT_FLOAT))  // inputs
   1446                    .Input(FakeInput(DT_FLOAT))  // min
   1447                    .Input(FakeInput(DT_FLOAT))  // max
   1448                    .Finalize(node_def()));
   1449   TF_EXPECT_OK(InitOp());
   1450   // Upstream gradients.
   1451   AddRandomInput(TensorShape({2, 3}));
   1452   // Downstream inputs.
   1453   AddInputFromArray<float>(TensorShape({2, 3}),
   1454                            {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.8f});
   1455   // Min.
   1456   AddInputFromArray<float>(TensorShape({3}), {-0.1f, -0.1f, -0.1f});
   1457   // Max.
   1458   AddInputFromArray<float>(TensorShape({3}), {63.65f, 63.65f, 63.65f});
   1459 
   1460   // Tested code.
   1461   TF_ASSERT_OK(RunOpKernel());
   1462 
   1463   Tensor* output_bprop_wrt_input = GetOutput(0);
   1464   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   1465   auto grad_flat = GetInput(0).flat<float>();
   1466   FillValues<float>(
   1467       &expected_bprop_wrt_input,
   1468       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   1469   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1470 
   1471   Tensor* output_bprop_wrt_min = GetOutput(1);
   1472   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   1473   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   1474   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1475 
   1476   Tensor* output_bprop_wrt_max = GetOutput(2);
   1477   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   1478   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   1479   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1480 }
   1481 
   1482 TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedDown_NarrowRange) {
   1483   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
   1484   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
   1485   // Nudged ranges: [0.0; 63.5].
   1486   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
   1487   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1488                    .Attr("narrow_range", true)
   1489                    .Input(FakeInput(DT_FLOAT))  // gradients
   1490                    .Input(FakeInput(DT_FLOAT))  // inputs
   1491                    .Input(FakeInput(DT_FLOAT))  // min
   1492                    .Input(FakeInput(DT_FLOAT))  // max
   1493                    .Finalize(node_def()));
   1494   TF_EXPECT_OK(InitOp());
   1495   // Upstream gradients.
   1496   AddRandomInput(TensorShape({2, 3}));
   1497   // Downstream inputs.
   1498   AddInputFromArray<float>(TensorShape({2, 3}),
   1499                            {-0.1f, 0.0f, 0.1f, 0.25f, 63.5f, 63.6f});
   1500   // Min.
   1501   AddInputFromArray<float>(TensorShape({3}), {-0.1f, -0.1f, -0.1f});
   1502   // Max.
   1503   AddInputFromArray<float>(TensorShape({3}), {63.4f, 63.4f, 63.4f});
   1504 
   1505   // Tested code.
   1506   TF_ASSERT_OK(RunOpKernel());
   1507 
   1508   Tensor* output_bprop_wrt_input = GetOutput(0);
   1509   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   1510   auto grad_flat = GetInput(0).flat<float>();
   1511   FillValues<float>(
   1512       &expected_bprop_wrt_input,
   1513       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   1514   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1515 
   1516   Tensor* output_bprop_wrt_min = GetOutput(1);
   1517   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   1518   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   1519   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1520 
   1521   Tensor* output_bprop_wrt_max = GetOutput(2);
   1522   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   1523   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   1524   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1525 }
   1526 
   1527 TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedUp_RegularRange) {
   1528   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
   1529   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
   1530   // Nudged ranges: [-0.25; 63.5].
   1531   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
   1532   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1533                    .Attr("narrow_range", false)
   1534                    .Input(FakeInput(DT_FLOAT))  // gradients
   1535                    .Input(FakeInput(DT_FLOAT))  // inputs
   1536                    .Input(FakeInput(DT_FLOAT))  // min
   1537                    .Input(FakeInput(DT_FLOAT))  // max
   1538                    .Finalize(node_def()));
   1539   TF_EXPECT_OK(InitOp());
   1540   // Upstream gradients.
   1541   AddRandomInput(TensorShape({2, 3}));
   1542   // Downstream inputs.
   1543   AddInputFromArray<float>(TensorShape({2, 3}),
   1544                            {-0.3f, -0.25f, -0.2f, 0.0f, 63.5f, 63.6f});
   1545   // Min.
   1546   AddInputFromArray<float>(TensorShape({3}), {-0.125f, -0.125f, -0.125f});
   1547   // Max.
   1548   AddInputFromArray<float>(TensorShape({3}), {63.625f, 63.625f, 63.625f});
   1549 
   1550   // Tested code.
   1551   TF_ASSERT_OK(RunOpKernel());
   1552 
   1553   Tensor* output_bprop_wrt_input = GetOutput(0);
   1554   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   1555   auto grad_flat = GetInput(0).flat<float>();
   1556   FillValues<float>(
   1557       &expected_bprop_wrt_input,
   1558       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   1559   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1560 
   1561   Tensor* output_bprop_wrt_min = GetOutput(1);
   1562   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   1563   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   1564   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1565 
   1566   Tensor* output_bprop_wrt_max = GetOutput(2);
   1567   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   1568   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   1569   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1570 }
   1571 
   1572 TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedUp_NarrowRange) {
   1573   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
   1574   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
   1575   // Nudged ranges: [-0.25; 63.25].
   1576   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
   1577   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1578                    .Attr("narrow_range", true)
   1579                    .Input(FakeInput(DT_FLOAT))  // gradients
   1580                    .Input(FakeInput(DT_FLOAT))  // inputs
   1581                    .Input(FakeInput(DT_FLOAT))  // min
   1582                    .Input(FakeInput(DT_FLOAT))  // max
   1583                    .Finalize(node_def()));
   1584   TF_EXPECT_OK(InitOp());
   1585   // Upstream gradients.
   1586   AddRandomInput(TensorShape({2, 3}));
   1587   // Downstream inputs.
   1588   AddInputFromArray<float>(TensorShape({2, 3}),
   1589                            {-0.3f, -0.25f, -0.2f, 0.0f, 63.25f, 63.3f});
   1590   // Min.
   1591   AddInputFromArray<float>(TensorShape({3}), {-0.125f, -0.125f, -0.125f});
   1592   // Max.
   1593   AddInputFromArray<float>(TensorShape({3}), {63.375f, 63.375f, 63.375f});
   1594 
   1595   // Tested code.
   1596   TF_ASSERT_OK(RunOpKernel());
   1597 
   1598   Tensor* output_bprop_wrt_input = GetOutput(0);
   1599   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   1600   auto grad_flat = GetInput(0).flat<float>();
   1601   FillValues<float>(
   1602       &expected_bprop_wrt_input,
   1603       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   1604   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1605 
   1606   Tensor* output_bprop_wrt_min = GetOutput(1);
   1607   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   1608   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   1609   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1610 
   1611   Tensor* output_bprop_wrt_max = GetOutput(2);
   1612   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   1613   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   1614   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1615 }
   1616 
   1617 TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedDown_RegularRange) {
   1618   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
   1619   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
   1620   // Nudged ranges: [0.0; 63.75].
   1621   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
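          // With per-channel min/max, the min and max gradients are reduced over all
          // elements of each channel (the last dimension). In the input below, every
          // value at flat index 0, 4, 8, ... falls under the nudged min of channel 0,
          // and every value at flat index 3, 7, 11, ... exceeds the nudged max of
          // channel 3, which is why those upstream gradients are summed in the
          // expectations.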
   1622   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1623                    .Attr("narrow_range", false)
   1624                    .Input(FakeInput(DT_FLOAT))  // gradients
   1625                    .Input(FakeInput(DT_FLOAT))  // inputs
   1626                    .Input(FakeInput(DT_FLOAT))  // min
   1627                    .Input(FakeInput(DT_FLOAT))  // max
   1628                    .Finalize(node_def()));
   1629   TF_EXPECT_OK(InitOp());
   1630   // Upstream gradients.
   1631   AddRandomInput(TensorShape({1, 2, 3, 4}));
   1632   // Downstream inputs.
   1633   // clang-format off
   1634   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   1635                            {-0.1f,   0.0f, 63.75f, 63.8f, -0.1f,   0.0f,
   1636                             63.75f, 63.8f, -0.1f,   0.0f, 63.75f, 63.8f,
   1637                             -0.1f,   0.0f, 63.75f, 63.8f, -0.1f,   0.0f,
   1638                             63.75f, 63.8f, -0.1f,   0.0f, 63.75f, 63.8f});
   1639   // clang-format on
   1640   // Min.
   1641   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   1642   // Max.
   1643   AddInputFromArray<float>(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f});
   1644 
   1645   // Tested code.
   1646   TF_ASSERT_OK(RunOpKernel());
   1647 
   1648   Tensor* output_bprop_wrt_input = GetOutput(0);
   1649   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   1650                                   TensorShape({1, 2, 3, 4}));
   1651   auto grad_flat = GetInput(0).flat<float>();
   1652   FillValues<float>(&expected_bprop_wrt_input,
   1653                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   1654                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   1655                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   1656                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   1657                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   1658                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   1659   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1660 
   1661   Tensor* output_bprop_wrt_min = GetOutput(1);
   1662   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1663   FillValues<float>(&expected_bprop_wrt_min,
   1664                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   1665                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   1666                      0.0f, 0.0f, 0.0f});
   1667   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1668 
   1669   Tensor* output_bprop_wrt_max = GetOutput(2);
   1670   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1671   FillValues<float>(&expected_bprop_wrt_max,
   1672                     {0.0f, 0.0f, 0.0f,
   1673                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   1674                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   1675   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1676 }
   1677 
   1678 TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedDown_NarrowRange) {
   1679   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 254 / 4].
   1680   // Scale: 1/4,  original zero point: 1.4, nudged to 1.
   1681   // Nudged ranges: [0.0; 63.5].
   1682   // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.5.
   1683   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1684                    .Attr("narrow_range", true)
   1685                    .Input(FakeInput(DT_FLOAT))  // gradients
   1686                    .Input(FakeInput(DT_FLOAT))  // inputs
   1687                    .Input(FakeInput(DT_FLOAT))  // min
   1688                    .Input(FakeInput(DT_FLOAT))  // max
   1689                    .Finalize(node_def()));
   1690   TF_EXPECT_OK(InitOp());
   1691   // Upstream gradients.
   1692   AddRandomInput(TensorShape({1, 2, 3, 4}));
   1693   // Downstream inputs.
   1694   // clang-format off
   1695   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   1696                            {-0.1f,  0.0f, 63.5f, 63.6f, -0.1f,  0.0f,
   1697                             63.5f, 63.6f, -0.1f,  0.0f, 63.5f, 63.6f,
   1698                             -0.1f,  0.0f, 63.5f, 63.6f, -0.1f,  0.0f,
   1699                             63.5f, 63.6f, -0.1f,  0.0f, 63.5f, 63.6f});
   1700   // clang-format on
   1701   // Min.
   1702   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   1703   // Max.
   1704   AddInputFromArray<float>(TensorShape({4}), {63.4f, 63.4f, 63.4f, 63.4f});
   1705 
   1706   // Tested code.
   1707   TF_ASSERT_OK(RunOpKernel());
   1708 
   1709   Tensor* output_bprop_wrt_input = GetOutput(0);
   1710   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   1711                                   TensorShape({1, 2, 3, 4}));
   1712   auto grad_flat = GetInput(0).flat<float>();
   1713   FillValues<float>(&expected_bprop_wrt_input,
   1714                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   1715                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   1716                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   1717                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   1718                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   1719                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   1720   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1721 
   1722   Tensor* output_bprop_wrt_min = GetOutput(1);
   1723   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1724   FillValues<float>(&expected_bprop_wrt_min,
   1725                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   1726                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   1727                      0.0f, 0.0f, 0.0f});
   1728   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1729 
   1730   Tensor* output_bprop_wrt_max = GetOutput(2);
   1731   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1732   FillValues<float>(&expected_bprop_wrt_max,
   1733                     {0.0f, 0.0f, 0.0f,
   1734                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   1735                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   1736   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1737 }
   1738 
   1739 TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedUp_RegularRange) {
   1740   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
   1741   // Scale: 1/4,  original zero point: 0.5, nudged to 1.
   1742   // Nudged ranges: [-0.25; 63.5].
   1743   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
   1744   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1745                    .Attr("narrow_range", false)
   1746                    .Input(FakeInput(DT_FLOAT))  // gradients
   1747                    .Input(FakeInput(DT_FLOAT))  // inputs
   1748                    .Input(FakeInput(DT_FLOAT))  // min
   1749                    .Input(FakeInput(DT_FLOAT))  // max
   1750                    .Finalize(node_def()));
   1751   TF_EXPECT_OK(InitOp());
   1752   // Upstream gradients.
   1753   AddRandomInput(TensorShape({1, 2, 3, 4}));
   1754   // Downstream inputs.
   1755   // clang-format off
   1756   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   1757                            {-0.3f, -0.25f, 63.5f, 63.6f,  -0.3f, -0.25f,
   1758                             63.5f, 63.6f,  -0.3f, -0.25f, 63.5f, 63.6f,
   1759                             -0.3f, -0.25f, 63.5f, 63.6f,  -0.3f, -0.25f,
   1760                             63.5f, 63.6f,  -0.3f, -0.25f, 63.5f, 63.6f});
   1761   // clang-format on
   1762   // Min.
   1763   AddInputFromArray<float>(TensorShape({4}),
   1764                            {-0.125f, -0.125f, -0.125f, -0.125f});
   1765   // Max.
   1766   AddInputFromArray<float>(TensorShape({4}),
   1767                            {63.625f, 63.625f, 63.625f, 63.625f});
   1768 
   1769   // Tested code.
   1770   TF_ASSERT_OK(RunOpKernel());
   1771 
   1772   Tensor* output_bprop_wrt_input = GetOutput(0);
   1773   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   1774                                   TensorShape({1, 2, 3, 4}));
   1775   auto grad_flat = GetInput(0).flat<float>();
   1776   FillValues<float>(&expected_bprop_wrt_input,
   1777                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   1778                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   1779                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   1780                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   1781                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   1782                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   1783   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1784 
   1785   Tensor* output_bprop_wrt_min = GetOutput(1);
   1786   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1787   FillValues<float>(&expected_bprop_wrt_min,
   1788                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   1789                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   1790                      0.0f, 0.0f, 0.0f});
   1791   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1792 
   1793   Tensor* output_bprop_wrt_max = GetOutput(2);
   1794   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1795   FillValues<float>(&expected_bprop_wrt_max,
   1796                     {0.0f, 0.0f, 0.0f,
   1797                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   1798                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   1799   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1800 }
   1801 
   1802 TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedUp_NarrowRange) {
   1803   // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 254 / 4].
   1804   // Scale: 1/4,  original zero point: 1.5, nudged to 2.
   1805   // Nudged ranges: [-0.25; 63.25].
   1806   // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.25.
   1807   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1808                    .Attr("narrow_range", true)
   1809                    .Input(FakeInput(DT_FLOAT))  // gradients
   1810                    .Input(FakeInput(DT_FLOAT))  // inputs
   1811                    .Input(FakeInput(DT_FLOAT))  // min
   1812                    .Input(FakeInput(DT_FLOAT))  // max
   1813                    .Finalize(node_def()));
   1814   TF_EXPECT_OK(InitOp());
   1815   // Upstream gradients.
   1816   AddRandomInput(TensorShape({1, 2, 3, 4}));
   1817   // Downstream inputs.
   1818   // clang-format off
   1819   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   1820                            { -0.3f,  -0.25f, 63.25f, 63.3f,  -0.3f,  -0.25f,
   1821                              63.25f, 63.3f,  -0.3f,  -0.25f, 63.25f, 63.3f,
   1822                              -0.3f,  -0.25f, 63.25f, 63.3f,  -0.3f,  -0.25f,
   1823                              63.25f, 63.3f,  -0.3f,  -0.25f, 63.25f, 63.3f});
   1824   // clang-format on
   1825   // Min.
   1826   AddInputFromArray<float>(TensorShape({4}),
   1827                            {-0.125f, -0.125f, -0.125f, -0.125f});
   1828   // Max.
   1829   AddInputFromArray<float>(TensorShape({4}),
   1830                            {63.375f, 63.375f, 63.375f, 63.375f});
   1831 
   1832   // Tested code.
   1833   TF_ASSERT_OK(RunOpKernel());
   1834 
   1835   Tensor* output_bprop_wrt_input = GetOutput(0);
   1836   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   1837                                   TensorShape({1, 2, 3, 4}));
   1838   auto grad_flat = GetInput(0).flat<float>();
   1839   FillValues<float>(&expected_bprop_wrt_input,
   1840                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   1841                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   1842                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   1843                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   1844                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   1845                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   1846   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1847 
   1848   Tensor* output_bprop_wrt_min = GetOutput(1);
   1849   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1850   FillValues<float>(&expected_bprop_wrt_min,
   1851                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   1852                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   1853                      0.0f, 0.0f, 0.0f});
   1854   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1855 
   1856   Tensor* output_bprop_wrt_max = GetOutput(2);
   1857   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1858   FillValues<float>(&expected_bprop_wrt_max,
   1859                     {0.0f, 0.0f, 0.0f,
   1860                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   1861                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   1862   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1863 }
   1864 
   1865 TEST_F(QuantOpsTest,
   1866        WithVarsPerChannelDim1GradientNudgedDown_4Bits_RegularRange) {
   1867   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
   1868   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
   1869   // Nudged range: [0.0; 7.5].
   1870   // Expected quantized values: 0.0, 0.5, ..., 7.5.
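          // Same routing as the 8-bit gradient tests above, but with num_bits = 4:
          // scale = (7.4 - (-0.1)) / 15 = 0.5 and nudged range [0.0; 7.5], so -0.1
          // is routed to the min gradient and 7.6 to the max gradient.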
   1871   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1872                    .Attr("num_bits", 4)
   1873                    .Attr("narrow_range", false)
   1874                    .Input(FakeInput(DT_FLOAT))  // gradients
   1875                    .Input(FakeInput(DT_FLOAT))  // inputs
   1876                    .Input(FakeInput(DT_FLOAT))  // min
   1877                    .Input(FakeInput(DT_FLOAT))  // max
   1878                    .Finalize(node_def()));
   1879   TF_EXPECT_OK(InitOp());
   1880   // Upstream gradients.
   1881   AddRandomInput(TensorShape({4}));
   1882   // Downstream inputs.
   1883   AddInputFromArray<float>(TensorShape({4}), {-0.1f, 0.0f, 7.5f, 7.6f});
   1884   // Min.
   1885   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   1886   // Max.
   1887   AddInputFromArray<float>(TensorShape({4}), {7.4f, 7.4f, 7.4f, 7.4f});
   1888 
   1889   // Tested code.
   1890   TF_ASSERT_OK(RunOpKernel());
   1891 
   1892   Tensor* output_bprop_wrt_input = GetOutput(0);
   1893   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1894   auto grad_flat = GetInput(0).flat<float>();
   1895   FillValues<float>(&expected_bprop_wrt_input,
   1896                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   1897   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1898 
   1899   Tensor* output_bprop_wrt_min = GetOutput(1);
   1900   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1901   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   1902   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1903 
   1904   Tensor* output_bprop_wrt_max = GetOutput(2);
   1905   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1906   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   1907   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1908 }
   1909 
   1910 TEST_F(QuantOpsTest,
   1911        WithVarsPerChannelDim1GradientNudgedDown_4Bits_NarrowRange) {
   1912   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
   1913   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
   1914   // Nudged range: [0.0; 7.0].
   1915   // Expected quantized values: 0.0, 0.5, ..., 7.0.
   1916   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1917                    .Attr("num_bits", 4)
   1918                    .Attr("narrow_range", true)
   1919                    .Input(FakeInput(DT_FLOAT))  // gradients
   1920                    .Input(FakeInput(DT_FLOAT))  // inputs
   1921                    .Input(FakeInput(DT_FLOAT))  // min
   1922                    .Input(FakeInput(DT_FLOAT))  // max
   1923                    .Finalize(node_def()));
   1924   TF_EXPECT_OK(InitOp());
   1925   // Upstream gradients.
   1926   AddRandomInput(TensorShape({4}));
   1927   // Downstream inputs.
   1928   AddInputFromArray<float>(TensorShape({4}), {-0.1f, 0.0f, 7.0f, 7.1f});
   1929   // Min.
   1930   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   1931   // Max.
   1932   AddInputFromArray<float>(TensorShape({4}), {6.9f, 6.9f, 6.9f, 6.9f});
   1933 
   1934   // Tested code.
   1935   TF_ASSERT_OK(RunOpKernel());
   1936 
   1937   Tensor* output_bprop_wrt_input = GetOutput(0);
   1938   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1939   auto grad_flat = GetInput(0).flat<float>();
   1940   FillValues<float>(&expected_bprop_wrt_input,
   1941                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   1942   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1943 
   1944   Tensor* output_bprop_wrt_min = GetOutput(1);
   1945   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1946   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   1947   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1948 
   1949   Tensor* output_bprop_wrt_max = GetOutput(2);
   1950   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1951   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   1952   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1953 }
   1954 
   1955 TEST_F(QuantOpsTest,
   1956        WithVarsPerChannelDim1GradientNudgedUp_4Bits_RegularRange) {
   1957   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
   1958   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
   1959   // Nudged range: [-0.5; 7.0].
   1960   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
   1961   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   1962                    .Attr("num_bits", 4)
   1963                    .Attr("narrow_range", false)
   1964                    .Input(FakeInput(DT_FLOAT))  // gradients
   1965                    .Input(FakeInput(DT_FLOAT))  // inputs
   1966                    .Input(FakeInput(DT_FLOAT))  // min
   1967                    .Input(FakeInput(DT_FLOAT))  // max
   1968                    .Finalize(node_def()));
   1969   TF_EXPECT_OK(InitOp());
   1970   // Upstream gradients.
   1971   AddRandomInput(TensorShape({4}));
   1972   // Downstream inputs.
   1973   AddInputFromArray<float>(TensorShape({4}), {-0.6f, -0.5f, 7.0f, 7.1f});
   1974   // Min.
   1975   AddInputFromArray<float>(TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f});
   1976   // Max.
   1977   AddInputFromArray<float>(TensorShape({4}), {7.1f, 7.1f, 7.1f, 7.1f});
   1978 
   1979   // Tested code.
   1980   TF_ASSERT_OK(RunOpKernel());
   1981 
   1982   Tensor* output_bprop_wrt_input = GetOutput(0);
   1983   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   1984   auto grad_flat = GetInput(0).flat<float>();
   1985   FillValues<float>(&expected_bprop_wrt_input,
   1986                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   1987   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   1988 
   1989   Tensor* output_bprop_wrt_min = GetOutput(1);
   1990   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   1991   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   1992   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   1993 
   1994   Tensor* output_bprop_wrt_max = GetOutput(2);
   1995   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   1996   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   1997   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   1998 }
   1999 
   2000 TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedUp_4Bits_NarrowRange) {
   2001   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
   2002   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
   2003   // Nudged range: [-0.5; 6.5].
   2004   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
   2005   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2006                    .Attr("num_bits", 4)
   2007                    .Attr("narrow_range", true)
   2008                    .Input(FakeInput(DT_FLOAT))  // gradients
   2009                    .Input(FakeInput(DT_FLOAT))  // inputs
   2010                    .Input(FakeInput(DT_FLOAT))  // min
   2011                    .Input(FakeInput(DT_FLOAT))  // max
   2012                    .Finalize(node_def()));
   2013   TF_EXPECT_OK(InitOp());
   2014   // Upstream gradients.
   2015   AddRandomInput(TensorShape({4}));
   2016   // Downstream inputs.
   2017   AddInputFromArray<float>(TensorShape({4}), {-0.6f, -0.5f, 6.5f, 6.6f});
   2018   // Min.
   2019   AddInputFromArray<float>(TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f});
   2020   // Max.
   2021   AddInputFromArray<float>(TensorShape({4}), {6.6f, 6.6f, 6.6f, 6.6f});
   2022 
   2023   // Tested code.
   2024   TF_ASSERT_OK(RunOpKernel());
   2025 
   2026   Tensor* output_bprop_wrt_input = GetOutput(0);
   2027   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
   2028   auto grad_flat = GetInput(0).flat<float>();
   2029   FillValues<float>(&expected_bprop_wrt_input,
   2030                     {0.0f, grad_flat(1), grad_flat(2), 0.0f});
   2031   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2032 
   2033   Tensor* output_bprop_wrt_min = GetOutput(1);
   2034   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   2035   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f, 0.0f});
   2036   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2037 
   2038   Tensor* output_bprop_wrt_max = GetOutput(2);
   2039   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   2040   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, 0.0f, grad_flat(3)});
   2041   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2042 }
   2043 
   2044 TEST_F(QuantOpsTest,
   2045        WithVarsPerChannelDim2GradientNudgedDown_4Bits_RegularRange) {
   2046   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
   2047   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
   2048   // Nudged range: [0.0; 7.5].
   2049   // Expected quantized values: 0.0, 0.5, ..., 7.5.
   2050   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2051                    .Attr("num_bits", 4)
   2052                    .Attr("narrow_range", false)
   2053                    .Input(FakeInput(DT_FLOAT))  // gradients
   2054                    .Input(FakeInput(DT_FLOAT))  // inputs
   2055                    .Input(FakeInput(DT_FLOAT))  // min
   2056                    .Input(FakeInput(DT_FLOAT))  // max
   2057                    .Finalize(node_def()));
   2058   TF_EXPECT_OK(InitOp());
   2059   // Upstream gradients.
   2060   AddRandomInput(TensorShape({2, 3}));
   2061   // Downstream inputs.
   2062   AddInputFromArray<float>(TensorShape({2, 3}),
   2063                            {-0.1f, 0.0f, 0.1f, 0.5f, 7.5f, 7.6f});
   2064   // Min.
   2065   AddInputFromArray<float>(TensorShape({3}), {-0.1f, -0.1f, -0.1f});
   2066   // Max.
   2067   AddInputFromArray<float>(TensorShape({3}), {7.4f, 7.4f, 7.4f});
   2068 
   2069   // Tested code.
   2070   TF_ASSERT_OK(RunOpKernel());
   2071 
   2072   Tensor* output_bprop_wrt_input = GetOutput(0);
   2073   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   2074   auto grad_flat = GetInput(0).flat<float>();
   2075   FillValues<float>(
   2076       &expected_bprop_wrt_input,
   2077       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   2078   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2079 
   2080   Tensor* output_bprop_wrt_min = GetOutput(1);
   2081   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   2082   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   2083   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2084 
   2085   Tensor* output_bprop_wrt_max = GetOutput(2);
   2086   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   2087   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   2088   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2089 }
   2090 
   2091 TEST_F(QuantOpsTest,
   2092        WithVarsPerChannelDim2GradientNudgedDown_4Bits_NarrowRange) {
   2093   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
   2094   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
   2095   // Nudged range: [0.0; 7.0].
   2096   // Expected quantized values: 0.0, 0.5, ..., 7.0.
   2097   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2098                    .Attr("num_bits", 4)
   2099                    .Attr("narrow_range", true)
   2100                    .Input(FakeInput(DT_FLOAT))  // gradients
   2101                    .Input(FakeInput(DT_FLOAT))  // inputs
   2102                    .Input(FakeInput(DT_FLOAT))  // min
   2103                    .Input(FakeInput(DT_FLOAT))  // max
   2104                    .Finalize(node_def()));
   2105   TF_EXPECT_OK(InitOp());
   2106   // Upstream gradients.
   2107   AddRandomInput(TensorShape({2, 3}));
   2108   // Downstream inputs.
   2109   AddInputFromArray<float>(TensorShape({2, 3}),
   2110                            {-0.1f, 0.0f, 0.1f, 0.5f, 7.0f, 7.1f});
   2111   // Min.
   2112   AddInputFromArray<float>(TensorShape({3}), {-0.1f, -0.1f, -0.1f});
   2113   // Max.
   2114   AddInputFromArray<float>(TensorShape({3}), {6.9f, 6.9f, 6.9f});
   2115 
   2116   // Tested code.
   2117   TF_ASSERT_OK(RunOpKernel());
   2118 
   2119   Tensor* output_bprop_wrt_input = GetOutput(0);
   2120   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   2121   auto grad_flat = GetInput(0).flat<float>();
   2122   FillValues<float>(
   2123       &expected_bprop_wrt_input,
   2124       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   2125   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2126 
   2127   Tensor* output_bprop_wrt_min = GetOutput(1);
   2128   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   2129   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   2130   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2131 
   2132   Tensor* output_bprop_wrt_max = GetOutput(2);
   2133   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   2134   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   2135   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2136 }
   2137 
   2138 TEST_F(QuantOpsTest,
   2139        WithVarsPerChannelDim2GradientNudgedUp_4Bits_RegularRange) {
   2140   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
   2141   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
   2142   // Nudged range: [-0.5; 7.0].
   2143   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
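          // Sketch of the arithmetic (same nudging scheme as above):
          //   scale = (7.1 - (-0.4)) / (15 - 0) = 0.5
          //   zero_point_from_min = 0 - (-0.4 / 0.5) = 0.8, nudged up to 1
          //   nudged range = [(0 - 1) * 0.5, (15 - 1) * 0.5] = [-0.5, 7.0]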
   2144   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2145                    .Attr("num_bits", 4)
   2146                    .Attr("narrow_range", false)
   2147                    .Input(FakeInput(DT_FLOAT))  // gradients
   2148                    .Input(FakeInput(DT_FLOAT))  // inputs
   2149                    .Input(FakeInput(DT_FLOAT))  // min
   2150                    .Input(FakeInput(DT_FLOAT))  // max
   2151                    .Finalize(node_def()));
   2152   TF_EXPECT_OK(InitOp());
   2153   // Upstream gradients.
   2154   AddRandomInput(TensorShape({2, 3}));
   2155   // Downstream inputs.
   2156   AddInputFromArray<float>(TensorShape({2, 3}),
   2157                            {-0.6f, -0.5f, -0.4f, 0.0f, 7.0f, 7.1f});
   2158   // Min.
   2159   AddInputFromArray<float>(TensorShape({3}), {-0.4f, -0.4f, -0.4f});
   2160   // Max.
   2161   AddInputFromArray<float>(TensorShape({3}), {7.1f, 7.1f, 7.1f});
   2162 
   2163   // Tested code.
   2164   TF_ASSERT_OK(RunOpKernel());
   2165 
   2166   Tensor* output_bprop_wrt_input = GetOutput(0);
   2167   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   2168   auto grad_flat = GetInput(0).flat<float>();
   2169   FillValues<float>(
   2170       &expected_bprop_wrt_input,
   2171       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   2172   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2173 
   2174   Tensor* output_bprop_wrt_min = GetOutput(1);
   2175   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   2176   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   2177   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2178 
   2179   Tensor* output_bprop_wrt_max = GetOutput(2);
   2180   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   2181   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   2182   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2183 }
   2184 
   2185 TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedUp_4Bits_NarrowRange) {
   2186   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
   2187   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
   2188   // Nudged range: [-0.5; 6.5].
   2189   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
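          // Sketch of the arithmetic (same nudging scheme as above, with
          // narrow_range => quant_min = 1):
          //   scale = (6.6 - (-0.4)) / (15 - 1) = 0.5
          //   zero_point_from_min = 1 - (-0.4 / 0.5) = 1.8, nudged up to 2
          //   nudged range = [(1 - 2) * 0.5, (15 - 2) * 0.5] = [-0.5, 6.5]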
   2190   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2191                    .Attr("num_bits", 4)
   2192                    .Attr("narrow_range", true)
   2193                    .Input(FakeInput(DT_FLOAT))  // gradients
   2194                    .Input(FakeInput(DT_FLOAT))  // inputs
   2195                    .Input(FakeInput(DT_FLOAT))  // min
   2196                    .Input(FakeInput(DT_FLOAT))  // max
   2197                    .Finalize(node_def()));
   2198   TF_EXPECT_OK(InitOp());
   2199   // Upstream gradients.
   2200   AddRandomInput(TensorShape({2, 3}));
   2201   // Downstream inputs.
   2202   AddInputFromArray<float>(TensorShape({2, 3}),
   2203                            {-0.6f, -0.5f, -0.4f, 0.0f, 6.5f, 6.6f});
   2204   // Min.
   2205   AddInputFromArray<float>(TensorShape({3}), {-0.4f, -0.4f, -0.4f});
   2206   // Max.
   2207   AddInputFromArray<float>(TensorShape({3}), {6.6f, 6.6f, 6.6f});
   2208 
   2209   // Tested code.
   2210   TF_ASSERT_OK(RunOpKernel());
   2211 
   2212   Tensor* output_bprop_wrt_input = GetOutput(0);
   2213   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
   2214   auto grad_flat = GetInput(0).flat<float>();
   2215   FillValues<float>(
   2216       &expected_bprop_wrt_input,
   2217       {0.0f, grad_flat(1), grad_flat(2), grad_flat(3), grad_flat(4), 0.0f});
   2218   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2219 
   2220   Tensor* output_bprop_wrt_min = GetOutput(1);
   2221   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
   2222   FillValues<float>(&expected_bprop_wrt_min, {grad_flat(0), 0.0f, 0.0f});
   2223   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2224 
   2225   Tensor* output_bprop_wrt_max = GetOutput(2);
   2226   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
   2227   FillValues<float>(&expected_bprop_wrt_max, {0.0f, 0.0f, grad_flat(5)});
   2228   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2229 }
   2230 
   2231 TEST_F(QuantOpsTest,
   2232        WithVarsPerChannelDim4GradientNudgedDown_4Bits_RegularRange) {
   2233   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 15 / 2].
   2234   // Scale: 1/2,  original zero point: 0.2, nudged to 0.
   2235   // Nudged range: [0.0; 7.5].
   2236   // Expected quantized values: 0.0, 0.5, ..., 7.5.
   2237   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2238                    .Attr("num_bits", 4)
   2239                    .Attr("narrow_range", false)
   2240                    .Input(FakeInput(DT_FLOAT))  // gradients
   2241                    .Input(FakeInput(DT_FLOAT))  // inputs
   2242                    .Input(FakeInput(DT_FLOAT))  // min
   2243                    .Input(FakeInput(DT_FLOAT))  // max
   2244                    .Finalize(node_def()));
   2245   TF_EXPECT_OK(InitOp());
   2246   // Upstream gradients.
   2247   AddRandomInput(TensorShape({1, 2, 3, 4}));
   2248   // Downstream inputs.
   2249   // clang-format off
   2250   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   2251                            {-0.1f, 0.0f,  7.5f, 7.6f, -0.1f, 0.0f,
   2252                              7.5f, 7.6f, -0.1f, 0.0f,  7.5f, 7.6f,
   2253                             -0.1f, 0.0f,  7.5f, 7.6f, -0.1f, 0.0f,
   2254                              7.5f, 7.6f, -0.1f, 0.0f,  7.5f, 7.6f});
   2255   // clang-format on
   2256   // Min.
   2257   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   2258   // Max.
   2259   AddInputFromArray<float>(TensorShape({4}), {7.4f, 7.4f, 7.4f, 7.4f});
   2260 
   2261   // Tested code.
   2262   TF_ASSERT_OK(RunOpKernel());
   2263 
   2264   Tensor* output_bprop_wrt_input = GetOutput(0);
   2265   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   2266                                   TensorShape({1, 2, 3, 4}));
   2267   auto grad_flat = GetInput(0).flat<float>();
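          // With shape {1, 2, 3, 4} the last dimension is the channel, so flat
          // index i belongs to channel i % 4. Channel 0 holds the below-range
          // inputs (-0.1) and channel 3 the above-range inputs (7.6); their
          // gradients are summed into the min and max gradients further down,
          // while channels 1 and 2 pass the gradient through to the input.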
   2268   FillValues<float>(&expected_bprop_wrt_input,
   2269                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   2270                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   2271                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   2272                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   2273                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   2274                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   2275   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2276 
   2277   Tensor* output_bprop_wrt_min = GetOutput(1);
   2278   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   2279   FillValues<float>(&expected_bprop_wrt_min,
   2280                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   2281                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   2282                      0.0f, 0.0f, 0.0f});
   2283   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2284 
   2285   Tensor* output_bprop_wrt_max = GetOutput(2);
   2286   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   2287   FillValues<float>(&expected_bprop_wrt_max,
   2288                     {0.0f, 0.0f, 0.0f,
   2289                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   2290                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   2291   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2292 }
   2293 
   2294 TEST_F(QuantOpsTest,
   2295        WithVarsPerChannelDim4GradientNudgedDown_4Bits_NarrowRange) {
   2296   // Original quantization range: [-0.2 / 2 + 0 / 2, -0.2 / 2 + 14 / 2].
   2297   // Scale: 1/2,  original zero point: 1.2, nudged to 1.
   2298   // Nudged range: [0.0; 7.0].
   2299   // Expected quantized values: 0.0, 0.5, ..., 7.0.
   2300   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2301                    .Attr("num_bits", 4)
   2302                    .Attr("narrow_range", true)
   2303                    .Input(FakeInput(DT_FLOAT))  // gradients
   2304                    .Input(FakeInput(DT_FLOAT))  // inputs
   2305                    .Input(FakeInput(DT_FLOAT))  // min
   2306                    .Input(FakeInput(DT_FLOAT))  // max
   2307                    .Finalize(node_def()));
   2308   TF_EXPECT_OK(InitOp());
   2309   // Upstream gradients.
   2310   AddRandomInput(TensorShape({1, 2, 3, 4}));
   2311   // Downstream inputs.
   2312   // clang-format off
   2313   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   2314                            {-0.1f, 0.0f,  7.0f, 7.1f, -0.1f, 0.0f,
   2315                              7.0f, 7.1f, -0.1f, 0.0f,  7.0f, 7.1f,
   2316                             -0.1f, 0.0f,  7.0f, 7.1f, -0.1f, 0.0f,
   2317                              7.0f, 7.1f, -0.1f, 0.0f,  7.0f, 7.1f});
   2318   // clang-format on
   2319   // Min.
   2320   AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
   2321   // Max.
   2322   AddInputFromArray<float>(TensorShape({4}), {6.9f, 6.9f, 6.9f, 6.9f});
   2323 
   2324   // Tested code.
   2325   TF_ASSERT_OK(RunOpKernel());
   2326 
   2327   Tensor* output_bprop_wrt_input = GetOutput(0);
   2328   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   2329                                   TensorShape({1, 2, 3, 4}));
   2330   auto grad_flat = GetInput(0).flat<float>();
   2331   FillValues<float>(&expected_bprop_wrt_input,
   2332                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   2333                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   2334                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   2335                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   2336                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   2337                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   2338   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2339 
   2340   Tensor* output_bprop_wrt_min = GetOutput(1);
   2341   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   2342   FillValues<float>(&expected_bprop_wrt_min,
   2343                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   2344                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   2345                      0.0f, 0.0f, 0.0f});
   2346   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2347 
   2348   Tensor* output_bprop_wrt_max = GetOutput(2);
   2349   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   2350   FillValues<float>(&expected_bprop_wrt_max,
   2351                     {0.0f, 0.0f, 0.0f,
   2352                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   2353                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   2354   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2355 }
   2356 
   2357 TEST_F(QuantOpsTest,
   2358        WithVarsPerChannelDim4GradientNudgedUp_4Bits_RegularRange) {
   2359   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 15 / 2].
   2360   // Scale: 1/2,  original zero point: 0.8, nudged to 1.
   2361   // Nudged range: [-0.5; 7.0].
   2362   // Expected quantized values: -0.5, 0.0, 0.5, ..., 7.0.
   2363   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2364                    .Attr("num_bits", 4)
   2365                    .Attr("narrow_range", false)
   2366                    .Input(FakeInput(DT_FLOAT))  // gradients
   2367                    .Input(FakeInput(DT_FLOAT))  // inputs
   2368                    .Input(FakeInput(DT_FLOAT))  // min
   2369                    .Input(FakeInput(DT_FLOAT))  // max
   2370                    .Finalize(node_def()));
   2371   TF_EXPECT_OK(InitOp());
   2372   // Upstream gradients.
   2373   AddRandomInput(TensorShape({1, 2, 3, 4}));
   2374   // Downstream inputs.
   2375   // clang-format off
   2376   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   2377                            {-0.6f, -0.5f,  7.0f,  7.1f, -0.6f, -0.5f,
   2378                              7.0f,  7.1f, -0.6f, -0.5f,  7.0f,  7.1f,
   2379                             -0.6f, -0.5f,  7.0f,  7.1f, -0.6f, -0.5f,
   2380                              7.0f,  7.1f, -0.6f, -0.5f,  7.0f,  7.1f});
   2381   // clang-format on
   2382   // Min.
   2383   AddInputFromArray<float>(TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f});
   2384   // Max.
   2385   AddInputFromArray<float>(TensorShape({4}), {7.1f, 7.1f, 7.1f, 7.1f});
   2386 
   2387   // Tested code.
   2388   TF_ASSERT_OK(RunOpKernel());
   2389 
   2390   Tensor* output_bprop_wrt_input = GetOutput(0);
   2391   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   2392                                   TensorShape({1, 2, 3, 4}));
   2393   auto grad_flat = GetInput(0).flat<float>();
   2394   FillValues<float>(&expected_bprop_wrt_input,
   2395                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   2396                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   2397                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   2398                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   2399                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   2400                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   2401   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2402 
   2403   Tensor* output_bprop_wrt_min = GetOutput(1);
   2404   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   2405   FillValues<float>(&expected_bprop_wrt_min,
   2406                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   2407                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   2408                      0.0f, 0.0f, 0.0f});
   2409   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2410 
   2411   Tensor* output_bprop_wrt_max = GetOutput(2);
   2412   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   2413   FillValues<float>(&expected_bprop_wrt_max,
   2414                     {0.0f, 0.0f, 0.0f,
   2415                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   2416                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   2417   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2418 }
   2419 
   2420 TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedUp_4Bits_NarrowRange) {
   2421   // Original quantization range: [-0.8 / 2 + 0 / 2, -0.8 / 2 + 14 / 2].
   2422   // Scale: 1/2,  original zero point: 1.8, nudged to 2.
   2423   // Nudged range: [-0.5; 6.5].
   2424   // Expected quantized values: -0.5, 0.0, 0.5, ..., 6.5.
   2425   TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
   2426                    .Attr("num_bits", 4)
   2427                    .Attr("narrow_range", true)
   2428                    .Input(FakeInput(DT_FLOAT))  // gradients
   2429                    .Input(FakeInput(DT_FLOAT))  // inputs
   2430                    .Input(FakeInput(DT_FLOAT))  // min
   2431                    .Input(FakeInput(DT_FLOAT))  // max
   2432                    .Finalize(node_def()));
   2433   TF_EXPECT_OK(InitOp());
   2434   // Upstream gradients.
   2435   AddRandomInput(TensorShape({1, 2, 3, 4}));
   2436   // Downstream inputs.
   2437   // clang-format off
   2438   AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
   2439                            {-0.6f, -0.5f,  6.5f,  6.6f, -0.6f, -0.5f,
   2440                              6.5f,  6.6f, -0.6f, -0.5f,  6.5f,  6.6f,
   2441                             -0.6f, -0.5f,  6.5f,  6.6f, -0.6f, -0.5f,
   2442                              6.5f,  6.6f, -0.6f, -0.5f,  6.5f,  6.6f});
   2443   // clang-format on
   2444   // Min.
   2445   AddInputFromArray<float>(TensorShape({4}), {-0.4f, -0.4f, -0.4f, -0.4f});
   2446   // Max.
   2447   AddInputFromArray<float>(TensorShape({4}), {6.6f, 6.6f, 6.6f, 6.6f});
   2448 
   2449   // Tested code.
   2450   TF_ASSERT_OK(RunOpKernel());
   2451 
   2452   Tensor* output_bprop_wrt_input = GetOutput(0);
   2453   Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
   2454                                   TensorShape({1, 2, 3, 4}));
   2455   auto grad_flat = GetInput(0).flat<float>();
   2456   FillValues<float>(&expected_bprop_wrt_input,
   2457                     {0.0f, grad_flat(1),  grad_flat(2),  0.0f,
   2458                      0.0f, grad_flat(5),  grad_flat(6),  0.0f,
   2459                      0.0f, grad_flat(9),  grad_flat(10), 0.0f,
   2460                      0.0f, grad_flat(13), grad_flat(14), 0.0f,
   2461                      0.0f, grad_flat(17), grad_flat(18), 0.0f,
   2462                      0.0f, grad_flat(21), grad_flat(22), 0.0f});
   2463   ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
   2464 
   2465   Tensor* output_bprop_wrt_min = GetOutput(1);
   2466   Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
   2467   FillValues<float>(&expected_bprop_wrt_min,
   2468                     {grad_flat(0) + grad_flat(4) + grad_flat(8) +
   2469                          grad_flat(12) + grad_flat(16) + grad_flat(20),
   2470                      0.0f, 0.0f, 0.0f});
   2471   ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
   2472 
   2473   Tensor* output_bprop_wrt_max = GetOutput(2);
   2474   Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
   2475   FillValues<float>(&expected_bprop_wrt_max,
   2476                     {0.0f, 0.0f, 0.0f,
   2477                      grad_flat(3) + grad_flat(7) + grad_flat(11) +
   2478                          grad_flat(15) + grad_flat(19) + grad_flat(23)});
   2479   ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
   2480 }
   2481 
   2482 }  // namespace tensorflow
   2483