/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <functional>
#include <limits>
#include <unordered_map>
#include <utility>
#include <vector>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/hash/hash.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

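// Computes the unique elements of its input: `Unique` and `UniqueWithCounts`
// operate on a 1-D tensor, while `UniqueV2` can de-duplicate slices along a
// given axis. Outputs are the unique values `y`, an index vector `idx` that
// maps every input element (or slice) to its position in `y`, and, for
// UniqueWithCounts, a per-value occurrence count. For example, given the
// input [1, 1, 2, 4, 4, 4, 7, 8, 8], y is [1, 2, 4, 7, 8] and
// idx is [0, 0, 1, 2, 2, 2, 3, 4, 4].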
template <typename T, typename TIndex>
class UniqueOp : public OpKernel {
 public:
  explicit UniqueOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    // TODO(dga):  Make unique polymorphic for returning int32 and int64
    // vectors to support large tensors.
    OP_REQUIRES(context,
                input.NumElements() <= std::numeric_limits<int32>::max(),
                errors::InvalidArgument(
                    "unique does not support input tensors larger than ",
                    std::numeric_limits<int32>::max(), " elements"));

    int64 axis = 0;
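    // The input is viewed as a 3-D tensor [outer, axis_dim, inner], where
    // `outer` collapses the dimensions before `axis` and `inner` collapses
    // the ones after it; uniqueness is then computed over the axis_dim
    // slices. For the 1-D Unique op this view is simply [1, NumElements, 1].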
    std::vector<int64> new_sizes{1, input.NumElements(), 1};
    if (context->num_inputs() == 1) {
      OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
                  errors::InvalidArgument("unique expects a 1D vector."));
    } else {
      // For UniqueV2, the axis is passed as a 1D vector so the caller can
      // specify either "no axis" or a single axis: `[]` means "no axis",
      // while `[x]` means `axis = x`.
      const Tensor& axis_tensor = context->input(1);
      OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()),
                  errors::InvalidArgument("axis expects a 1D vector."));
      OP_REQUIRES(
          context, axis_tensor.NumElements() <= 1,
          errors::InvalidArgument(
              "axis expects a vector with at most 1 element"));
      if (axis_tensor.NumElements() == 0) {
        OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
                    errors::InvalidArgument("unique expects a 1D vector."));
      } else {
        OP_REQUIRES(context,
                    (axis_tensor.dtype() == DT_INT32 ||
                     axis_tensor.dtype() == DT_INT64),
                    errors::InvalidArgument(
                        "axis tensor should be int32 or int64, but got ",
                        axis_tensor.dtype()));
        if (axis_tensor.dtype() == DT_INT32) {
          axis = internal::SubtleMustCopy(axis_tensor.scalar<int32>()());
        } else {
          axis = internal::SubtleMustCopy(axis_tensor.scalar<int64>()());
        }
        axis = axis < 0 ? axis + input.dims() : axis;
        OP_REQUIRES(context, 0 <= axis && axis < input.dims(),
                    errors::InvalidArgument("axis has to be in the range [0, ",
                                            input.dims(), ")"));
        if (axis > 0) {
          for (int64 i = 0; i < axis; i++) {
            new_sizes[0] *= input.dim_size(i);
          }
        }
        new_sizes[1] = input.dim_size(axis);
        if (axis + 1 < input.dims()) {
          for (int64 i = axis + 1; i < input.dims(); i++) {
            new_sizes[2] *= input.dim_size(i);
          }
        }
      }
    }

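    // `idx` has one entry per element of the input (per slice along `axis`
    // for UniqueV2), giving the position of the matching entry in the unique
    // output.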
    Tensor* idx = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                1, TensorShape({new_sizes[1]}), &idx));
    auto idx_vec = idx->template vec<TIndex>();

    int64 uniq_size;
    if (new_sizes[0] == 1 && new_sizes[2] == 1) {
      // Specialized and faster implementation when unique is run over single
      // elements. Here we put T directly into the map rather than ints pointing
      // to them as in the general case.
      auto Tin = input.flat<T>();
      const int64 N = static_cast<int64>(Tin.size());

      std::unordered_map<T, TIndex> uniq;
      uniq.reserve(2 * N);
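      // `j` is the id that the next previously unseen value will receive. For
      // duplicates, insert() is a no-op and `it.first->second` returns the id
      // recorded when the value was first seen.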
      for (int64 i = 0, j = 0; i < N; ++i) {
        auto it = uniq.insert(std::make_pair(Tin(i), j));
        idx_vec(i) = it.first->second;
        if (it.second) {
          ++j;
        }
      }

      uniq_size = static_cast<int64>(uniq.size());
      TensorShape output_shape(input.shape());
      output_shape.set_dim(axis, uniq_size);
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      auto Tout = output->flat<T>();

      for (const auto& it : uniq) {
        Tout(it.second) = it.first;
      }
    } else {
      // General implementation when unique is run over multiple elements.
      auto Tin = input.shaped<T, 3>(new_sizes);

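      // Slices along `axis` are represented in the map by their index into
      // the middle dimension of `Tin`; the hash and equality functors below
      // work element-wise over a slice, so whole slices are de-duplicated
      // without being copied.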
      auto hash_fn = [&Tin](const int64& key) {
        size_t h = 0;
        for (int64 i = 0; i < Tin.dimension(0); i++) {
          for (int64 j = 0; j < Tin.dimension(2); j++) {
            h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
          }
        }
        return h;
      };

      auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
        for (int64 i = 0; i < Tin.dimension(0); i++) {
          for (int64 j = 0; j < Tin.dimension(2); j++) {
            if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
              return false;
            }
          }
        }
        return true;
      };

      std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
          uniq(0, hash_fn, equal_to_fn);

      uniq.reserve(2 * Tin.dimension(1));

      for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
        auto it = uniq.insert(std::make_pair(i, j));
        idx_vec(i) = it.first->second;
        if (it.second) {
          ++j;
        }
      }

      uniq_size = static_cast<int64>(uniq.size());
      new_sizes[1] = uniq_size;
      TensorShape output_shape(input.shape());
      output_shape.set_dim(axis, uniq_size);
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      auto Tout = output->shaped<T, 3>(new_sizes);

      for (const auto& it : uniq) {
        Tout.chip(it.second, 1) = Tin.chip(it.first, 1);
      }
    }

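    // For UniqueWithCounts, a third output holds the number of times each
    // unique element (or slice) occurs in the input.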
    if (num_outputs() > 2) {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(
                                  2, TensorShape({uniq_size}), &output));
      auto count_output_vec = output->template vec<TIndex>();
      count_output_vec.setZero();
      const int64 N = idx_vec.size();
      for (int64 i = 0; i < N; ++i) {
        count_output_vec(idx_vec(i))++;
      }
    }
  }
};

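// Registers CPU kernels for Unique, UniqueV2, and UniqueWithCounts over the
// given element type, with both int32 and int64 index outputs.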
#define REGISTER_UNIQUE(type)                                    \
  REGISTER_KERNEL_BUILDER(Name("Unique")                         \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>);                \
  REGISTER_KERNEL_BUILDER(Name("Unique")                         \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>)                 \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_UNIQUE);
REGISTER_UNIQUE(string)
#undef REGISTER_UNIQUE

// Fake integer GPU kernels so that the use of Unique in optimizers (to
// de-duplicate sparse gradient indices) does not conflict with gradients being
// located on a GPU. These kernels run on the CPU, their inputs and outputs
// residing in host (not GPU) memory.
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);

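// The same host-memory registrations, repeated for SYCL devices.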
#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);
#endif  // TENSORFLOW_USE_SYCL
}  // namespace tensorflow