1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include <functional> 17 #include <unordered_map> 18 #include <utility> 19 20 #include "tensorflow/core/framework/op_kernel.h" 21 #include "tensorflow/core/framework/register_types.h" 22 #include "tensorflow/core/framework/tensor.h" 23 #include "tensorflow/core/framework/tensor_shape.h" 24 #include "tensorflow/core/kernels/bounds_check.h" 25 #include "tensorflow/core/lib/core/status.h" 26 #include "tensorflow/core/lib/hash/hash.h" 27 28 namespace tensorflow { 29 30 typedef Eigen::ThreadPoolDevice CPUDevice; 31 32 template <typename T, typename TIndex> 33 class UniqueOp : public OpKernel { 34 public: 35 explicit UniqueOp(OpKernelConstruction* context) : OpKernel(context) {} 36 37 void Compute(OpKernelContext* context) override { 38 const Tensor& input = context->input(0); 39 // TODO(dga): Make unique polymorphic for returning int32 and int64 40 // vectors to support large tensors. 
41 OP_REQUIRES(context, 42 input.NumElements() <= std::numeric_limits<int32>::max(), 43 errors::InvalidArgument( 44 "unique does not support input tensors larger than ", 45 std::numeric_limits<int32>::max(), " elements")); 46 47 int64 axis = 0; 48 std::vector<int64> new_sizes{1, input.NumElements(), 1}; 49 if (context->num_inputs() == 1) { 50 OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), 51 errors::InvalidArgument("unique expects a 1D vector.")); 52 } else { 53 // In case of UniqueV2, the axis is a 1D vector. The purpose is 54 // to allow specifying either "no axis" or "axis". The `[]` means 55 // "no axis", while `[x]` means `axis = x`. 56 const Tensor& axis_tensor = context->input(1); 57 OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()), 58 errors::InvalidArgument("axis expects a 1D vector.")); 59 OP_REQUIRES( 60 context, axis_tensor.NumElements() <= 1, 61 errors::InvalidArgument( 62 "axis does not support input tensors larger than 1 elements")); 63 if (axis_tensor.NumElements() == 0) { 64 OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), 65 errors::InvalidArgument("unique expects a 1D vector.")); 66 } else { 67 OP_REQUIRES(context, 68 (axis_tensor.dtype() == DT_INT32 || 69 axis_tensor.dtype() == DT_INT64), 70 errors::InvalidArgument( 71 "axis tensor should be int32 or int64, but got ", 72 axis_tensor.dtype())); 73 if (axis_tensor.dtype() == DT_INT32) { 74 axis = internal::SubtleMustCopy(axis_tensor.scalar<int32>()()); 75 } else { 76 axis = internal::SubtleMustCopy(axis_tensor.scalar<int64>()()); 77 } 78 axis = axis < 0 ? 
axis + input.dims() : axis; 79 OP_REQUIRES(context, 0 <= axis && axis < input.dims(), 80 errors::InvalidArgument("axis has to be between [0, ", 81 input.dims(), ")")); 82 if (axis > 0) { 83 for (int64 i = 0; i < axis; i++) { 84 new_sizes[0] *= input.dim_size(i); 85 } 86 } 87 new_sizes[1] = input.dim_size(axis); 88 if (axis + 1 < input.dims()) { 89 for (int64 i = axis + 1; i < input.dims(); i++) { 90 new_sizes[2] *= input.dim_size(i); 91 } 92 } 93 } 94 } 95 96 Tensor* idx = nullptr; 97 OP_REQUIRES_OK(context, context->allocate_output( 98 1, TensorShape({new_sizes[1]}), &idx)); 99 auto idx_vec = idx->template vec<TIndex>(); 100 101 int64 uniq_size; 102 if (new_sizes[0] == 1 && new_sizes[2] == 1) { 103 // Specialized and faster implementation when unique is run over single 104 // elements. Here we put T directly into the map rather than ints pointing 105 // to them as in the general case. 106 auto Tin = input.flat<T>(); 107 const int64 N = static_cast<int64>(Tin.size()); 108 109 std::unordered_map<T, TIndex> uniq; 110 uniq.reserve(2 * N); 111 for (int64 i = 0, j = 0; i < N; ++i) { 112 auto it = uniq.insert(std::make_pair(Tin(i), j)); 113 idx_vec(i) = it.first->second; 114 if (it.second) { 115 ++j; 116 } 117 } 118 119 uniq_size = static_cast<int64>(uniq.size()); 120 TensorShape output_shape(input.shape()); 121 output_shape.set_dim(axis, uniq_size); 122 Tensor* output = nullptr; 123 OP_REQUIRES_OK(context, 124 context->allocate_output(0, output_shape, &output)); 125 auto Tout = output->flat<T>(); 126 127 for (auto it : uniq) { 128 Tout(it.second) = it.first; 129 } 130 } else { 131 // General implementation when unique is run over multiple elements. 
132 auto Tin = input.shaped<T, 3>(new_sizes); 133 134 auto hash_fn = [&Tin](const int64& key) { 135 size_t h = 0; 136 for (int64 i = 0; i < Tin.dimension(0); i++) { 137 for (int64 j = 0; j < Tin.dimension(2); j++) { 138 h = Hash64Combine(h, hash<T>{}(Tin(i, key, j))); 139 } 140 } 141 return h; 142 }; 143 144 auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) { 145 for (int64 i = 0; i < Tin.dimension(0); i++) { 146 for (int64 j = 0; j < Tin.dimension(2); j++) { 147 if (Tin(i, lhs, j) != Tin(i, rhs, j)) { 148 return false; 149 } 150 } 151 } 152 return true; 153 }; 154 155 std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)> 156 uniq(0, hash_fn, equal_to_fn); 157 158 uniq.reserve(2 * Tin.dimension(1)); 159 160 for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) { 161 auto it = uniq.insert(std::make_pair(i, j)); 162 idx_vec(i) = it.first->second; 163 if (it.second) { 164 ++j; 165 } 166 } 167 168 uniq_size = static_cast<int64>(uniq.size()); 169 new_sizes[1] = uniq_size; 170 TensorShape output_shape(input.shape()); 171 output_shape.set_dim(axis, uniq_size); 172 Tensor* output = nullptr; 173 OP_REQUIRES_OK(context, 174 context->allocate_output(0, output_shape, &output)); 175 auto Tout = output->shaped<T, 3>(new_sizes); 176 177 for (auto it : uniq) { 178 Tout.chip(it.second, 1) = Tin.chip(it.first, 1); 179 } 180 } 181 182 if (num_outputs() > 2) { 183 Tensor* output = nullptr; 184 OP_REQUIRES_OK(context, context->allocate_output( 185 2, TensorShape({uniq_size}), &output)); 186 auto count_output_vec = output->template vec<TIndex>(); 187 count_output_vec.setZero(); 188 const int N = idx_vec.size(); 189 for (int64 i = 0; i < N; ++i) { 190 count_output_vec(idx_vec(i))++; 191 } 192 } 193 } 194 }; 195 196 #define REGISTER_UNIQUE(type) \ 197 REGISTER_KERNEL_BUILDER(Name("Unique") \ 198 .Device(DEVICE_CPU) \ 199 .TypeConstraint<type>("T") \ 200 .TypeConstraint<int32>("out_idx"), \ 201 UniqueOp<type, int32>); \ 202 
REGISTER_KERNEL_BUILDER(Name("Unique") \ 203 .Device(DEVICE_CPU) \ 204 .TypeConstraint<type>("T") \ 205 .TypeConstraint<int64>("out_idx"), \ 206 UniqueOp<type, int64>); \ 207 REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ 208 .Device(DEVICE_CPU) \ 209 .TypeConstraint<type>("T") \ 210 .TypeConstraint<int32>("out_idx"), \ 211 UniqueOp<type, int32>); \ 212 REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ 213 .Device(DEVICE_CPU) \ 214 .TypeConstraint<type>("T") \ 215 .TypeConstraint<int64>("out_idx"), \ 216 UniqueOp<type, int64>); \ 217 REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts") \ 218 .Device(DEVICE_CPU) \ 219 .TypeConstraint<type>("T") \ 220 .TypeConstraint<int32>("out_idx"), \ 221 UniqueOp<type, int32>) \ 222 REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts") \ 223 .Device(DEVICE_CPU) \ 224 .TypeConstraint<type>("T") \ 225 .TypeConstraint<int64>("out_idx"), \ 226 UniqueOp<type, int64>) 227 TF_CALL_REAL_NUMBER_TYPES(REGISTER_UNIQUE); 228 REGISTER_UNIQUE(string) 229 #undef REGISTER_UNIQUE 230 231 // Fake integer GPU kernels so that the use of Unique in optimizers (to 232 // de-duplicate sparse gradient indices) does not conflict with gradients being 233 // located on a GPU. These kernels run on the CPU, their inputs and outputs 234 // residing in host (not GPU) memory. 
// int32/int64 element types with either index type; all tensors pinned to
// host memory, so the computation itself runs on the CPU.
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);

#ifdef TENSORFLOW_USE_SYCL
// Same host-memory registrations for the SYCL device, mirroring the GPU set
// above (ordering of the four variants differs but the set is identical).
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);
#endif  // TENSORFLOW_USE_SYCL
}  // namespace tensorflow