1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/python/lib/core/ndarray_tensor.h" 17 18 #include <cstring> 19 20 #include "tensorflow/core/lib/core/coding.h" 21 #include "tensorflow/core/lib/core/errors.h" 22 #include "tensorflow/core/lib/gtl/inlined_vector.h" 23 #include "tensorflow/core/platform/types.h" 24 #include "tensorflow/python/lib/core/bfloat16.h" 25 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h" 26 27 namespace tensorflow { 28 namespace { 29 30 Status PyArrayDescr_to_TF_DataType(PyArray_Descr* descr, 31 TF_DataType* out_tf_datatype) { 32 PyObject* key; 33 PyObject* value; 34 Py_ssize_t pos = 0; 35 if (PyDict_Next(descr->fields, &pos, &key, &value)) { 36 // In Python 3, the keys of numpy custom struct types are unicode, unlike 37 // Python 2, where the keys are bytes. 38 const char* key_string = 39 PyBytes_Check(key) ? PyBytes_AsString(key) 40 : PyBytes_AsString(PyUnicode_AsASCIIString(key)); 41 if (!key_string) { 42 return errors::Internal("Corrupt numpy type descriptor"); 43 } 44 tensorflow::string key = key_string; 45 // The typenames here should match the field names in the custom struct 46 // types constructed in test_util.py. 47 // TODO(mrry,keveman): Investigate Numpy type registration to replace this 48 // hard-coding of names. 49 if (key == "quint8") { 50 *out_tf_datatype = TF_QUINT8; 51 } else if (key == "qint8") { 52 *out_tf_datatype = TF_QINT8; 53 } else if (key == "qint16") { 54 *out_tf_datatype = TF_QINT16; 55 } else if (key == "quint16") { 56 *out_tf_datatype = TF_QUINT16; 57 } else if (key == "qint32") { 58 *out_tf_datatype = TF_QINT32; 59 } else if (key == "resource") { 60 *out_tf_datatype = TF_RESOURCE; 61 } else { 62 return errors::Internal("Unsupported numpy data type"); 63 } 64 return Status::OK(); 65 } 66 return errors::Internal("Unsupported numpy data type"); 67 } 68 69 Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array, 70 TF_DataType* out_tf_datatype) { 71 int pyarray_type = PyArray_TYPE(array); 72 PyArray_Descr* descr = PyArray_DESCR(array); 73 switch (pyarray_type) { 74 case NPY_FLOAT16: 75 *out_tf_datatype = TF_HALF; 76 break; 77 case NPY_FLOAT32: 78 *out_tf_datatype = TF_FLOAT; 79 break; 80 case NPY_FLOAT64: 81 *out_tf_datatype = TF_DOUBLE; 82 break; 83 case NPY_INT32: 84 *out_tf_datatype = TF_INT32; 85 break; 86 case NPY_UINT8: 87 *out_tf_datatype = TF_UINT8; 88 break; 89 case NPY_UINT16: 90 *out_tf_datatype = TF_UINT16; 91 break; 92 case NPY_UINT32: 93 *out_tf_datatype = TF_UINT32; 94 break; 95 case NPY_UINT64: 96 *out_tf_datatype = TF_UINT64; 97 break; 98 case NPY_INT8: 99 *out_tf_datatype = TF_INT8; 100 break; 101 case NPY_INT16: 102 *out_tf_datatype = TF_INT16; 103 break; 104 case NPY_INT64: 105 *out_tf_datatype = TF_INT64; 106 break; 107 case NPY_BOOL: 108 *out_tf_datatype = TF_BOOL; 109 break; 110 case NPY_COMPLEX64: 111 *out_tf_datatype = TF_COMPLEX64; 112 break; 113 case NPY_COMPLEX128: 114 *out_tf_datatype = TF_COMPLEX128; 115 break; 116 case NPY_OBJECT: 117 case NPY_STRING: 118 case NPY_UNICODE: 119 *out_tf_datatype = TF_STRING; 120 break; 121 case NPY_VOID: 122 // Quantized types are currently represented as custom struct types. 123 // PyArray_TYPE returns NPY_VOID for structs, and we should look into 124 // descr to derive the actual type. 125 // Direct feeds of certain types of ResourceHandles are represented as a 126 // custom struct type. 127 return PyArrayDescr_to_TF_DataType(descr, out_tf_datatype); 128 default: 129 if (pyarray_type == Bfloat16NumpyType()) { 130 *out_tf_datatype = TF_BFLOAT16; 131 break; 132 } 133 // TODO(mrry): Support these. 134 return errors::Internal("Unsupported feed type"); 135 } 136 return Status::OK(); 137 } 138 139 // Iterate over the string array 'array', extract the ptr and len of each string 140 // element and call f(ptr, len). 141 template <typename F> 142 Status PyBytesArrayMap(PyArrayObject* array, F f) { 143 Safe_PyObjectPtr iter = tensorflow::make_safe( 144 PyArray_IterNew(reinterpret_cast<PyObject*>(array))); 145 while (PyArray_ITER_NOTDONE(iter.get())) { 146 auto item = tensorflow::make_safe(PyArray_GETITEM( 147 array, static_cast<char*>(PyArray_ITER_DATA(iter.get())))); 148 if (!item.get()) { 149 return errors::Internal("Unable to get element from the feed - no item."); 150 } 151 char* ptr; 152 Py_ssize_t len; 153 154 if (PyUnicode_Check(item.get())) { 155 #if PY_VERSION_HEX >= 0x03030000 156 // Accept unicode by converting to UTF-8 bytes. 157 ptr = PyUnicode_AsUTF8AndSize(item.get(), &len); 158 if (!ptr) { 159 return errors::Internal("Unable to get element as UTF-8."); 160 } 161 f(ptr, len); 162 #else 163 PyObject* utemp = PyUnicode_AsUTF8String(item.get()); 164 if (!utemp || PyBytes_AsStringAndSize(utemp, &ptr, &len) == -1) { 165 Py_XDECREF(utemp); 166 return errors::Internal("Unable to convert element to UTF-8."); 167 } 168 f(ptr, len); 169 Py_DECREF(utemp); 170 #endif 171 } else { 172 int success = PyBytes_AsStringAndSize(item.get(), &ptr, &len); 173 if (success != 0) { 174 return errors::Internal("Unable to get element as bytes."); 175 } 176 f(ptr, len); 177 } 178 PyArray_ITER_NEXT(iter.get()); 179 } 180 return Status::OK(); 181 } 182 183 // Encode the strings in 'array' into a contiguous buffer and return the base of 184 // the buffer. The caller takes ownership of the buffer. 185 Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems, 186 size_t* size, void** buffer) { 187 // Compute bytes needed for encoding. 188 *size = 0; 189 TF_RETURN_IF_ERROR(PyBytesArrayMap(array, [&size](char* ptr, Py_ssize_t len) { 190 *size += 191 sizeof(tensorflow::uint64) + tensorflow::core::VarintLength(len) + len; 192 })); 193 // Encode all strings. 194 std::unique_ptr<char[]> base_ptr(new char[*size]); 195 char* base = base_ptr.get(); 196 char* data_start = base + sizeof(tensorflow::uint64) * nelems; 197 char* dst = data_start; // Where next string is encoded. 198 tensorflow::uint64* offsets = reinterpret_cast<tensorflow::uint64*>(base); 199 200 TF_RETURN_IF_ERROR(PyBytesArrayMap( 201 array, [&base, &data_start, &dst, &offsets](char* ptr, Py_ssize_t len) { 202 *offsets = (dst - data_start); 203 offsets++; 204 dst = tensorflow::core::EncodeVarint64(dst, len); 205 memcpy(dst, ptr, len); 206 dst += len; 207 })); 208 CHECK_EQ(dst, base + *size); 209 *buffer = base_ptr.release(); 210 return Status::OK(); 211 } 212 213 Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems, 214 PyArrayObject* dst) { 215 const void* tensor_data = TF_TensorData(src); 216 const size_t tensor_size = TF_TensorByteSize(src); 217 const char* limit = static_cast<const char*>(tensor_data) + tensor_size; 218 DCHECK(tensor_data != nullptr); 219 DCHECK_EQ(TF_STRING, TF_TensorType(src)); 220 221 const uint64* offsets = static_cast<const uint64*>(tensor_data); 222 const size_t offsets_size = sizeof(uint64) * nelems; 223 const char* data = static_cast<const char*>(tensor_data) + offsets_size; 224 225 const size_t expected_tensor_size = 226 (limit - static_cast<const char*>(tensor_data)); 227 if (expected_tensor_size - tensor_size) { 228 return errors::InvalidArgument( 229 "Invalid/corrupt TF_STRING tensor: expected ", expected_tensor_size, 230 " bytes of encoded strings for the tensor containing ", nelems, 231 " strings, but the tensor is encoded in ", tensor_size, " bytes"); 232 } 233 std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status( 234 TF_NewStatus(), TF_DeleteStatus); 235 auto iter = make_safe(PyArray_IterNew(reinterpret_cast<PyObject*>(dst))); 236 for (int64 i = 0; i < nelems; ++i) { 237 const char* start = data + offsets[i]; 238 const char* ptr = nullptr; 239 size_t len = 0; 240 241 TF_StringDecode(start, limit - start, &ptr, &len, status.get()); 242 if (TF_GetCode(status.get()) != TF_OK) { 243 return errors::InvalidArgument(TF_Message(status.get())); 244 } 245 246 auto py_string = make_safe(PyBytes_FromStringAndSize(ptr, len)); 247 if (py_string == nullptr) { 248 return errors::Internal( 249 "failed to create a python byte array when converting element #", i, 250 " of a TF_STRING tensor to a numpy ndarray"); 251 } 252 253 if (PyArray_SETITEM(dst, static_cast<char*>(PyArray_ITER_DATA(iter.get())), 254 py_string.get()) != 0) { 255 return errors::Internal("Error settings element #", i, 256 " in the numpy ndarray"); 257 } 258 PyArray_ITER_NEXT(iter.get()); 259 } 260 return Status::OK(); 261 } 262 263 // Determine the dimensions of a numpy ndarray to be created to represent an 264 // output Tensor. 265 gtl::InlinedVector<npy_intp, 4> GetPyArrayDimensionsForTensor( 266 const TF_Tensor* tensor, tensorflow::int64* nelems) { 267 const int ndims = TF_NumDims(tensor); 268 gtl::InlinedVector<npy_intp, 4> dims(ndims); 269 if (TF_TensorType(tensor) == TF_RESOURCE) { 270 dims[0] = TF_TensorByteSize(tensor); 271 *nelems = dims[0]; 272 } else { 273 *nelems = 1; 274 for (int i = 0; i < ndims; ++i) { 275 dims[i] = TF_Dim(tensor, i); 276 *nelems *= dims[i]; 277 } 278 } 279 return dims; 280 } 281 282 // Determine the type description (PyArray_Descr) of a numpy ndarray to be 283 // created to represent an output Tensor. 284 Status GetPyArrayDescrForTensor(const TF_Tensor* tensor, 285 PyArray_Descr** descr) { 286 if (TF_TensorType(tensor) == TF_RESOURCE) { 287 PyObject* field = PyTuple_New(3); 288 #if PY_MAJOR_VERSION < 3 289 PyTuple_SetItem(field, 0, PyBytes_FromString("resource")); 290 #else 291 PyTuple_SetItem(field, 0, PyUnicode_FromString("resource")); 292 #endif 293 PyTuple_SetItem(field, 1, PyArray_TypeObjectFromType(NPY_UBYTE)); 294 PyTuple_SetItem(field, 2, PyLong_FromLong(1)); 295 PyObject* fields = PyList_New(1); 296 PyList_SetItem(fields, 0, field); 297 int convert_result = PyArray_DescrConverter(fields, descr); 298 Py_CLEAR(field); 299 Py_CLEAR(fields); 300 if (convert_result != 1) { 301 return errors::Internal("Failed to create numpy array description for ", 302 "TF_RESOURCE-type tensor"); 303 } 304 } else { 305 int type_num = -1; 306 TF_RETURN_IF_ERROR( 307 TF_DataType_to_PyArray_TYPE(TF_TensorType(tensor), &type_num)); 308 *descr = PyArray_DescrFromType(type_num); 309 } 310 311 return Status::OK(); 312 } 313 } // namespace 314 315 // Converts the given TF_Tensor to a numpy ndarray. 316 // If the returned status is OK, the caller becomes the owner of *out_array. 317 Status TF_TensorToPyArray(Safe_TF_TensorPtr tensor, PyObject** out_ndarray) { 318 // A fetched operation will correspond to a null tensor, and a None 319 // in Python. 320 if (tensor == nullptr) { 321 Py_INCREF(Py_None); 322 *out_ndarray = Py_None; 323 return Status::OK(); 324 } 325 int64 nelems = -1; 326 gtl::InlinedVector<npy_intp, 4> dims = 327 GetPyArrayDimensionsForTensor(tensor.get(), &nelems); 328 329 // If the type is neither string nor resource we can reuse the Tensor memory. 330 TF_Tensor* original = tensor.get(); 331 TF_Tensor* moved = TF_TensorMaybeMove(tensor.release()); 332 if (moved != nullptr) { 333 if (ArrayFromMemory(dims.size(), dims.data(), TF_TensorData(moved), 334 static_cast<DataType>(TF_TensorType(moved)), 335 [moved] { TF_DeleteTensor(moved); }, out_ndarray) 336 .ok()) { 337 return Status::OK(); 338 } 339 } 340 tensor.reset(original); 341 342 // Copy the TF_TensorData into a newly-created ndarray and return it. 343 PyArray_Descr* descr = nullptr; 344 TF_RETURN_IF_ERROR(GetPyArrayDescrForTensor(tensor.get(), &descr)); 345 Safe_PyObjectPtr safe_out_array = 346 tensorflow::make_safe(PyArray_Empty(dims.size(), dims.data(), descr, 0)); 347 if (!safe_out_array) { 348 return errors::Internal("Could not allocate ndarray"); 349 } 350 PyArrayObject* py_array = 351 reinterpret_cast<PyArrayObject*>(safe_out_array.get()); 352 if (TF_TensorType(tensor.get()) == TF_STRING) { 353 Status s = CopyTF_TensorStringsToPyArray(tensor.get(), nelems, py_array); 354 if (!s.ok()) { 355 return s; 356 } 357 } else if (static_cast<size_t>(PyArray_NBYTES(py_array)) != 358 TF_TensorByteSize(tensor.get())) { 359 return errors::Internal("ndarray was ", PyArray_NBYTES(py_array), 360 " bytes but TF_Tensor was ", 361 TF_TensorByteSize(tensor.get()), " bytes"); 362 } else { 363 memcpy(PyArray_DATA(py_array), TF_TensorData(tensor.get()), 364 PyArray_NBYTES(py_array)); 365 } 366 367 // PyArray_Return turns rank 0 arrays into numpy scalars 368 *out_ndarray = PyArray_Return( 369 reinterpret_cast<PyArrayObject*>(safe_out_array.release())); 370 return Status::OK(); 371 } 372 373 Status PyArrayToTF_Tensor(PyObject* ndarray, Safe_TF_TensorPtr* out_tensor) { 374 DCHECK(out_tensor != nullptr); 375 376 // Make sure we dereference this array object in case of error, etc. 377 Safe_PyObjectPtr array_safe(make_safe( 378 PyArray_FromAny(ndarray, nullptr, 0, 0, NPY_ARRAY_CARRAY, nullptr))); 379 if (!array_safe) return errors::InvalidArgument("Not a ndarray."); 380 PyArrayObject* array = reinterpret_cast<PyArrayObject*>(array_safe.get()); 381 382 // Convert numpy dtype to TensorFlow dtype. 383 TF_DataType dtype = TF_FLOAT; 384 TF_RETURN_IF_ERROR(PyArray_TYPE_to_TF_DataType(array, &dtype)); 385 386 tensorflow::int64 nelems = 1; 387 gtl::InlinedVector<int64_t, 4> dims; 388 for (int i = 0; i < PyArray_NDIM(array); ++i) { 389 dims.push_back(PyArray_SHAPE(array)[i]); 390 nelems *= dims[i]; 391 } 392 393 // Create a TF_Tensor based on the fed data. In the case of non-string data 394 // type, this steals a reference to array, which will be relinquished when 395 // the underlying buffer is deallocated. For string, a new temporary buffer 396 // is allocated into which the strings are encoded. 397 if (dtype == TF_RESOURCE) { 398 size_t size = PyArray_NBYTES(array); 399 array_safe.release(); 400 *out_tensor = make_safe(TF_NewTensor(dtype, {}, 0, PyArray_DATA(array), 401 size, &DelayedNumpyDecref, array)); 402 403 } else if (dtype != TF_STRING) { 404 size_t size = PyArray_NBYTES(array); 405 array_safe.release(); 406 *out_tensor = make_safe(TF_NewTensor(dtype, dims.data(), dims.size(), 407 PyArray_DATA(array), size, 408 &DelayedNumpyDecref, array)); 409 } else { 410 size_t size = 0; 411 void* encoded = nullptr; 412 TF_RETURN_IF_ERROR(EncodePyBytesArray(array, nelems, &size, &encoded)); 413 *out_tensor = 414 make_safe(TF_NewTensor(dtype, dims.data(), dims.size(), encoded, size, 415 [](void* data, size_t len, void* arg) { 416 delete[] reinterpret_cast<char*>(data); 417 }, 418 nullptr)); 419 } 420 return Status::OK(); 421 } 422 423 Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst); 424 TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, 425 TF_Status* status); 426 427 Status NdarrayToTensor(PyObject* obj, Tensor* ret) { 428 Safe_TF_TensorPtr tf_tensor = make_safe(static_cast<TF_Tensor*>(nullptr)); 429 Status s = PyArrayToTF_Tensor(obj, &tf_tensor); 430 if (!s.ok()) { 431 return s; 432 } 433 return TF_TensorToTensor(tf_tensor.get(), ret); 434 } 435 436 Status TensorToNdarray(const Tensor& t, PyObject** ret) { 437 TF_Status* status = TF_NewStatus(); 438 Safe_TF_TensorPtr tf_tensor = make_safe(TF_TensorFromTensor(t, status)); 439 Status tf_status = StatusFromTF_Status(status); 440 TF_DeleteStatus(status); 441 if (!tf_status.ok()) { 442 return tf_status; 443 } 444 return TF_TensorToPyArray(std::move(tf_tensor), ret); 445 } 446 447 } // namespace tensorflow 448