/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/python/lib/core/ndarray_tensor.h"

#include <cstring>

#include "tensorflow/core/lib/core/coding.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/python/lib/core/bfloat16.h"
#include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"

namespace tensorflow {
namespace {

Status PyArrayDescr_to_TF_DataType(PyArray_Descr* descr,
                                   TF_DataType* out_tf_datatype) {
  PyObject* key;
  PyObject* value;
  Py_ssize_t pos = 0;
  if (PyDict_Next(descr->fields, &pos, &key, &value)) {
    // In Python 3, the keys of numpy custom struct types are unicode, unlike
    // Python 2, where the keys are bytes. Keep any converted bytes object
    // alive while key_string points into it.
    Safe_PyObjectPtr key_bytes;
    const char* key_string = nullptr;
    if (PyBytes_Check(key)) {
      key_string = PyBytes_AsString(key);
    } else {
      key_bytes = make_safe(PyUnicode_AsASCIIString(key));
      if (key_bytes) key_string = PyBytes_AsString(key_bytes.get());
    }
    if (!key_string) {
      return errors::Internal("Corrupt numpy type descriptor");
    }
    tensorflow::string key = key_string;
    // The typenames here should match the field names in the custom struct
    // types constructed in test_util.py.
    // TODO(mrry,keveman): Investigate Numpy type registration to replace this
    // hard-coding of names.
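    // For illustration only (assumed spelling, not verified against
    // test_util.py): such a struct dtype would look like
    //   np.dtype([("qint8", np.int8, 1)])
    // so the single field name read above is enough to identify the TF type.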
    if (key == "quint8") {
      *out_tf_datatype = TF_QUINT8;
    } else if (key == "qint8") {
      *out_tf_datatype = TF_QINT8;
    } else if (key == "qint16") {
      *out_tf_datatype = TF_QINT16;
    } else if (key == "quint16") {
      *out_tf_datatype = TF_QUINT16;
    } else if (key == "qint32") {
      *out_tf_datatype = TF_QINT32;
    } else if (key == "resource") {
      *out_tf_datatype = TF_RESOURCE;
    } else {
      return errors::Internal("Unsupported numpy data type");
    }
    return Status::OK();
  }
  return errors::Internal("Unsupported numpy data type");
}

Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array,
                                   TF_DataType* out_tf_datatype) {
  int pyarray_type = PyArray_TYPE(array);
  PyArray_Descr* descr = PyArray_DESCR(array);
  switch (pyarray_type) {
    case NPY_FLOAT16:
      *out_tf_datatype = TF_HALF;
      break;
    case NPY_FLOAT32:
      *out_tf_datatype = TF_FLOAT;
      break;
    case NPY_FLOAT64:
      *out_tf_datatype = TF_DOUBLE;
      break;
    case NPY_INT32:
      *out_tf_datatype = TF_INT32;
      break;
    case NPY_UINT8:
      *out_tf_datatype = TF_UINT8;
      break;
    case NPY_UINT16:
      *out_tf_datatype = TF_UINT16;
      break;
    case NPY_UINT32:
      *out_tf_datatype = TF_UINT32;
      break;
    case NPY_UINT64:
      *out_tf_datatype = TF_UINT64;
      break;
    case NPY_INT8:
      *out_tf_datatype = TF_INT8;
      break;
    case NPY_INT16:
      *out_tf_datatype = TF_INT16;
      break;
    case NPY_INT64:
      *out_tf_datatype = TF_INT64;
      break;
    case NPY_BOOL:
      *out_tf_datatype = TF_BOOL;
      break;
    case NPY_COMPLEX64:
      *out_tf_datatype = TF_COMPLEX64;
      break;
    case NPY_COMPLEX128:
      *out_tf_datatype = TF_COMPLEX128;
      break;
    case NPY_OBJECT:
    case NPY_STRING:
    case NPY_UNICODE:
      *out_tf_datatype = TF_STRING;
      break;
    case NPY_VOID:
      // Quantized types are currently represented as custom struct types.
      // PyArray_TYPE returns NPY_VOID for structs, and we should look into
      // descr to derive the actual type.
      // Direct feeds of certain types of ResourceHandles are represented as a
      // custom struct type.
      return PyArrayDescr_to_TF_DataType(descr, out_tf_datatype);
    default:
      if (pyarray_type == Bfloat16NumpyType()) {
        *out_tf_datatype = TF_BFLOAT16;
        break;
      }
      // TODO(mrry): Support these.
      return errors::Internal("Unsupported feed type");
  }
  return Status::OK();
}

// Iterate over the string array 'array', extract the ptr and len of each string
// element and call f(ptr, len).
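// For illustration, a minimal use of PyBytesArrayMap (sketch; 'array' is an
// assumed PyArrayObject* holding bytes/unicode elements):
//   size_t total = 0;
//   TF_RETURN_IF_ERROR(PyBytesArrayMap(
//       array, [&total](char* ptr, Py_ssize_t len) { total += len; }));
// EncodePyBytesArray below uses this same pattern twice: once to size the
// output buffer and once to fill it.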
template <typename F>
Status PyBytesArrayMap(PyArrayObject* array, F f) {
  Safe_PyObjectPtr iter = tensorflow::make_safe(
      PyArray_IterNew(reinterpret_cast<PyObject*>(array)));
  while (PyArray_ITER_NOTDONE(iter.get())) {
    auto item = tensorflow::make_safe(PyArray_GETITEM(
        array, static_cast<char*>(PyArray_ITER_DATA(iter.get()))));
    if (!item.get()) {
      return errors::Internal("Unable to get element from the feed - no item.");
    }
    char* ptr;
    Py_ssize_t len;

    if (PyUnicode_Check(item.get())) {
#if PY_VERSION_HEX >= 0x03030000
      // Accept unicode by converting to UTF-8 bytes.
      ptr = PyUnicode_AsUTF8AndSize(item.get(), &len);
      if (!ptr) {
        return errors::Internal("Unable to get element as UTF-8.");
      }
      f(ptr, len);
#else
      PyObject* utemp = PyUnicode_AsUTF8String(item.get());
      if (!utemp || PyBytes_AsStringAndSize(utemp, &ptr, &len) == -1) {
        Py_XDECREF(utemp);
        return errors::Internal("Unable to convert element to UTF-8.");
      }
      f(ptr, len);
      Py_DECREF(utemp);
#endif
    } else {
      int success = PyBytes_AsStringAndSize(item.get(), &ptr, &len);
      if (success != 0) {
        return errors::Internal("Unable to get element as bytes.");
      }
      f(ptr, len);
    }
    PyArray_ITER_NEXT(iter.get());
  }
  return Status::OK();
}

// Encode the strings in 'array' into a contiguous buffer and return the base of
// the buffer. The caller takes ownership of the buffer.
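// For illustration, the buffer built below is laid out as (sketch):
//   [uint64 offset_0 ... offset_{n-1}]   // one offset per string, relative
//                                        // to the start of the data section
//   [varint len_0][bytes of string 0] ... [varint len_{n-1}][bytes of string n-1]
// e.g. encoding {"ab", "xyz"} produces offsets {0, 3} followed by
// {0x02 'a' 'b' 0x03 'x' 'y' 'z'}.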
Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems,
                          size_t* size, void** buffer) {
  // Compute bytes needed for encoding.
  *size = 0;
  TF_RETURN_IF_ERROR(PyBytesArrayMap(array, [&size](char* ptr, Py_ssize_t len) {
    *size +=
        sizeof(tensorflow::uint64) + tensorflow::core::VarintLength(len) + len;
  }));
  // Encode all strings.
  std::unique_ptr<char[]> base_ptr(new char[*size]);
  char* base = base_ptr.get();
  char* data_start = base + sizeof(tensorflow::uint64) * nelems;
  char* dst = data_start;  // Where next string is encoded.
  tensorflow::uint64* offsets = reinterpret_cast<tensorflow::uint64*>(base);

  TF_RETURN_IF_ERROR(PyBytesArrayMap(
      array, [&base, &data_start, &dst, &offsets](char* ptr, Py_ssize_t len) {
        *offsets = (dst - data_start);
        offsets++;
        dst = tensorflow::core::EncodeVarint64(dst, len);
        memcpy(dst, ptr, len);
        dst += len;
      }));
  CHECK_EQ(dst, base + *size);
  *buffer = base_ptr.release();
  return Status::OK();
}

// Copies the 'nelems' string elements encoded in the TF_STRING tensor 'src'
// into the numpy ndarray 'dst', one element at a time.
Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems,
                                     PyArrayObject* dst) {
  const void* tensor_data = TF_TensorData(src);
  const size_t tensor_size = TF_TensorByteSize(src);
  const char* limit = static_cast<const char*>(tensor_data) + tensor_size;
  DCHECK(tensor_data != nullptr);
  DCHECK_EQ(TF_STRING, TF_TensorType(src));

  const uint64* offsets = static_cast<const uint64*>(tensor_data);
  const size_t offsets_size = sizeof(uint64) * nelems;
  const char* data = static_cast<const char*>(tensor_data) + offsets_size;

  const size_t expected_tensor_size =
      (limit - static_cast<const char*>(tensor_data));
  if (expected_tensor_size != tensor_size) {
    return errors::InvalidArgument(
        "Invalid/corrupt TF_STRING tensor: expected ", expected_tensor_size,
        " bytes of encoded strings for the tensor containing ", nelems,
        " strings, but the tensor is encoded in ", tensor_size, " bytes");
  }
  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
      TF_NewStatus(), TF_DeleteStatus);
  auto iter = make_safe(PyArray_IterNew(reinterpret_cast<PyObject*>(dst)));
  for (int64 i = 0; i < nelems; ++i) {
    const char* start = data + offsets[i];
    const char* ptr = nullptr;
    size_t len = 0;

    TF_StringDecode(start, limit - start, &ptr, &len, status.get());
    if (TF_GetCode(status.get()) != TF_OK) {
      return errors::InvalidArgument(TF_Message(status.get()));
    }

    auto py_string = make_safe(PyBytes_FromStringAndSize(ptr, len));
    if (py_string == nullptr) {
      return errors::Internal(
          "failed to create a python byte array when converting element #", i,
          " of a TF_STRING tensor to a numpy ndarray");
    }

    if (PyArray_SETITEM(dst, static_cast<char*>(PyArray_ITER_DATA(iter.get())),
                        py_string.get()) != 0) {
      return errors::Internal("Error setting element #", i,
                              " in the numpy ndarray");
    }
    PyArray_ITER_NEXT(iter.get());
  }
  return Status::OK();
}

// Determine the dimensions of a numpy ndarray to be created to represent an
// output Tensor.
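// For illustration: a 2x3 TF_FLOAT tensor yields dims = {2, 3} and
// *nelems = 6, while a TF_RESOURCE tensor is exposed as a flat byte array, so
// dims[0] and *nelems are both set to its serialized byte size below.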
gtl::InlinedVector<npy_intp, 4> GetPyArrayDimensionsForTensor(
    const TF_Tensor* tensor, tensorflow::int64* nelems) {
  const int ndims = TF_NumDims(tensor);
  gtl::InlinedVector<npy_intp, 4> dims(ndims);
  if (TF_TensorType(tensor) == TF_RESOURCE) {
    dims[0] = TF_TensorByteSize(tensor);
    *nelems = dims[0];
  } else {
    *nelems = 1;
    for (int i = 0; i < ndims; ++i) {
      dims[i] = TF_Dim(tensor, i);
      *nelems *= dims[i];
    }
  }
  return dims;
}

// Determine the type description (PyArray_Descr) of a numpy ndarray to be
// created to represent an output Tensor.
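// For TF_RESOURCE tensors the descriptor is built field-by-field below; the
// result is equivalent (sketch, not verified) to the numpy struct dtype
//   np.dtype([("resource", np.ubyte, 1)])
// whose "resource" field name matches the one recognized in
// PyArrayDescr_to_TF_DataType above.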
Status GetPyArrayDescrForTensor(const TF_Tensor* tensor,
                                PyArray_Descr** descr) {
  if (TF_TensorType(tensor) == TF_RESOURCE) {
    PyObject* field = PyTuple_New(3);
#if PY_MAJOR_VERSION < 3
    PyTuple_SetItem(field, 0, PyBytes_FromString("resource"));
#else
    PyTuple_SetItem(field, 0, PyUnicode_FromString("resource"));
#endif
    PyTuple_SetItem(field, 1, PyArray_TypeObjectFromType(NPY_UBYTE));
    PyTuple_SetItem(field, 2, PyLong_FromLong(1));
    PyObject* fields = PyList_New(1);
    // PyList_SetItem steals the reference to 'field', so only 'fields' must be
    // released afterwards.
    PyList_SetItem(fields, 0, field);
    int convert_result = PyArray_DescrConverter(fields, descr);
    Py_CLEAR(fields);
    if (convert_result != 1) {
      return errors::Internal("Failed to create numpy array description for ",
                              "TF_RESOURCE-type tensor");
    }
  } else {
    int type_num = -1;
    TF_RETURN_IF_ERROR(
        TF_DataType_to_PyArray_TYPE(TF_TensorType(tensor), &type_num));
    *descr = PyArray_DescrFromType(type_num);
  }

  return Status::OK();
}
}  // namespace

// Converts the given TF_Tensor to a numpy ndarray.
// If the returned status is OK, the caller becomes the owner of *out_ndarray.
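// For illustration, a minimal fetch-side use (sketch; 'fetched' is an assumed
// Safe_TF_TensorPtr holding the tensor to convert):
//   PyObject* ndarray = nullptr;
//   Status s = TF_TensorToPyArray(std::move(fetched), &ndarray);
//   if (s.ok()) {
//     // 'ndarray' is a new reference (or Py_None for a null tensor) that the
//     // caller must eventually Py_DECREF.
//   }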
Status TF_TensorToPyArray(Safe_TF_TensorPtr tensor, PyObject** out_ndarray) {
  // A fetched operation will correspond to a null tensor, which is returned
  // as None in Python.
  if (tensor == nullptr) {
    Py_INCREF(Py_None);
    *out_ndarray = Py_None;
    return Status::OK();
  }
  int64 nelems = -1;
  gtl::InlinedVector<npy_intp, 4> dims =
      GetPyArrayDimensionsForTensor(tensor.get(), &nelems);

  // If the type is neither string nor resource, we can reuse the Tensor memory.
  TF_Tensor* original = tensor.get();
  TF_Tensor* moved = TF_TensorMaybeMove(tensor.release());
  if (moved != nullptr) {
    if (ArrayFromMemory(dims.size(), dims.data(), TF_TensorData(moved),
                        static_cast<DataType>(TF_TensorType(moved)),
                        [moved] { TF_DeleteTensor(moved); }, out_ndarray)
            .ok()) {
      return Status::OK();
    }
  }
  tensor.reset(original);

  // Copy the TF_TensorData into a newly-created ndarray and return it.
  PyArray_Descr* descr = nullptr;
  TF_RETURN_IF_ERROR(GetPyArrayDescrForTensor(tensor.get(), &descr));
  Safe_PyObjectPtr safe_out_array =
      tensorflow::make_safe(PyArray_Empty(dims.size(), dims.data(), descr, 0));
  if (!safe_out_array) {
    return errors::Internal("Could not allocate ndarray");
  }
  PyArrayObject* py_array =
      reinterpret_cast<PyArrayObject*>(safe_out_array.get());
  if (TF_TensorType(tensor.get()) == TF_STRING) {
    Status s = CopyTF_TensorStringsToPyArray(tensor.get(), nelems, py_array);
    if (!s.ok()) {
      return s;
    }
  } else if (static_cast<size_t>(PyArray_NBYTES(py_array)) !=
             TF_TensorByteSize(tensor.get())) {
    return errors::Internal("ndarray was ", PyArray_NBYTES(py_array),
                            " bytes but TF_Tensor was ",
                            TF_TensorByteSize(tensor.get()), " bytes");
  } else {
    memcpy(PyArray_DATA(py_array), TF_TensorData(tensor.get()),
           PyArray_NBYTES(py_array));
  }

  // PyArray_Return turns rank-0 arrays into numpy scalars.
  *out_ndarray = PyArray_Return(
      reinterpret_cast<PyArrayObject*>(safe_out_array.release()));
  return Status::OK();
}

// Converts the given numpy ndarray to a TF_Tensor, returned in *out_tensor.
// For non-string dtypes the tensor aliases the ndarray's buffer and keeps a
// delayed reference to the ndarray; for strings, a freshly encoded buffer
// owned by the tensor is used instead.
Status PyArrayToTF_Tensor(PyObject* ndarray, Safe_TF_TensorPtr* out_tensor) {
  DCHECK(out_tensor != nullptr);

  // Make sure we dereference this array object in case of error, etc.
  Safe_PyObjectPtr array_safe(make_safe(
      PyArray_FromAny(ndarray, nullptr, 0, 0, NPY_ARRAY_CARRAY, nullptr)));
  if (!array_safe) return errors::InvalidArgument("Not a ndarray.");
  PyArrayObject* array = reinterpret_cast<PyArrayObject*>(array_safe.get());

  // Convert numpy dtype to TensorFlow dtype.
  TF_DataType dtype = TF_FLOAT;
  TF_RETURN_IF_ERROR(PyArray_TYPE_to_TF_DataType(array, &dtype));

  tensorflow::int64 nelems = 1;
  gtl::InlinedVector<int64_t, 4> dims;
  for (int i = 0; i < PyArray_NDIM(array); ++i) {
    dims.push_back(PyArray_SHAPE(array)[i]);
    nelems *= dims[i];
  }

  // Create a TF_Tensor based on the fed data. In the case of non-string data
  // type, this steals a reference to array, which will be relinquished when
  // the underlying buffer is deallocated. For string, a new temporary buffer
  // is allocated into which the strings are encoded.
  if (dtype == TF_RESOURCE) {
    size_t size = PyArray_NBYTES(array);
    array_safe.release();
    *out_tensor = make_safe(TF_NewTensor(dtype, {}, 0, PyArray_DATA(array),
                                         size, &DelayedNumpyDecref, array));

  } else if (dtype != TF_STRING) {
    size_t size = PyArray_NBYTES(array);
    array_safe.release();
    *out_tensor = make_safe(TF_NewTensor(dtype, dims.data(), dims.size(),
                                         PyArray_DATA(array), size,
                                         &DelayedNumpyDecref, array));
  } else {
    size_t size = 0;
    void* encoded = nullptr;
    TF_RETURN_IF_ERROR(EncodePyBytesArray(array, nelems, &size, &encoded));
    *out_tensor =
        make_safe(TF_NewTensor(dtype, dims.data(), dims.size(), encoded, size,
                               [](void* data, size_t len, void* arg) {
                                 delete[] reinterpret_cast<char*>(data);
                               },
                               nullptr));
  }
  return Status::OK();
}

Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst);
TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
                               TF_Status* status);

Status NdarrayToTensor(PyObject* obj, Tensor* ret) {
  Safe_TF_TensorPtr tf_tensor = make_safe(static_cast<TF_Tensor*>(nullptr));
  Status s = PyArrayToTF_Tensor(obj, &tf_tensor);
  if (!s.ok()) {
    return s;
  }
  return TF_TensorToTensor(tf_tensor.get(), ret);
}

Status TensorToNdarray(const Tensor& t, PyObject** ret) {
  TF_Status* status = TF_NewStatus();
  Safe_TF_TensorPtr tf_tensor = make_safe(TF_TensorFromTensor(t, status));
  Status tf_status = StatusFromTF_Status(status);
  TF_DeleteStatus(status);
  if (!tf_status.ok()) {
    return tf_status;
  }
  return TF_TensorToPyArray(std::move(tf_tensor), ret);
}
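
// For illustration, a round trip through the two helpers above (sketch; 'obj'
// is an assumed PyObject* holding an ndarray, called with the GIL held):
//   Tensor t;
//   TF_RETURN_IF_ERROR(NdarrayToTensor(obj, &t));
//   PyObject* back = nullptr;
//   TF_RETURN_IF_ERROR(TensorToNdarray(t, &back));
//   // 'back' is a new reference owned by the caller.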

}  // namespace tensorflow