/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Full build instructions are at tensorflow/contrib/pi_examples/README.md.

#include <errno.h>
#include <fcntl.h>
#include <libv4l2.h>
#include <linux/videodev2.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/types.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"

// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::int32;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::Tensor;

// Used to store the memory-mapped buffers we use for capture.
struct CameraBuffer {
  void* start;
  size_t length;
};

// Wrapper around camera ioctl calls that retries while the call is
// interrupted or the device is temporarily busy.
Status SendCameraCommand(int fh, int request, void* arg) {
  int r;
  do {
    r = v4l2_ioctl(fh, request, arg);
  } while (r == -1 && ((errno == EINTR) || (errno == EAGAIN)));
  if (r == -1) {
    LOG(ERROR) << "SendCameraCommand error " << errno << " (" << strerror(errno)
               << ")";
    return tensorflow::errors::Unknown("SendCameraCommand error ", errno, " (",
                                       strerror(errno), ")");
  }
  return Status::OK();
}

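// Opens the default V4L2 device (/dev/video0) in non-blocking mode and
// returns its file descriptor through camera_handle.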
Status OpenCamera(int* camera_handle) {
  const char* dev_name = "/dev/video0";
  int fd = v4l2_open(dev_name, O_RDWR | O_NONBLOCK, 0);
  if (fd < 0) {
    LOG(ERROR) << "Cannot open camera device";
    return tensorflow::errors::NotFound("V4L2 camera device not found");
  }
  *camera_handle = fd;
  return Status::OK();
}

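// Releases the file descriptor obtained from OpenCamera().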
Status CloseCamera(int camera_handle) {
  v4l2_close(camera_handle);
  return Status::OK();
}

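// Asks the driver for RGB24 frames at the requested size. The driver may
// choose a different resolution, in which case we warn but continue.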
Status SetCameraFormat(int camera_handle, int wanted_width, int wanted_height) {
  struct v4l2_format fmt;
  memset(&fmt, 0, sizeof(fmt));
  fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  fmt.fmt.pix.width = wanted_width;
  fmt.fmt.pix.height = wanted_height;
  fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;
  fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
  Status set_format_status =
      SendCameraCommand(camera_handle, VIDIOC_S_FMT, &fmt);
  if (!set_format_status.ok()) {
    LOG(ERROR) << "Setting format failed with " << set_format_status;
    return set_format_status;
  }
  if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_RGB24) {
    LOG(ERROR) << "Libv4l didn't accept RGB24 format. Can't proceed.";
    return tensorflow::errors::Unknown("Libv4l didn't accept RGB24 format");
  }
  if ((fmt.fmt.pix.width != wanted_width) ||
      (fmt.fmt.pix.height != wanted_height)) {
    LOG(WARNING) << "Driver is sending image at " << fmt.fmt.pix.width << "x"
                 << fmt.fmt.pix.height;
  }
  return Status::OK();
}

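// Requests a set of memory-mapped capture buffers from the driver, queues
// them all, and switches the device into streaming mode.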
Status StartCameraCapture(int camera_handle, int buffer_count,
                          CameraBuffer** buffers) {
  struct v4l2_requestbuffers req;
  memset(&req, 0, sizeof(req));
  req.count = buffer_count;
  req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  req.memory = V4L2_MEMORY_MMAP;
  Status request_buffers_status =
      SendCameraCommand(camera_handle, VIDIOC_REQBUFS, &req);
  if (!request_buffers_status.ok()) {
    LOG(ERROR) << "Request buffers failed with " << request_buffers_status;
    return request_buffers_status;
  }

  *buffers = (CameraBuffer*)(calloc(buffer_count, sizeof(CameraBuffer)));
  for (int n_buffers = 0; n_buffers < buffer_count; ++n_buffers) {
    struct v4l2_buffer buf;
    memset(&buf, 0, sizeof(buf));
    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
    buf.index = n_buffers;
    Status query_buffer_status =
        SendCameraCommand(camera_handle, VIDIOC_QUERYBUF, &buf);
    if (!query_buffer_status.ok()) {
      LOG(ERROR) << "Query buffer failed with " << query_buffer_status;
      return query_buffer_status;
    }
    (*buffers)[n_buffers].length = buf.length;
    (*buffers)[n_buffers].start =
        v4l2_mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED,
                  camera_handle, buf.m.offset);

    if (MAP_FAILED == (*buffers)[n_buffers].start) {
      LOG(ERROR) << "Memory-mapping buffer failed";
      return tensorflow::errors::Unknown("Memory-mapping buffer failed");
    }
  }

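  // Queue every buffer with the driver so it has somewhere to write frames.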
  for (int i = 0; i < buffer_count; ++i) {
    struct v4l2_buffer buf;
    memset(&buf, 0, sizeof(buf));
    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
    buf.index = i;
    Status set_buffer_status =
        SendCameraCommand(camera_handle, VIDIOC_QBUF, &buf);
    if (!set_buffer_status.ok()) {
      LOG(ERROR) << "Set buffer failed with " << set_buffer_status;
      return set_buffer_status;
    }
  }

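  // With all buffers queued, tell the driver to start streaming.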
  enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  Status stream_on_status =
      SendCameraCommand(camera_handle, VIDIOC_STREAMON, &type);
  if (!stream_on_status.ok()) {
    LOG(ERROR) << "Turning stream on failed with " << stream_on_status;
    return stream_on_status;
  }
  return Status::OK();
}

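// Stops streaming and unmaps the capture buffers set up by
// StartCameraCapture().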
Status EndCameraCapture(int camera_handle, CameraBuffer* buffers,
                        int buffer_count) {
  enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  Status stream_off_status =
      SendCameraCommand(camera_handle, VIDIOC_STREAMOFF, &type);
  if (!stream_off_status.ok()) {
    LOG(ERROR) << "Turning stream off failed with " << stream_off_status;
    return stream_off_status;
  }
  for (int i = 0; i < buffer_count; ++i)
    v4l2_munmap(buffers[i].start, buffers[i].length);
  return Status::OK();
}

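// Waits (up to two seconds) for a frame to become available, then dequeues
// it. The returned frame_data points into the driver-owned buffer, so the
// caller must hand buf back via ReleaseFrame() once it's done with the
// pixels.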
Status CaptureNextFrame(int camera_handle, CameraBuffer* buffers,
                        uint8_t** frame_data, int* frame_data_size,
                        v4l2_buffer* buf) {
  int r;
  do {
    fd_set fds;
    FD_ZERO(&fds);
    FD_SET(camera_handle, &fds);
    struct timeval tv;
    tv.tv_sec = 2;
    tv.tv_usec = 0;
    r = select(camera_handle + 1, &fds, NULL, NULL, &tv);
  } while ((r == -1) && (errno == EINTR));
  if (r == -1) {
    LOG(ERROR) << "select() failed while waiting for the camera with " << errno;
    return tensorflow::errors::Unknown(
        "CaptureNextFrame: select() failed with ", errno);
  }

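  // Dequeue the buffer that the driver has just filled.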
  memset(buf, 0, sizeof(*buf));
  buf->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  buf->memory = V4L2_MEMORY_MMAP;
  Status get_buffer_status =
      SendCameraCommand(camera_handle, VIDIOC_DQBUF, buf);
  if (!get_buffer_status.ok()) {
    LOG(ERROR) << "Get buffer failed with " << get_buffer_status;
    return get_buffer_status;
  }

  *frame_data = static_cast<uint8_t*>(buffers[buf->index].start);
  *frame_data_size = buf->bytesused;

  return Status::OK();
}

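// Re-queues a buffer obtained from CaptureNextFrame() so the driver can
// reuse it for a later frame.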
Status ReleaseFrame(int camera_handle, v4l2_buffer* buf) {
  Status release_buffer_status =
      SendCameraCommand(camera_handle, VIDIOC_QBUF, buf);
  if (!release_buffer_status.ok()) {
    LOG(ERROR) << "Release buffer failed with " << release_buffer_status;
    return release_buffer_status;
  }
  return Status::OK();
}

// Reads a model graph definition from disk, and creates a session object you
// can use to run it.
Status LoadGraph(string graph_file_name,
                 std::unique_ptr<tensorflow::Session>* session) {
  tensorflow::GraphDef graph_def;
  Status load_graph_status =
      ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
  if (!load_graph_status.ok()) {
    return tensorflow::errors::NotFound("Failed to load compute graph at '",
                                        graph_file_name, "'");
  }
  session->reset(tensorflow::NewSession(tensorflow::SessionOptions()));
  Status session_create_status = (*session)->Create(graph_def);
  if (!session_create_status.ok()) {
    return session_create_status;
  }
  return Status::OK();
}

// Analyzes the output of the Inception graph to retrieve the highest scores and
// their positions in the tensor, which correspond to categories.
Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels,
                    Tensor* out_indices, Tensor* out_scores) {
  const Tensor& unsorted_scores_tensor = outputs[0];
  auto unsorted_scores_flat = unsorted_scores_tensor.flat<float>();
  std::vector<std::pair<int, float>> scores;
  for (int i = 0; i < unsorted_scores_flat.size(); ++i) {
    scores.push_back(std::pair<int, float>({i, unsorted_scores_flat(i)}));
  }
  std::sort(scores.begin(), scores.end(),
            [](const std::pair<int, float>& left,
               const std::pair<int, float>& right) {
              return left.second > right.second;
            });
  scores.resize(how_many_labels);
  Tensor sorted_indices(tensorflow::DT_INT32, {scores.size()});
  Tensor sorted_scores(tensorflow::DT_FLOAT, {scores.size()});
  for (int i = 0; i < scores.size(); ++i) {
    sorted_indices.flat<int>()(i) = scores[i].first;
    sorted_scores.flat<float>()(i) = scores[i].second;
  }
  *out_indices = sorted_indices;
  *out_scores = sorted_scores;
  return Status::OK();
}

// Takes a file name, and loads a list of labels from it, one per line, and
// returns a vector of the strings. It pads with empty strings so the length
// of the result is a multiple of 16, because our model expects that.
Status ReadLabelsFile(string file_name, std::vector<string>* result,
                      size_t* found_label_count) {
  std::ifstream file(file_name);
  if (!file) {
    return tensorflow::errors::NotFound("Labels file ", file_name,
                                        " not found.");
  }
  result->clear();
  string line;
  while (std::getline(file, line)) {
    result->push_back(line);
  }
  *found_label_count = result->size();
  const int padding = 16;
  while (result->size() % padding) {
    result->emplace_back();
  }
  return Status::OK();
}

// Given the output of a model run and the loaded list of labels, this prints
// out the top five highest-scoring values, and writes the single best label
// to stdout when its score exceeds print_threshold.
Status PrintTopLabels(const std::vector<Tensor>& outputs,
                      const std::vector<string>& labels, int label_count,
                      float print_threshold) {
  const int how_many_labels = std::min(5, static_cast<int>(label_count));
  Tensor indices;
  Tensor scores;
  TF_RETURN_IF_ERROR(GetTopLabels(outputs, how_many_labels, &indices, &scores));
  tensorflow::TTypes<float>::Flat scores_flat = scores.flat<float>();
  tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>();
  for (int pos = 0; pos < how_many_labels; ++pos) {
    const int label_index = indices_flat(pos);
    const float score = scores_flat(pos);
    LOG(INFO) << labels[label_index] << " (" << label_index << "): " << score;
    // Print the top label to stdout if it's above a threshold.
    if ((pos == 0) && (score > print_threshold)) {
      std::cout << labels[label_index] << std::endl;
    }
  }
  return Status::OK();
}

// Given an image buffer, resize it to the requested size, and then scale the
// values as desired.
Status TensorFromFrame(uint8_t* image_data, int image_width, int image_height,
                       int image_channels, const int wanted_height,
                       const int wanted_width, const float input_mean,
                       const float input_std,
                       std::vector<Tensor>* out_tensors) {
  const int wanted_channels = 3;
  if (image_channels < wanted_channels) {
    return tensorflow::errors::FailedPrecondition(
        "Image needs to have at least ", wanted_channels,
        " channels but only has ", image_channels);
  }
  // In these loops, we convert the eight-bit data in the image into float,
  // resize it using bilinear filtering, and scale it numerically to the float
  // range that the model expects (given by input_mean and input_std).
  tensorflow::Tensor image_tensor(
      tensorflow::DT_FLOAT,
      tensorflow::TensorShape(
          {1, wanted_height, wanted_width, wanted_channels}));
  auto image_tensor_mapped = image_tensor.tensor<float, 4>();
  tensorflow::uint8* in = image_data;
  float* out = image_tensor_mapped.data();
  const size_t image_rowlen = image_width * image_channels;
  const float width_scale = static_cast<float>(image_width) / wanted_width;
  const float height_scale = static_cast<float>(image_height) / wanted_height;
  for (int y = 0; y < wanted_height; ++y) {
    const float in_y = y * height_scale;
    const int top_y_index = static_cast<int>(floorf(in_y));
    const int bottom_y_index =
        std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
    const float y_lerp = in_y - top_y_index;
    tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen);
    tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen);
    float* out_row = out + (y * wanted_width * wanted_channels);
    for (int x = 0; x < wanted_width; ++x) {
      const float in_x = x * width_scale;
      const int left_x_index = static_cast<int>(floorf(in_x));
      const int right_x_index =
          std::min(static_cast<int>(ceilf(in_x)), (image_width - 1));
      // Input pixels are strided by image_channels, which may be wider than
      // the wanted_channels we copy out.
      tensorflow::uint8* in_top_left_pixel =
          in_top_row + (left_x_index * image_channels);
      tensorflow::uint8* in_top_right_pixel =
          in_top_row + (right_x_index * image_channels);
      tensorflow::uint8* in_bottom_left_pixel =
          in_bottom_row + (left_x_index * image_channels);
      tensorflow::uint8* in_bottom_right_pixel =
          in_bottom_row + (right_x_index * image_channels);
      const float x_lerp = in_x - left_x_index;
      float* out_pixel = out_row + (x * wanted_channels);
      for (int c = 0; c < wanted_channels; ++c) {
        // Standard bilinear interpolation: blend horizontally along the top
        // and bottom rows, then blend those two results vertically.
        const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
        const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
        const float bottom_left((in_bottom_left_pixel[c] - input_mean) /
                                input_std);
        const float bottom_right((in_bottom_right_pixel[c] - input_mean) /
                                 input_std);
        const float top = top_left + (top_right - top_left) * x_lerp;
        const float bottom =
            bottom_left + (bottom_right - bottom_left) * x_lerp;
        out_pixel[c] = top + (bottom - top) * y_lerp;
      }
    }
  }

  out_tensors->push_back(image_tensor);
  return Status::OK();
}

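// Ties everything together: loads the model and labels, opens the camera,
// and classifies a fixed number of frames. A typical invocation (the binary
// name and paths here are illustrative; see the README for the real build
// layout) might be:
//
//   ./camera --graph=data/tensorflow_inception_stripped.pb \
//       --labels=data/imagenet_comp_graph_label_strings.txt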
int main(int argc, char** argv) {
  string graph =
      "tensorflow/contrib/pi_examples/label_image/data/"
      "tensorflow_inception_stripped.pb";
  string labels_file_name =
      "tensorflow/contrib/pi_examples/label_image/data/"
      "imagenet_comp_graph_label_strings.txt";
  int32 input_width = 299;
  int32 input_height = 299;
  int32 input_mean = 128;
  int32 input_std = 128;
  string input_layer = "Mul";
  string output_layer = "softmax";
  int32 video_width = 640;
  int32 video_height = 480;
  int print_threshold = 50;
  string root_dir = "";
  std::vector<Flag> flag_list = {
      Flag("graph", &graph, "graph file name"),
      Flag("labels", &labels_file_name, "labels file name"),
      Flag("input_width", &input_width, "image input width"),
      Flag("input_height", &input_height, "image input height"),
      Flag("input_mean", &input_mean, "transformed mean of input pixels"),
      Flag("input_std", &input_std, "transformed std dev of input pixels"),
      Flag("input_layer", &input_layer, "input layer name"),
      Flag("output_layer", &output_layer, "output layer name"),
      Flag("video_width", &video_width, "video width expected from device"),
      Flag("video_height", &video_height, "video height expected from device"),
      Flag("print_threshold", &print_threshold,
           "print labels with score exceeding this"),
      Flag("root_dir", &root_dir,
           "interpret graph file name relative to this directory")};
  string usage = tensorflow::Flags::Usage(argv[0], flag_list);
  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);

  if (!parse_result || argc != 1) {
    LOG(ERROR) << "\n" << usage;
    return -1;
  }

  // First we load and initialize the model.
  std::unique_ptr<tensorflow::Session> session;
  string graph_path = tensorflow::io::JoinPath(root_dir, graph);
  Status load_graph_status = LoadGraph(graph_path, &session);
  if (!load_graph_status.ok()) {
    LOG(ERROR) << load_graph_status;
    return -1;
  }

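  // Next, load the human-readable labels that the model's output indices
  // map onto.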
  std::vector<string> labels;
  size_t label_count;
  Status read_labels_status =
      ReadLabelsFile(labels_file_name, &labels, &label_count);
  if (!read_labels_status.ok()) {
    LOG(ERROR) << read_labels_status;
    return -1;
  }

  int camera_handle;
  Status open_status = OpenCamera(&camera_handle);
  if (!open_status.ok()) {
    LOG(ERROR) << "OpenCamera failed with " << open_status;
    return -1;
  }

  Status format_status =
      SetCameraFormat(camera_handle, video_width, video_height);
  if (!format_status.ok()) {
    LOG(ERROR) << "SetCameraFormat failed with " << format_status;
    return -1;
  }

  const int how_many_buffers = 2;
  CameraBuffer* buffers;
  Status start_capture_status =
      StartCameraCapture(camera_handle, how_many_buffers, &buffers);
  if (!start_capture_status.ok()) {
    LOG(ERROR) << "StartCameraCapture failed with " << start_capture_status;
    return -1;
  }

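  // Grab and classify a fixed number of frames before shutting down.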
  for (int i = 0; i < 200; i++) {
    uint8_t* frame_data;
    int frame_data_size;
    v4l2_buffer buf;
    Status capture_next_status = CaptureNextFrame(
        camera_handle, buffers, &frame_data, &frame_data_size, &buf);
    if (!capture_next_status.ok()) {
      LOG(ERROR) << "CaptureNextFrame failed with " << capture_next_status;
      return -1;
    }

    std::vector<Tensor> resized_tensors;
    Status tensor_from_frame_status =
        TensorFromFrame(frame_data, video_width, video_height, 3, input_height,
                        input_width, input_mean, input_std, &resized_tensors);
    if (!tensor_from_frame_status.ok()) {
      LOG(ERROR) << tensor_from_frame_status;
      return -1;
    }
    const Tensor& resized_tensor = resized_tensors[0];

    Status release_frame_status = ReleaseFrame(camera_handle, &buf);
    if (!release_frame_status.ok()) {
      LOG(ERROR) << "ReleaseFrame failed with " << release_frame_status;
      return -1;
    }

    // Actually run the image through the model.
    std::vector<Tensor> outputs;
    Status run_status = session->Run({{input_layer, resized_tensor}},
                                     {output_layer}, {}, &outputs);
    if (!run_status.ok()) {
      LOG(ERROR) << "Running model failed: " << run_status;
      return -1;
    }

    // Do something interesting with the results we've generated.
    Status print_status =
        PrintTopLabels(outputs, labels, label_count, print_threshold * 0.01f);
    if (!print_status.ok()) {
      LOG(ERROR) << "Running print failed: " << print_status;
      return -1;
    }
  }

  Status end_capture_status =
      EndCameraCapture(camera_handle, buffers, how_many_buffers);
  if (!end_capture_status.ok()) {
    LOG(ERROR) << "EndCameraCapture failed with " << end_capture_status;
    return -1;
  }

  Status close_status = CloseCamera(camera_handle);
  if (!close_status.ok()) {
    LOG(ERROR) << "CloseCamera failed with " << close_status;
    return -1;
  }

  return 0;
}