/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Full build instructions are at tensorflow/contrib/pi_examples/README.md.

#include <errno.h>
#include <fcntl.h>
#include <libv4l2.h>
#include <linux/videodev2.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/types.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"

// These are all common classes it's handy to reference with no namespace.
48 using tensorflow::Flag; 49 using tensorflow::int32; 50 using tensorflow::Status; 51 using tensorflow::string; 52 using tensorflow::Tensor; 53 54 // Used to store the memory-mapped buffers we use for capture. 55 struct CameraBuffer { 56 void* start; 57 size_t length; 58 }; 59 60 // Wrapper around camera command sending. 61 Status SendCameraCommand(int fh, int request, void* arg) { 62 int r; 63 do { 64 r = v4l2_ioctl(fh, request, arg); 65 } while (r == -1 && ((errno == EINTR) || (errno == EAGAIN))); 66 if (r == -1) { 67 LOG(ERROR) << "SendCameraCommand error " << errno << " (" << strerror(errno) 68 << ")"; 69 return tensorflow::errors::Unknown("SendCameraCommand error ", errno, 70 strerror(errno)); 71 } 72 return Status::OK(); 73 } 74 75 Status OpenCamera(int* camera_handle) { 76 const char* dev_name = "/dev/video0"; 77 int fd = v4l2_open(dev_name, O_RDWR | O_NONBLOCK, 0); 78 if (fd < 0) { 79 LOG(ERROR) << "Cannot open camera device"; 80 return tensorflow::errors::NotFound("V4L2 camera device not found"); 81 } 82 *camera_handle = fd; 83 return Status::OK(); 84 } 85 86 Status CloseCamera(int camera_handle) { 87 v4l2_close(camera_handle); 88 return Status::OK(); 89 } 90 91 Status SetCameraFormat(int camera_handle, int wanted_width, int wanted_height) { 92 struct v4l2_format fmt; 93 memset(&fmt, 0, sizeof(fmt)); 94 fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 95 fmt.fmt.pix.width = wanted_width; 96 fmt.fmt.pix.height = wanted_height; 97 fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24; 98 fmt.fmt.pix.field = V4L2_FIELD_INTERLACED; 99 Status set_format_status = 100 SendCameraCommand(camera_handle, VIDIOC_S_FMT, &fmt); 101 if (!set_format_status.ok()) { 102 LOG(ERROR) << "Setting format failed with " << set_format_status; 103 return set_format_status; 104 } 105 if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_RGB24) { 106 LOG(ERROR) << "Libv4l didn't accept RGB24 format. 
Can't proceed."; 107 return tensorflow::errors::Unknown("Libv4l didn't accept RGB24 format"); 108 } 109 if ((fmt.fmt.pix.width != wanted_width) || 110 (fmt.fmt.pix.height != wanted_height)) { 111 LOG(WARNING) << "Warning: driver is sending image at " << fmt.fmt.pix.width 112 << "x" << fmt.fmt.pix.height; 113 } 114 return Status::OK(); 115 } 116 117 Status StartCameraCapture(int camera_handle, int buffer_count, 118 CameraBuffer** buffers) { 119 struct v4l2_requestbuffers req; 120 memset(&req, 0, sizeof(req)); 121 req.count = buffer_count; 122 req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 123 req.memory = V4L2_MEMORY_MMAP; 124 Status request_buffers_status = 125 SendCameraCommand(camera_handle, VIDIOC_REQBUFS, &req); 126 if (!request_buffers_status.ok()) { 127 LOG(ERROR) << "Request buffers failed with " << request_buffers_status; 128 return request_buffers_status; 129 } 130 131 *buffers = (CameraBuffer*)(calloc(buffer_count, sizeof(*buffers))); 132 for (int n_buffers = 0; n_buffers < buffer_count; ++n_buffers) { 133 struct v4l2_buffer buf; 134 memset(&buf, 0, sizeof(buf)); 135 buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 136 buf.memory = V4L2_MEMORY_MMAP; 137 buf.index = n_buffers; 138 Status query_buffer_status = 139 SendCameraCommand(camera_handle, VIDIOC_QUERYBUF, &buf); 140 if (!query_buffer_status.ok()) { 141 LOG(ERROR) << "Query buffer failed with " << query_buffer_status; 142 return query_buffer_status; 143 } 144 (*buffers)[n_buffers].length = buf.length; 145 (*buffers)[n_buffers].start = 146 v4l2_mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, 147 camera_handle, buf.m.offset); 148 149 if (MAP_FAILED == (*buffers)[n_buffers].start) { 150 LOG(ERROR) << "Memory-mapping buffer failed"; 151 return tensorflow::errors::Unknown("Memory-mapping buffer failed"); 152 } 153 } 154 155 for (int i = 0; i < buffer_count; ++i) { 156 struct v4l2_buffer buf; 157 memset(&buf, 0, sizeof(buf)); 158 buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 159 buf.memory = V4L2_MEMORY_MMAP; 160 
buf.index = i; 161 Status set_buffer_status = 162 SendCameraCommand(camera_handle, VIDIOC_QBUF, &buf); 163 if (!set_buffer_status.ok()) { 164 LOG(ERROR) << "Set buffer failed with " << set_buffer_status; 165 return set_buffer_status; 166 } 167 } 168 169 enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 170 Status stream_on_status = 171 SendCameraCommand(camera_handle, VIDIOC_STREAMON, &type); 172 if (!stream_on_status.ok()) { 173 LOG(ERROR) << "Turning stream on failed with " << stream_on_status; 174 return stream_on_status; 175 } 176 return Status::OK(); 177 } 178 179 Status EndCameraCapture(int camera_handle, CameraBuffer* buffers, 180 int buffer_count) { 181 enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 182 Status stream_off_status = 183 SendCameraCommand(camera_handle, VIDIOC_STREAMOFF, &type); 184 if (!stream_off_status.ok()) { 185 LOG(ERROR) << "Turning stream off failed with " << stream_off_status; 186 return stream_off_status; 187 } 188 for (int i = 0; i < buffer_count; ++i) 189 v4l2_munmap(buffers[i].start, buffers[i].length); 190 return Status::OK(); 191 } 192 193 Status CaptureNextFrame(int camera_handle, CameraBuffer* buffers, 194 uint8_t** frame_data, int* frame_data_size, 195 v4l2_buffer* buf) { 196 int r; 197 do { 198 fd_set fds; 199 FD_ZERO(&fds); 200 FD_SET(camera_handle, &fds); 201 struct timeval tv; 202 tv.tv_sec = 2; 203 tv.tv_usec = 0; 204 r = select(camera_handle + 1, &fds, NULL, NULL, &tv); 205 } while ((r == -1 && (errno = EINTR))); 206 if (r == -1) { 207 LOG(ERROR) << "select() failed while waiting for the camera with " << errno; 208 return tensorflow::errors::Unknown( 209 "CaptureCameraFrame: select() failed with", errno); 210 } 211 212 memset(buf, 0, sizeof(*buf)); 213 buf->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 214 buf->memory = V4L2_MEMORY_MMAP; 215 Status get_buffer_status = 216 SendCameraCommand(camera_handle, VIDIOC_DQBUF, buf); 217 if (!get_buffer_status.ok()) { 218 LOG(ERROR) << "Get buffer failed with " << 
get_buffer_status; 219 return get_buffer_status; 220 } 221 222 *frame_data = static_cast<uint8_t*>(buffers[buf->index].start); 223 *frame_data_size = buf->bytesused; 224 225 return Status::OK(); 226 } 227 228 Status ReleaseFrame(int camera_handle, v4l2_buffer* buf) { 229 Status release_buffer_status = 230 SendCameraCommand(camera_handle, VIDIOC_QBUF, buf); 231 if (!release_buffer_status.ok()) { 232 LOG(ERROR) << "Release buffer failed with " << release_buffer_status; 233 return release_buffer_status; 234 } 235 } 236 237 // Reads a model graph definition from disk, and creates a session object you 238 // can use to run it. 239 Status LoadGraph(string graph_file_name, 240 std::unique_ptr<tensorflow::Session>* session) { 241 tensorflow::GraphDef graph_def; 242 Status load_graph_status = 243 ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def); 244 if (!load_graph_status.ok()) { 245 return tensorflow::errors::NotFound("Failed to load compute graph at '", 246 graph_file_name, "'"); 247 } 248 session->reset(tensorflow::NewSession(tensorflow::SessionOptions())); 249 Status session_create_status = (*session)->Create(graph_def); 250 if (!session_create_status.ok()) { 251 return session_create_status; 252 } 253 return Status::OK(); 254 } 255 256 // Analyzes the output of the Inception graph to retrieve the highest scores and 257 // their positions in the tensor, which correspond to categories. 
258 Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels, 259 Tensor* out_indices, Tensor* out_scores) { 260 const Tensor& unsorted_scores_tensor = outputs[0]; 261 auto unsorted_scores_flat = unsorted_scores_tensor.flat<float>(); 262 std::vector<std::pair<int, float>> scores; 263 for (int i = 0; i < unsorted_scores_flat.size(); ++i) { 264 scores.push_back(std::pair<int, float>({i, unsorted_scores_flat(i)})); 265 } 266 std::sort(scores.begin(), scores.end(), 267 [](const std::pair<int, float>& left, 268 const std::pair<int, float>& right) { 269 return left.second > right.second; 270 }); 271 scores.resize(how_many_labels); 272 Tensor sorted_indices(tensorflow::DT_INT32, {scores.size()}); 273 Tensor sorted_scores(tensorflow::DT_FLOAT, {scores.size()}); 274 for (int i = 0; i < scores.size(); ++i) { 275 sorted_indices.flat<int>()(i) = scores[i].first; 276 sorted_scores.flat<float>()(i) = scores[i].second; 277 } 278 *out_indices = sorted_indices; 279 *out_scores = sorted_scores; 280 return Status::OK(); 281 } 282 283 // Takes a file name, and loads a list of labels from it, one per line, and 284 // returns a vector of the strings. It pads with empty strings so the length 285 // of the result is a multiple of 16, because our model expects that. 286 Status ReadLabelsFile(string file_name, std::vector<string>* result, 287 size_t* found_label_count) { 288 std::ifstream file(file_name); 289 if (!file) { 290 return tensorflow::errors::NotFound("Labels file ", file_name, 291 " not found."); 292 } 293 result->clear(); 294 string line; 295 while (std::getline(file, line)) { 296 result->push_back(line); 297 } 298 *found_label_count = result->size(); 299 const int padding = 16; 300 while (result->size() % padding) { 301 result->emplace_back(); 302 } 303 return Status::OK(); 304 } 305 306 // Given the output of a model run, and the name of a file containing the labels 307 // this prints out the top five highest-scoring values. 
308 Status PrintTopLabels(const std::vector<Tensor>& outputs, 309 const std::vector<string>& labels, int label_count, 310 float print_threshold) { 311 const int how_many_labels = std::min(5, static_cast<int>(label_count)); 312 Tensor indices; 313 Tensor scores; 314 TF_RETURN_IF_ERROR(GetTopLabels(outputs, how_many_labels, &indices, &scores)); 315 tensorflow::TTypes<float>::Flat scores_flat = scores.flat<float>(); 316 tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>(); 317 for (int pos = 0; pos < how_many_labels; ++pos) { 318 const int label_index = indices_flat(pos); 319 const float score = scores_flat(pos); 320 LOG(INFO) << labels[label_index] << " (" << label_index << "): " << score; 321 // Print the top label to stdout if it's above a threshold. 322 if ((pos == 0) && (score > print_threshold)) { 323 std::cout << labels[label_index] << std::endl; 324 } 325 } 326 return Status::OK(); 327 } 328 329 // Given an image buffer, resize it to the requested size, and then scale the 330 // values as desired. 331 Status TensorFromFrame(uint8_t* image_data, int image_width, int image_height, 332 int image_channels, const int wanted_height, 333 const int wanted_width, const float input_mean, 334 const float input_std, 335 std::vector<Tensor>* out_tensors) { 336 const int wanted_channels = 3; 337 if (image_channels < wanted_channels) { 338 return tensorflow::errors::FailedPrecondition( 339 "Image needs to have at least ", wanted_channels, " but only has ", 340 image_channels); 341 } 342 // In these loops, we convert the eight-bit data in the image into float, 343 // resize it using bilinear filtering, and scale it numerically to the float 344 // range that the model expects (given by input_mean and input_std). 
345 tensorflow::Tensor image_tensor( 346 tensorflow::DT_FLOAT, 347 tensorflow::TensorShape( 348 {1, wanted_height, wanted_width, wanted_channels})); 349 auto image_tensor_mapped = image_tensor.tensor<float, 4>(); 350 tensorflow::uint8* in = image_data; 351 float* out = image_tensor_mapped.data(); 352 const size_t image_rowlen = image_width * image_channels; 353 const float width_scale = static_cast<float>(image_width) / wanted_width; 354 const float height_scale = static_cast<float>(image_height) / wanted_height; 355 for (int y = 0; y < wanted_height; ++y) { 356 const float in_y = y * height_scale; 357 const int top_y_index = static_cast<int>(floorf(in_y)); 358 const int bottom_y_index = 359 std::min(static_cast<int>(ceilf(in_y)), (image_height - 1)); 360 const float y_lerp = in_y - top_y_index; 361 tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen); 362 tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen); 363 float* out_row = out + (y * wanted_width * wanted_channels); 364 for (int x = 0; x < wanted_width; ++x) { 365 const float in_x = x * width_scale; 366 const int left_x_index = static_cast<int>(floorf(in_x)); 367 const int right_x_index = 368 std::min(static_cast<int>(ceilf(in_x)), (image_width - 1)); 369 tensorflow::uint8* in_top_left_pixel = 370 in_top_row + (left_x_index * wanted_channels); 371 tensorflow::uint8* in_top_right_pixel = 372 in_top_row + (right_x_index * wanted_channels); 373 tensorflow::uint8* in_bottom_left_pixel = 374 in_bottom_row + (left_x_index * wanted_channels); 375 tensorflow::uint8* in_bottom_right_pixel = 376 in_bottom_row + (right_x_index * wanted_channels); 377 const float x_lerp = in_x - left_x_index; 378 float* out_pixel = out_row + (x * wanted_channels); 379 for (int c = 0; c < wanted_channels; ++c) { 380 const float top_left((in_top_left_pixel[c] - input_mean) / input_std); 381 const float top_right((in_top_right_pixel[c] - input_mean) / input_std); 382 const float 
bottom_left((in_bottom_left_pixel[c] - input_mean) / 383 input_std); 384 const float bottom_right((in_bottom_right_pixel[c] - input_mean) / 385 input_std); 386 const float top = top_left + (top_right - top_left) * x_lerp; 387 const float bottom = 388 bottom_left + (bottom_right - bottom_left) * x_lerp; 389 out_pixel[c] = top + (bottom - top) * y_lerp; 390 } 391 } 392 } 393 394 out_tensors->push_back(image_tensor); 395 return Status::OK(); 396 } 397 398 int main(int argc, char** argv) { 399 string graph = 400 "tensorflow/contrib/pi_examples/label_image/data/" 401 "tensorflow_inception_stripped.pb"; 402 string labels_file_name = 403 "tensorflow/contrib/pi_examples/label_image/data/" 404 "imagenet_comp_graph_label_strings.txt"; 405 int32 input_width = 299; 406 int32 input_height = 299; 407 int32 input_mean = 128; 408 int32 input_std = 128; 409 string input_layer = "Mul"; 410 string output_layer = "softmax"; 411 int32 video_width = 640; 412 int32 video_height = 480; 413 int print_threshold = 50; 414 string root_dir = ""; 415 std::vector<Flag> flag_list = { 416 Flag("graph", &graph, "graph file name"), 417 Flag("labels", &labels_file_name, "labels file name"), 418 Flag("input_width", &input_width, "image input width"), 419 Flag("input_height", &input_height, "image input height"), 420 Flag("input_mean", &input_mean, "transformed mean of input pixels"), 421 Flag("input_std", &input_std, "transformed std dev of input pixels"), 422 Flag("input_layer", &input_layer, "input layer name"), 423 Flag("output_layer", &output_layer, "output layer name"), 424 Flag("video_width", &video_width, "video width expected from device"), 425 Flag("video_height", &video_height, "video height expected from device"), 426 Flag("print_threshold", &print_threshold, 427 "print labels with scoe exceeding this"), 428 Flag("root_dir", &root_dir, 429 "interpret graph file name relative to this directory")}; 430 string usage = tensorflow::Flags::Usage(argv[0], flag_list); 431 const bool parse_result = 
tensorflow::Flags::Parse(&argc, argv, flag_list); 432 433 if (!parse_result || argc != 1) { 434 LOG(ERROR) << "\n" << usage; 435 return -1; 436 } 437 438 // First we load and initialize the model. 439 std::unique_ptr<tensorflow::Session> session; 440 string graph_path = tensorflow::io::JoinPath(root_dir, graph); 441 Status load_graph_status = LoadGraph(graph_path, &session); 442 if (!load_graph_status.ok()) { 443 LOG(ERROR) << load_graph_status; 444 return -1; 445 } 446 447 std::vector<string> labels; 448 size_t label_count; 449 Status read_labels_status = 450 ReadLabelsFile(labels_file_name, &labels, &label_count); 451 if (!read_labels_status.ok()) { 452 LOG(ERROR) << read_labels_status; 453 return -1; 454 } 455 456 int camera_handle; 457 Status open_status = OpenCamera(&camera_handle); 458 if (!open_status.ok()) { 459 LOG(ERROR) << "OpenCamera failed with " << open_status; 460 return -1; 461 } 462 463 Status format_status = 464 SetCameraFormat(camera_handle, video_width, video_height); 465 if (!format_status.ok()) { 466 LOG(ERROR) << "SetCameraFormat failed with " << format_status; 467 return -1; 468 } 469 470 const int how_many_buffers = 2; 471 CameraBuffer* buffers; 472 Status start_capture_status = 473 StartCameraCapture(camera_handle, how_many_buffers, &buffers); 474 if (!start_capture_status.ok()) { 475 LOG(ERROR) << "StartCameraCapture failed with " << start_capture_status; 476 return -1; 477 } 478 479 for (int i = 0; i < 200; i++) { 480 uint8_t* frame_data; 481 int frame_data_size; 482 v4l2_buffer buf; 483 Status capture_next_status = CaptureNextFrame( 484 camera_handle, buffers, &frame_data, &frame_data_size, &buf); 485 if (!capture_next_status.ok()) { 486 LOG(ERROR) << "CaptureNextFrame failed with " << capture_next_status; 487 return -1; 488 } 489 490 std::vector<Tensor> resized_tensors; 491 Status tensor_from_frame_status = 492 TensorFromFrame(frame_data, video_width, video_height, 3, input_height, 493 input_width, input_mean, input_std, 
&resized_tensors); 494 if (!tensor_from_frame_status.ok()) { 495 LOG(ERROR) << tensor_from_frame_status; 496 return -1; 497 } 498 const Tensor& resized_tensor = resized_tensors[0]; 499 500 Status release_frame_status = ReleaseFrame(camera_handle, &buf); 501 if (!release_frame_status.ok()) { 502 LOG(ERROR) << "ReleaseFrame failed with " << release_frame_status; 503 return -1; 504 } 505 506 // Actually run the image through the model. 507 std::vector<Tensor> outputs; 508 Status run_status = session->Run({{input_layer, resized_tensor}}, 509 {output_layer}, {}, &outputs); 510 if (!run_status.ok()) { 511 LOG(ERROR) << "Running model failed: " << run_status; 512 return -1; 513 } 514 515 // Do something interesting with the results we've generated. 516 Status print_status = 517 PrintTopLabels(outputs, labels, label_count, print_threshold * 0.01f); 518 if (!print_status.ok()) { 519 LOG(ERROR) << "Running print failed: " << print_status; 520 return -1; 521 } 522 } 523 524 Status end_capture_status = 525 EndCameraCapture(camera_handle, buffers, how_many_buffers); 526 if (!end_capture_status.ok()) { 527 LOG(ERROR) << "EndCameraCapture failed with " << end_capture_status; 528 return -1; 529 } 530 531 Status close_status = CloseCamera(camera_handle); 532 if (!close_status.ok()) { 533 LOG(ERROR) << "CloseCamera failed with " << open_status; 534 return -1; 535 } 536 537 return 0; 538 } 539