// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#import "RunModelViewController.h"

#include <pthread.h>
#include <unistd.h>

#include <fstream>
#include <queue>
#include <sstream>
#include <string>

#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
#include "google/protobuf/message_lite.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/public/session.h"

#include "ios_image_load.h"

NSString* RunInferenceOnImage();

namespace {

// Adapts a std::ifstream to protobuf's CopyingInputStream interface so the
// graph file can be streamed from disk instead of read whole into memory.
class IfstreamInputStream : public ::google::protobuf::io::CopyingInputStream {
 public:
  explicit IfstreamInputStream(const std::string& file_name)
      : ifs_(file_name.c_str(), std::ios::in | std::ios::binary) {}
  ~IfstreamInputStream() { ifs_.close(); }

  int Read(void* buffer, int size) {
    if (!ifs_) {
      return -1;
    }
    ifs_.read(static_cast<char*>(buffer), size);
    return static_cast<int>(ifs_.gcount());
  }

 private:
  std::ifstream ifs_;
};

}  // namespace

@interface RunModelViewController ()
@end

@implementation RunModelViewController

- (IBAction)getUrl:(id)sender {
  NSString* inference_result = RunInferenceOnImage();
  self.urlContentTextView.text = inference_result;
}

@end

// Returns the top N confidence values over threshold in the provided vector,
// sorted by confidence in descending order.
static void GetTopN(
    const Eigen::TensorMap<Eigen::Tensor<float, 1, Eigen::RowMajor>,
                           Eigen::Aligned>& prediction,
    const int num_results, const float threshold,
    std::vector<std::pair<float, int> >* top_results) {
  // Will contain the top N results, held in ascending order while building.
  std::priority_queue<std::pair<float, int>,
                      std::vector<std::pair<float, int> >,
                      std::greater<std::pair<float, int> > > top_result_pq;

  const long count = prediction.size();
  for (int i = 0; i < count; ++i) {
    const float value = prediction(i);

    // Only add it if it beats the threshold and has a chance at being in
    // the top N.
    if (value < threshold) {
      continue;
    }

    top_result_pq.push(std::pair<float, int>(value, i));

    // If at capacity, kick the smallest value out.
    if (static_cast<int>(top_result_pq.size()) > num_results) {
      top_result_pq.pop();
    }
  }

  // Copy to the output vector and reverse into descending order.
  while (!top_result_pq.empty()) {
    top_results->push_back(top_result_pq.top());
    top_result_pq.pop();
  }
  std::reverse(top_results->begin(), top_results->end());
}

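// A worked example for GetTopN (hypothetical values, not from the sample):
// given predictions {0.02, 0.6, 0.3, 0.05, 0.9} with num_results = 2 and
// threshold = 0.1f, the entries 0.02 and 0.05 never enter the queue, 0.3 is
// evicted once the queue exceeds capacity, and top_results comes back as
// {(0.9, 4), (0.6, 1)} -- highest confidence first.
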
bool PortableReadFileToProto(const std::string& file_name,
                             ::google::protobuf::MessageLite* proto) {
  ::google::protobuf::io::CopyingInputStreamAdaptor stream(
      new IfstreamInputStream(file_name));
  stream.SetOwnsCopyingStream(true);
  // TODO(jiayq): the following coded stream is for debugging purposes to
  // allow one to parse arbitrarily large messages for MessageLite. One most
  // likely doesn't want to put protobufs larger than 64MB on a mobile device,
  // so we should eventually remove this and fail loudly when a large protobuf
  // is passed in.
  ::google::protobuf::io::CodedInputStream coded_stream(&stream);
  // The total-bytes hard limit and warning limit are set to 1GB (1024 << 20)
  // and 512MB (512 << 20) respectively.
  coded_stream.SetTotalBytesLimit(1024LL << 20, 512LL << 20);
  return proto->ParseFromCodedStream(&coded_stream);
}

NSString* FilePathForResourceName(NSString* name, NSString* extension) {
  NSString* file_path =
      [[NSBundle mainBundle] pathForResource:name ofType:extension];
  if (file_path == nil) {
    LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "."
               << [extension UTF8String] << "' in bundle.";
  }
  return file_path;
}

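// A minimal usage sketch for the two helpers above, assuming a hypothetical
// "my_model.pb" resource bundled with the app:
//
//   tensorflow::GraphDef graph_def;
//   NSString* path = FilePathForResourceName(@"my_model", @"pb");
//   if (!PortableReadFileToProto([path UTF8String], &graph_def)) {
//     LOG(ERROR) << "Couldn't parse proto at " << [path UTF8String];
//   }
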
NSString* RunInferenceOnImage() {
  tensorflow::SessionOptions options;

  tensorflow::Session* session_pointer = nullptr;
  tensorflow::Status session_status =
      tensorflow::NewSession(options, &session_pointer);
  if (!session_status.ok()) {
    std::string status_string = session_status.ToString();
    return [NSString stringWithFormat: @"Session create failed - %s",
                                       status_string.c_str()];
  }
  std::unique_ptr<tensorflow::Session> session(session_pointer);
  LOG(INFO) << "Session created.";

  tensorflow::GraphDef tensorflow_graph;
  LOG(INFO) << "Graph created.";

  NSString* network_path =
      FilePathForResourceName(@"tensorflow_inception_graph", @"pb");
  PortableReadFileToProto([network_path UTF8String], &tensorflow_graph);

  LOG(INFO) << "Creating session.";
  tensorflow::Status s = session->Create(tensorflow_graph);
  if (!s.ok()) {
    LOG(ERROR) << "Could not create TensorFlow Graph: " << s;
    return @"";
  }

  // Read the label list, one label per line.
  NSString* labels_path =
      FilePathForResourceName(@"imagenet_comp_graph_label_strings", @"txt");
  std::vector<std::string> label_strings;
  std::ifstream t([labels_path UTF8String]);
  std::string line;
  while (std::getline(t, line)) {
    label_strings.push_back(line);
  }
  t.close();

  // Read the Grace Hopper image.
  NSString* image_path = FilePathForResourceName(@"grace_hopper", @"jpg");
  int image_width;
  int image_height;
  int image_channels;
  std::vector<tensorflow::uint8> image_data = LoadImageFromFile(
      [image_path UTF8String], &image_width, &image_height, &image_channels);
  const int wanted_width = 224;
  const int wanted_height = 224;
  const int wanted_channels = 3;
  const float input_mean = 117.0f;
  const float input_std = 1.0f;
  assert(image_channels >= wanted_channels);
  tensorflow::Tensor image_tensor(
      tensorflow::DT_FLOAT,
      tensorflow::TensorShape(
          {1, wanted_height, wanted_width, wanted_channels}));
  auto image_tensor_mapped = image_tensor.tensor<float, 4>();
  tensorflow::uint8* in = image_data.data();
  float* out = image_tensor_mapped.data();
  // Scale the image down to the network's 224x224 input with nearest-neighbor
  // sampling, normalizing each channel to (value - mean) / std.
  for (int y = 0; y < wanted_height; ++y) {
    const int in_y = (y * image_height) / wanted_height;
    tensorflow::uint8* in_row = in + (in_y * image_width * image_channels);
    float* out_row = out + (y * wanted_width * wanted_channels);
    for (int x = 0; x < wanted_width; ++x) {
      const int in_x = (x * image_width) / wanted_width;
      tensorflow::uint8* in_pixel = in_row + (in_x * image_channels);
      float* out_pixel = out_row + (x * wanted_channels);
      for (int c = 0; c < wanted_channels; ++c) {
        out_pixel[c] = (in_pixel[c] - input_mean) / input_std;
      }
    }
  }

  NSString* result = [network_path stringByAppendingString: @" - loaded!"];
  result = [NSString stringWithFormat: @"%@ - %lu, %s - %dx%d", result,
            label_strings.size(), label_strings[0].c_str(), image_width,
            image_height];

  std::string input_layer = "input";
  std::string output_layer = "output";
  std::vector<tensorflow::Tensor> outputs;
  tensorflow::Status run_status = session->Run({{input_layer, image_tensor}},
                                               {output_layer}, {}, &outputs);
  if (!run_status.ok()) {
    LOG(ERROR) << "Running model failed: " << run_status;
    tensorflow::LogAllRegisteredKernels();
    result = @"Error running model";
    return result;
  }
  tensorflow::string status_string = run_status.ToString();
  result = [NSString stringWithFormat: @"%@ - %s", result,
                                       status_string.c_str()];

  tensorflow::Tensor* output = &outputs[0];
  const int kNumResults = 5;
  const float kThreshold = 0.1f;
  std::vector<std::pair<float, int> > top_results;
  GetTopN(output->flat<float>(), kNumResults, kThreshold, &top_results);

  std::stringstream ss;
  ss.precision(3);
  for (const auto& top_result : top_results) {
    const float confidence = top_result.first;
    const int index = top_result.second;

    ss << index << " " << confidence << " ";

    // Write out the result as a string.
    if (index < static_cast<int>(label_strings.size())) {
      // Just for safety: theoretically the index stays under 1000 unless
      // there are numerical issues leading to a wrong prediction.
      ss << label_strings[index];
    } else {
      ss << "Prediction: " << index;
    }

    ss << "\n";
  }

  LOG(INFO) << "Predictions: " << ss.str();

  tensorflow::string predictions = ss.str();
  result = [NSString stringWithFormat: @"%@ - %s", result,
                                       predictions.c_str()];

  return result;
}
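
// Note: RunInferenceOnImage() blocks the calling thread while the graph is
// loaded and run. A sketch of one way to keep the UI responsive (not part of
// the original sample) would be to dispatch the work in -getUrl::
//
//   dispatch_async(
//       dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
//     NSString* inference_result = RunInferenceOnImage();
//     dispatch_async(dispatch_get_main_queue(), ^{
//       self.urlContentTextView.text = inference_result;
//     });
//   });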