1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/chromeos/drive/search_metadata.h" 6 7 #include <algorithm> 8 #include <queue> 9 10 #include "base/bind.h" 11 #include "base/i18n/string_search.h" 12 #include "base/metrics/histogram.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "base/time/time.h" 15 #include "chrome/browser/chromeos/drive/file_system_util.h" 16 #include "content/public/browser/browser_thread.h" 17 #include "net/base/escape.h" 18 19 using content::BrowserThread; 20 21 namespace drive { 22 namespace internal { 23 24 namespace { 25 26 struct ResultCandidate { 27 ResultCandidate(const std::string& local_id, 28 const ResourceEntry& entry, 29 const std::string& highlighted_base_name) 30 : local_id(local_id), 31 entry(entry), 32 highlighted_base_name(highlighted_base_name) { 33 } 34 35 std::string local_id; 36 ResourceEntry entry; 37 std::string highlighted_base_name; 38 }; 39 40 // Used to sort the result candidates per the last accessed/modified time. The 41 // recently accessed/modified files come first. 42 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) { 43 const PlatformFileInfoProto& a_file_info = a.file_info(); 44 const PlatformFileInfoProto& b_file_info = b.file_info(); 45 46 if (a_file_info.last_accessed() != b_file_info.last_accessed()) 47 return a_file_info.last_accessed() > b_file_info.last_accessed(); 48 49 // When the entries have the same last access time (which happens quite often 50 // because Drive server doesn't set the field until an entry is viewed via 51 // drive.google.com), we use last modified time as the tie breaker. 52 return a_file_info.last_modified() > b_file_info.last_modified(); 53 } 54 55 struct ResultCandidateComparator { 56 bool operator()(const ResultCandidate* a, const ResultCandidate* b) const { 57 return CompareByTimestamp(a->entry, b->entry); 58 } 59 }; 60 61 // A wrapper of std::priority_queue which deals with pointers of values. 62 template<typename T, typename Compare> 63 class ScopedPriorityQueue { 64 public: 65 ScopedPriorityQueue() {} 66 67 ~ScopedPriorityQueue() { 68 while (!empty()) 69 pop(); 70 } 71 72 bool empty() const { return queue_.empty(); } 73 74 size_t size() const { return queue_.size(); } 75 76 const T* top() const { return queue_.top(); } 77 78 void push(T* x) { queue_.push(x); } 79 80 void pop() { 81 // Keep top alive for the pop() call so that debug checks can access 82 // underlying data (e.g. validating heap property of the priority queue 83 // will call the comparator). 84 T* saved_top = queue_.top(); 85 queue_.pop(); 86 delete saved_top; 87 } 88 89 private: 90 std::priority_queue<T*, std::vector<T*>, Compare> queue_; 91 92 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue); 93 }; 94 95 // Classifies the given entry as trashed if it's placed under the trash. 96 class TrashedEntryClassifier { 97 public: 98 explicit TrashedEntryClassifier(ResourceMetadata* metadata) 99 : metadata_(metadata) { 100 trashed_[""] = false; 101 trashed_[util::kDriveTrashDirLocalId] = true; 102 } 103 104 // |result| is set to true if |entry| is under the trash. 105 FileError IsTrashed(const ResourceEntry& entry, bool* result) { 106 // parent_local_id cannot be used to classify the trash itself. 107 if (entry.local_id() == util::kDriveTrashDirLocalId) { 108 *result = true; 109 return FILE_ERROR_OK; 110 } 111 112 // Look up for parents recursively. 113 std::vector<std::string> undetermined_ids; 114 undetermined_ids.push_back(entry.parent_local_id()); 115 116 std::map<std::string, bool>::iterator it = 117 trashed_.find(undetermined_ids.back()); 118 for (; it == trashed_.end(); it = trashed_.find(undetermined_ids.back())) { 119 ResourceEntry parent; 120 FileError error = 121 metadata_->GetResourceEntryById(undetermined_ids.back(), &parent); 122 if (error != FILE_ERROR_OK) 123 return error; 124 undetermined_ids.push_back(parent.parent_local_id()); 125 } 126 127 // Cache the result to |trashed_|. 128 undetermined_ids.pop_back(); // The last one is already in |trashed_|. 129 for (size_t i = 0; i < undetermined_ids.size(); ++i) 130 trashed_[undetermined_ids[i]] = it->second; 131 132 *result = it->second; 133 return FILE_ERROR_OK; 134 } 135 136 private: 137 ResourceMetadata* metadata_; 138 std::map<std::string, bool> trashed_; // local ID to is_trashed map. 139 }; 140 141 // Returns true if |entry| is eligible for the search |options| and should be 142 // tested for the match with the query. If 143 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents 144 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the 145 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only 146 // the entries with shared-with-me label will be tested. If 147 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files 148 // match with the query. This option can not be used with other options. 149 bool IsEligibleEntry(const ResourceEntry& entry, 150 ResourceMetadata::Iterator* it, 151 int options) { 152 if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) && 153 entry.file_specific_info().is_hosted_document()) 154 return false; 155 156 if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) && 157 entry.file_info().is_directory()) 158 return false; 159 160 if (options & SEARCH_METADATA_SHARED_WITH_ME) 161 return entry.shared_with_me(); 162 163 if (options & SEARCH_METADATA_OFFLINE) { 164 if (entry.file_specific_info().is_hosted_document()) 165 return true; 166 FileCacheEntry cache_entry; 167 it->GetCacheEntry(&cache_entry); 168 return cache_entry.is_present(); 169 } 170 171 // Exclude "drive", "drive/root", and "drive/other". 172 if (it->GetID() == util::kDriveGrandRootLocalId || 173 entry.parent_local_id() == util::kDriveGrandRootLocalId) { 174 return false; 175 } 176 177 return true; 178 } 179 180 // Used to implement SearchMetadata. 181 // Adds entry to the result when appropriate. 182 // In particular, if |query| is non-null, only adds files with the name matching 183 // the query. 184 FileError MaybeAddEntryToResult( 185 ResourceMetadata* resource_metadata, 186 ResourceMetadata::Iterator* it, 187 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 188 int options, 189 size_t at_most_num_matches, 190 TrashedEntryClassifier* trashed_entry_classifier, 191 ScopedPriorityQueue<ResultCandidate, 192 ResultCandidateComparator>* result_candidates) { 193 DCHECK_GE(at_most_num_matches, result_candidates->size()); 194 195 const ResourceEntry& entry = it->GetValue(); 196 197 // If the candidate set is already full, and this |entry| is old, do nothing. 198 // We perform this check first in order to avoid the costly find-and-highlight 199 // or FilePath lookup as much as possible. 200 if (result_candidates->size() == at_most_num_matches && 201 !CompareByTimestamp(entry, result_candidates->top()->entry)) 202 return FILE_ERROR_OK; 203 204 // Add |entry| to the result if the entry is eligible for the given 205 // |options| and matches the query. The base name of the entry must 206 // contain |query| to match the query. 207 std::string highlighted; 208 if (!IsEligibleEntry(entry, it, options) || 209 (query && !FindAndHighlight(entry.base_name(), query, &highlighted))) 210 return FILE_ERROR_OK; 211 212 // Trashed entry should not be returned. 213 bool trashed = false; 214 FileError error = trashed_entry_classifier->IsTrashed(entry, &trashed); 215 if (error != FILE_ERROR_OK || trashed) 216 return error; 217 218 // Make space for |entry| when appropriate. 219 if (result_candidates->size() == at_most_num_matches) 220 result_candidates->pop(); 221 result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted)); 222 return FILE_ERROR_OK; 223 } 224 225 // Implements SearchMetadata(). 226 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata, 227 const std::string& query_text, 228 int options, 229 int at_most_num_matches, 230 MetadataSearchResultVector* results) { 231 ScopedPriorityQueue<ResultCandidate, 232 ResultCandidateComparator> result_candidates; 233 234 // Prepare data structure for searching. 235 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query( 236 base::UTF8ToUTF16(query_text)); 237 238 // Iterate over entries. 239 TrashedEntryClassifier trashed_entry_classifier(resource_metadata); 240 scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator(); 241 for (; !it->IsAtEnd(); it->Advance()) { 242 FileError error = MaybeAddEntryToResult(resource_metadata, it.get(), 243 query_text.empty() ? NULL : &query, 244 options, 245 at_most_num_matches, 246 &trashed_entry_classifier, 247 &result_candidates); 248 if (error != FILE_ERROR_OK) 249 return error; 250 } 251 252 // Prepare the result. 253 for (; !result_candidates.empty(); result_candidates.pop()) { 254 const ResultCandidate& candidate = *result_candidates.top(); 255 // The path field of entries in result_candidates are empty at this point, 256 // because we don't want to run the expensive metadata DB look up except for 257 // the final results. Hence, here we fill the part. 258 base::FilePath path = resource_metadata->GetFilePath(candidate.local_id); 259 if (path.empty()) 260 return FILE_ERROR_FAILED; 261 results->push_back(MetadataSearchResult( 262 path, candidate.entry, candidate.highlighted_base_name)); 263 } 264 265 // Reverse the order here because |result_candidates| puts the most 266 // uninteresting candidate at the top. 267 std::reverse(results->begin(), results->end()); 268 269 return FILE_ERROR_OK; 270 } 271 272 // Runs the SearchMetadataCallback and updates the histogram. 273 void RunSearchMetadataCallback(const SearchMetadataCallback& callback, 274 const base::TimeTicks& start_time, 275 scoped_ptr<MetadataSearchResultVector> results, 276 FileError error) { 277 if (error != FILE_ERROR_OK) 278 results.reset(); 279 callback.Run(error, results.Pass()); 280 281 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime", 282 base::TimeTicks::Now() - start_time); 283 } 284 285 } // namespace 286 287 void SearchMetadata( 288 scoped_refptr<base::SequencedTaskRunner> blocking_task_runner, 289 ResourceMetadata* resource_metadata, 290 const std::string& query, 291 int options, 292 int at_most_num_matches, 293 const SearchMetadataCallback& callback) { 294 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 295 DCHECK_LE(0, at_most_num_matches); 296 DCHECK(!callback.is_null()); 297 298 const base::TimeTicks start_time = base::TimeTicks::Now(); 299 300 scoped_ptr<MetadataSearchResultVector> results( 301 new MetadataSearchResultVector); 302 MetadataSearchResultVector* results_ptr = results.get(); 303 base::PostTaskAndReplyWithResult(blocking_task_runner.get(), 304 FROM_HERE, 305 base::Bind(&SearchMetadataOnBlockingPool, 306 resource_metadata, 307 query, 308 options, 309 at_most_num_matches, 310 results_ptr), 311 base::Bind(&RunSearchMetadataCallback, 312 callback, 313 start_time, 314 base::Passed(&results))); 315 } 316 317 bool FindAndHighlight( 318 const std::string& text, 319 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 320 std::string* highlighted_text) { 321 DCHECK(query); 322 DCHECK(highlighted_text); 323 highlighted_text->clear(); 324 325 base::string16 text16 = base::UTF8ToUTF16(text); 326 size_t match_start = 0; 327 size_t match_length = 0; 328 if (!query->Search(text16, &match_start, &match_length)) 329 return false; 330 331 base::string16 pre = text16.substr(0, match_start); 332 base::string16 match = text16.substr(match_start, match_length); 333 base::string16 post = text16.substr(match_start + match_length); 334 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre))); 335 highlighted_text->append("<b>"); 336 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match))); 337 highlighted_text->append("</b>"); 338 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post))); 339 return true; 340 } 341 342 } // namespace internal 343 } // namespace drive 344