1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/chromeos/drive/search_metadata.h" 6 7 #include <algorithm> 8 #include <queue> 9 10 #include "base/bind.h" 11 #include "base/i18n/string_search.h" 12 #include "base/metrics/histogram.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "base/time/time.h" 15 #include "chrome/browser/chromeos/drive/file_system_util.h" 16 #include "chrome/browser/drive/drive_api_util.h" 17 #include "content/public/browser/browser_thread.h" 18 #include "net/base/escape.h" 19 20 using content::BrowserThread; 21 22 namespace drive { 23 namespace internal { 24 25 namespace { 26 27 struct ResultCandidate { 28 ResultCandidate(const std::string& local_id, 29 const ResourceEntry& entry, 30 const std::string& highlighted_base_name) 31 : local_id(local_id), 32 entry(entry), 33 highlighted_base_name(highlighted_base_name) { 34 } 35 36 std::string local_id; 37 ResourceEntry entry; 38 std::string highlighted_base_name; 39 }; 40 41 // Used to sort the result candidates per the last accessed/modified time. The 42 // recently accessed/modified files come first. 43 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) { 44 const PlatformFileInfoProto& a_file_info = a.file_info(); 45 const PlatformFileInfoProto& b_file_info = b.file_info(); 46 47 if (a_file_info.last_accessed() != b_file_info.last_accessed()) 48 return a_file_info.last_accessed() > b_file_info.last_accessed(); 49 50 // When the entries have the same last access time (which happens quite often 51 // because Drive server doesn't set the field until an entry is viewed via 52 // drive.google.com), we use last modified time as the tie breaker. 53 return a_file_info.last_modified() > b_file_info.last_modified(); 54 } 55 56 struct ResultCandidateComparator { 57 bool operator()(const ResultCandidate* a, const ResultCandidate* b) const { 58 return CompareByTimestamp(a->entry, b->entry); 59 } 60 }; 61 62 // A wrapper of std::priority_queue which deals with pointers of values. 63 template<typename T, typename Compare> 64 class ScopedPriorityQueue { 65 public: 66 ScopedPriorityQueue() {} 67 68 ~ScopedPriorityQueue() { 69 while (!empty()) 70 pop(); 71 } 72 73 bool empty() const { return queue_.empty(); } 74 75 size_t size() const { return queue_.size(); } 76 77 const T* top() const { return queue_.top(); } 78 79 void push(T* x) { queue_.push(x); } 80 81 void pop() { 82 // Keep top alive for the pop() call so that debug checks can access 83 // underlying data (e.g. validating heap property of the priority queue 84 // will call the comparator). 85 T* saved_top = queue_.top(); 86 queue_.pop(); 87 delete saved_top; 88 } 89 90 private: 91 std::priority_queue<T*, std::vector<T*>, Compare> queue_; 92 93 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue); 94 }; 95 96 // Classifies the given entry as hidden if it's not under specific directories. 97 class HiddenEntryClassifier { 98 public: 99 HiddenEntryClassifier(ResourceMetadata* metadata, 100 const std::string& mydrive_local_id) 101 : metadata_(metadata) { 102 // Only things under My Drive and drive/other are not hidden. 103 is_hiding_child_[mydrive_local_id] = false; 104 is_hiding_child_[util::kDriveOtherDirLocalId] = false; 105 106 // Everything else is hidden, including the directories mentioned above 107 // themselves. 108 is_hiding_child_[""] = true; 109 } 110 111 // |result| is set to true if |entry| is hidden. 112 FileError IsHidden(const ResourceEntry& entry, bool* result) { 113 // Look up for parents recursively. 114 std::vector<std::string> undetermined_ids; 115 undetermined_ids.push_back(entry.parent_local_id()); 116 117 std::map<std::string, bool>::iterator it = 118 is_hiding_child_.find(undetermined_ids.back()); 119 for (; it == is_hiding_child_.end(); 120 it = is_hiding_child_.find(undetermined_ids.back())) { 121 ResourceEntry parent; 122 FileError error = 123 metadata_->GetResourceEntryById(undetermined_ids.back(), &parent); 124 if (error != FILE_ERROR_OK) 125 return error; 126 undetermined_ids.push_back(parent.parent_local_id()); 127 } 128 129 // Cache the result. 130 undetermined_ids.pop_back(); // The last one is already in the map. 131 for (size_t i = 0; i < undetermined_ids.size(); ++i) 132 is_hiding_child_[undetermined_ids[i]] = it->second; 133 134 *result = it->second; 135 return FILE_ERROR_OK; 136 } 137 138 private: 139 ResourceMetadata* metadata_; 140 141 // local ID to is_hidden map. 142 std::map<std::string, bool> is_hiding_child_; 143 }; 144 145 // Returns true if |entry| is eligible for the search |options| and should be 146 // tested for the match with the query. If 147 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents 148 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the 149 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only 150 // the entries with shared-with-me label will be tested. If 151 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files 152 // match with the query. This option can not be used with other options. 153 bool IsEligibleEntry(const ResourceEntry& entry, int options) { 154 if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) && 155 entry.file_specific_info().is_hosted_document()) 156 return false; 157 158 if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) && 159 entry.file_info().is_directory()) 160 return false; 161 162 if (options & SEARCH_METADATA_SHARED_WITH_ME) 163 return entry.shared_with_me(); 164 165 if (options & SEARCH_METADATA_OFFLINE) { 166 if (entry.file_specific_info().is_hosted_document()) { 167 // Not all hosted documents are cached by Drive offline app. 168 // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467 169 std::string mime_type = entry.file_specific_info().content_mime_type(); 170 return mime_type == drive::util::kGoogleDocumentMimeType || 171 mime_type == drive::util::kGoogleSpreadsheetMimeType || 172 mime_type == drive::util::kGooglePresentationMimeType || 173 mime_type == drive::util::kGoogleDrawingMimeType; 174 } else { 175 return entry.file_specific_info().cache_state().is_present(); 176 } 177 } 178 179 return true; 180 } 181 182 // Used to implement SearchMetadata. 183 // Adds entry to the result when appropriate. 184 // In particular, if |query| is non-null, only adds files with the name matching 185 // the query. 186 FileError MaybeAddEntryToResult( 187 ResourceMetadata* resource_metadata, 188 ResourceMetadata::Iterator* it, 189 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 190 int options, 191 size_t at_most_num_matches, 192 HiddenEntryClassifier* hidden_entry_classifier, 193 ScopedPriorityQueue<ResultCandidate, 194 ResultCandidateComparator>* result_candidates) { 195 DCHECK_GE(at_most_num_matches, result_candidates->size()); 196 197 const ResourceEntry& entry = it->GetValue(); 198 199 // If the candidate set is already full, and this |entry| is old, do nothing. 200 // We perform this check first in order to avoid the costly find-and-highlight 201 // or FilePath lookup as much as possible. 202 if (result_candidates->size() == at_most_num_matches && 203 !CompareByTimestamp(entry, result_candidates->top()->entry)) 204 return FILE_ERROR_OK; 205 206 // Add |entry| to the result if the entry is eligible for the given 207 // |options| and matches the query. The base name of the entry must 208 // contain |query| to match the query. 209 std::string highlighted; 210 if (!IsEligibleEntry(entry, options) || 211 (query && !FindAndHighlight(entry.base_name(), query, &highlighted))) 212 return FILE_ERROR_OK; 213 214 // Hidden entry should not be returned. 215 bool hidden = false; 216 FileError error = hidden_entry_classifier->IsHidden(entry, &hidden); 217 if (error != FILE_ERROR_OK || hidden) 218 return error; 219 220 // Make space for |entry| when appropriate. 221 if (result_candidates->size() == at_most_num_matches) 222 result_candidates->pop(); 223 result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted)); 224 return FILE_ERROR_OK; 225 } 226 227 // Implements SearchMetadata(). 228 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata, 229 const std::string& query_text, 230 int options, 231 int at_most_num_matches, 232 MetadataSearchResultVector* results) { 233 ScopedPriorityQueue<ResultCandidate, 234 ResultCandidateComparator> result_candidates; 235 236 // Prepare data structure for searching. 237 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query( 238 base::UTF8ToUTF16(query_text)); 239 240 // Prepare an object to filter out hidden entries. 241 ResourceEntry mydrive; 242 FileError error = resource_metadata->GetResourceEntryByPath( 243 util::GetDriveMyDriveRootPath(), &mydrive); 244 if (error != FILE_ERROR_OK) 245 return error; 246 HiddenEntryClassifier hidden_entry_classifier(resource_metadata, 247 mydrive.local_id()); 248 249 // Iterate over entries. 250 scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator(); 251 for (; !it->IsAtEnd(); it->Advance()) { 252 FileError error = MaybeAddEntryToResult(resource_metadata, it.get(), 253 query_text.empty() ? NULL : &query, 254 options, 255 at_most_num_matches, 256 &hidden_entry_classifier, 257 &result_candidates); 258 if (error != FILE_ERROR_OK) 259 return error; 260 } 261 262 // Prepare the result. 263 for (; !result_candidates.empty(); result_candidates.pop()) { 264 const ResultCandidate& candidate = *result_candidates.top(); 265 // The path field of entries in result_candidates are empty at this point, 266 // because we don't want to run the expensive metadata DB look up except for 267 // the final results. Hence, here we fill the part. 268 base::FilePath path; 269 error = resource_metadata->GetFilePath(candidate.local_id, &path); 270 if (error != FILE_ERROR_OK) 271 return error; 272 bool is_directory = candidate.entry.file_info().is_directory(); 273 results->push_back(MetadataSearchResult( 274 path, is_directory, candidate.highlighted_base_name)); 275 } 276 277 // Reverse the order here because |result_candidates| puts the most 278 // uninteresting candidate at the top. 279 std::reverse(results->begin(), results->end()); 280 281 return FILE_ERROR_OK; 282 } 283 284 // Runs the SearchMetadataCallback and updates the histogram. 285 void RunSearchMetadataCallback(const SearchMetadataCallback& callback, 286 const base::TimeTicks& start_time, 287 scoped_ptr<MetadataSearchResultVector> results, 288 FileError error) { 289 if (error != FILE_ERROR_OK) 290 results.reset(); 291 callback.Run(error, results.Pass()); 292 293 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime", 294 base::TimeTicks::Now() - start_time); 295 } 296 297 } // namespace 298 299 void SearchMetadata( 300 scoped_refptr<base::SequencedTaskRunner> blocking_task_runner, 301 ResourceMetadata* resource_metadata, 302 const std::string& query, 303 int options, 304 int at_most_num_matches, 305 const SearchMetadataCallback& callback) { 306 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 307 DCHECK_LE(0, at_most_num_matches); 308 DCHECK(!callback.is_null()); 309 310 const base::TimeTicks start_time = base::TimeTicks::Now(); 311 312 scoped_ptr<MetadataSearchResultVector> results( 313 new MetadataSearchResultVector); 314 MetadataSearchResultVector* results_ptr = results.get(); 315 base::PostTaskAndReplyWithResult(blocking_task_runner.get(), 316 FROM_HERE, 317 base::Bind(&SearchMetadataOnBlockingPool, 318 resource_metadata, 319 query, 320 options, 321 at_most_num_matches, 322 results_ptr), 323 base::Bind(&RunSearchMetadataCallback, 324 callback, 325 start_time, 326 base::Passed(&results))); 327 } 328 329 bool FindAndHighlight( 330 const std::string& text, 331 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 332 std::string* highlighted_text) { 333 DCHECK(query); 334 DCHECK(highlighted_text); 335 highlighted_text->clear(); 336 337 base::string16 text16 = base::UTF8ToUTF16(text); 338 size_t match_start = 0; 339 size_t match_length = 0; 340 if (!query->Search(text16, &match_start, &match_length)) 341 return false; 342 343 base::string16 pre = text16.substr(0, match_start); 344 base::string16 match = text16.substr(match_start, match_length); 345 base::string16 post = text16.substr(match_start + match_length); 346 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre))); 347 highlighted_text->append("<b>"); 348 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match))); 349 highlighted_text->append("</b>"); 350 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post))); 351 return true; 352 } 353 354 } // namespace internal 355 } // namespace drive 356