1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/chromeos/drive/search_metadata.h" 6 7 #include <algorithm> 8 #include <queue> 9 10 #include "base/bind.h" 11 #include "base/i18n/string_search.h" 12 #include "base/metrics/histogram.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "base/time/time.h" 15 #include "chrome/browser/chromeos/drive/file_system_util.h" 16 #include "content/public/browser/browser_thread.h" 17 #include "google_apis/drive/gdata_wapi_parser.h" 18 #include "net/base/escape.h" 19 20 using content::BrowserThread; 21 22 namespace drive { 23 namespace internal { 24 25 namespace { 26 27 struct ResultCandidate { 28 ResultCandidate(const std::string& local_id, 29 const ResourceEntry& entry, 30 const std::string& highlighted_base_name) 31 : local_id(local_id), 32 entry(entry), 33 highlighted_base_name(highlighted_base_name) { 34 } 35 36 std::string local_id; 37 ResourceEntry entry; 38 std::string highlighted_base_name; 39 }; 40 41 // Used to sort the result candidates per the last accessed/modified time. The 42 // recently accessed/modified files come first. 43 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) { 44 const PlatformFileInfoProto& a_file_info = a.file_info(); 45 const PlatformFileInfoProto& b_file_info = b.file_info(); 46 47 if (a_file_info.last_accessed() != b_file_info.last_accessed()) 48 return a_file_info.last_accessed() > b_file_info.last_accessed(); 49 50 // When the entries have the same last access time (which happens quite often 51 // because Drive server doesn't set the field until an entry is viewed via 52 // drive.google.com), we use last modified time as the tie breaker. 53 return a_file_info.last_modified() > b_file_info.last_modified(); 54 } 55 56 struct ResultCandidateComparator { 57 bool operator()(const ResultCandidate* a, const ResultCandidate* b) const { 58 return CompareByTimestamp(a->entry, b->entry); 59 } 60 }; 61 62 // A wrapper of std::priority_queue which deals with pointers of values. 63 template<typename T, typename Compare> 64 class ScopedPriorityQueue { 65 public: 66 ScopedPriorityQueue() {} 67 68 ~ScopedPriorityQueue() { 69 while (!empty()) 70 pop(); 71 } 72 73 bool empty() const { return queue_.empty(); } 74 75 size_t size() const { return queue_.size(); } 76 77 const T* top() const { return queue_.top(); } 78 79 void push(T* x) { queue_.push(x); } 80 81 void pop() { 82 // Keep top alive for the pop() call so that debug checks can access 83 // underlying data (e.g. validating heap property of the priority queue 84 // will call the comparator). 85 T* saved_top = queue_.top(); 86 queue_.pop(); 87 delete saved_top; 88 } 89 90 private: 91 std::priority_queue<T*, std::vector<T*>, Compare> queue_; 92 93 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue); 94 }; 95 96 // Classifies the given entry as hidden if it's not under specific directories. 97 class HiddenEntryClassifier { 98 public: 99 HiddenEntryClassifier(ResourceMetadata* metadata, 100 const std::string& mydrive_local_id) 101 : metadata_(metadata) { 102 // Only things under My Drive and drive/other are not hidden. 103 is_hiding_child_[mydrive_local_id] = false; 104 is_hiding_child_[util::kDriveOtherDirLocalId] = false; 105 106 // Everything else is hidden, including the directories mentioned above 107 // themselves. 108 is_hiding_child_[""] = true; 109 } 110 111 // |result| is set to true if |entry| is hidden. 112 FileError IsHidden(const ResourceEntry& entry, bool* result) { 113 // Look up for parents recursively. 114 std::vector<std::string> undetermined_ids; 115 undetermined_ids.push_back(entry.parent_local_id()); 116 117 std::map<std::string, bool>::iterator it = 118 is_hiding_child_.find(undetermined_ids.back()); 119 for (; it == is_hiding_child_.end(); 120 it = is_hiding_child_.find(undetermined_ids.back())) { 121 ResourceEntry parent; 122 FileError error = 123 metadata_->GetResourceEntryById(undetermined_ids.back(), &parent); 124 if (error != FILE_ERROR_OK) 125 return error; 126 undetermined_ids.push_back(parent.parent_local_id()); 127 } 128 129 // Cache the result. 130 undetermined_ids.pop_back(); // The last one is already in the map. 131 for (size_t i = 0; i < undetermined_ids.size(); ++i) 132 is_hiding_child_[undetermined_ids[i]] = it->second; 133 134 *result = it->second; 135 return FILE_ERROR_OK; 136 } 137 138 private: 139 ResourceMetadata* metadata_; 140 141 // local ID to is_hidden map. 142 std::map<std::string, bool> is_hiding_child_; 143 }; 144 145 // Returns true if |entry| is eligible for the search |options| and should be 146 // tested for the match with the query. If 147 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents 148 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the 149 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only 150 // the entries with shared-with-me label will be tested. If 151 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files 152 // match with the query. This option can not be used with other options. 153 bool IsEligibleEntry(const ResourceEntry& entry, int options) { 154 if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) && 155 entry.file_specific_info().is_hosted_document()) 156 return false; 157 158 if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) && 159 entry.file_info().is_directory()) 160 return false; 161 162 if (options & SEARCH_METADATA_SHARED_WITH_ME) 163 return entry.shared_with_me(); 164 165 if (options & SEARCH_METADATA_OFFLINE) { 166 if (entry.file_specific_info().is_hosted_document()) { 167 // Not all hosted documents are cached by Drive offline app. 168 // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467 169 switch (google_apis::ResourceEntry::GetEntryKindFromExtension( 170 entry.file_specific_info().document_extension())) { 171 case google_apis::ENTRY_KIND_DOCUMENT: 172 case google_apis::ENTRY_KIND_SPREADSHEET: 173 case google_apis::ENTRY_KIND_PRESENTATION: 174 case google_apis::ENTRY_KIND_DRAWING: 175 return true; 176 default: 177 return false; 178 } 179 } else { 180 return entry.file_specific_info().cache_state().is_present(); 181 } 182 } 183 184 return true; 185 } 186 187 // Used to implement SearchMetadata. 188 // Adds entry to the result when appropriate. 189 // In particular, if |query| is non-null, only adds files with the name matching 190 // the query. 191 FileError MaybeAddEntryToResult( 192 ResourceMetadata* resource_metadata, 193 ResourceMetadata::Iterator* it, 194 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 195 int options, 196 size_t at_most_num_matches, 197 HiddenEntryClassifier* hidden_entry_classifier, 198 ScopedPriorityQueue<ResultCandidate, 199 ResultCandidateComparator>* result_candidates) { 200 DCHECK_GE(at_most_num_matches, result_candidates->size()); 201 202 const ResourceEntry& entry = it->GetValue(); 203 204 // If the candidate set is already full, and this |entry| is old, do nothing. 205 // We perform this check first in order to avoid the costly find-and-highlight 206 // or FilePath lookup as much as possible. 207 if (result_candidates->size() == at_most_num_matches && 208 !CompareByTimestamp(entry, result_candidates->top()->entry)) 209 return FILE_ERROR_OK; 210 211 // Add |entry| to the result if the entry is eligible for the given 212 // |options| and matches the query. The base name of the entry must 213 // contain |query| to match the query. 214 std::string highlighted; 215 if (!IsEligibleEntry(entry, options) || 216 (query && !FindAndHighlight(entry.base_name(), query, &highlighted))) 217 return FILE_ERROR_OK; 218 219 // Hidden entry should not be returned. 220 bool hidden = false; 221 FileError error = hidden_entry_classifier->IsHidden(entry, &hidden); 222 if (error != FILE_ERROR_OK || hidden) 223 return error; 224 225 // Make space for |entry| when appropriate. 226 if (result_candidates->size() == at_most_num_matches) 227 result_candidates->pop(); 228 result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted)); 229 return FILE_ERROR_OK; 230 } 231 232 // Implements SearchMetadata(). 233 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata, 234 const std::string& query_text, 235 int options, 236 int at_most_num_matches, 237 MetadataSearchResultVector* results) { 238 ScopedPriorityQueue<ResultCandidate, 239 ResultCandidateComparator> result_candidates; 240 241 // Prepare data structure for searching. 242 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query( 243 base::UTF8ToUTF16(query_text)); 244 245 // Prepare an object to filter out hidden entries. 246 ResourceEntry mydrive; 247 FileError error = resource_metadata->GetResourceEntryByPath( 248 util::GetDriveMyDriveRootPath(), &mydrive); 249 if (error != FILE_ERROR_OK) 250 return error; 251 HiddenEntryClassifier hidden_entry_classifier(resource_metadata, 252 mydrive.local_id()); 253 254 // Iterate over entries. 255 scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator(); 256 for (; !it->IsAtEnd(); it->Advance()) { 257 FileError error = MaybeAddEntryToResult(resource_metadata, it.get(), 258 query_text.empty() ? NULL : &query, 259 options, 260 at_most_num_matches, 261 &hidden_entry_classifier, 262 &result_candidates); 263 if (error != FILE_ERROR_OK) 264 return error; 265 } 266 267 // Prepare the result. 268 for (; !result_candidates.empty(); result_candidates.pop()) { 269 const ResultCandidate& candidate = *result_candidates.top(); 270 // The path field of entries in result_candidates are empty at this point, 271 // because we don't want to run the expensive metadata DB look up except for 272 // the final results. Hence, here we fill the part. 273 base::FilePath path; 274 error = resource_metadata->GetFilePath(candidate.local_id, &path); 275 if (error != FILE_ERROR_OK) 276 return error; 277 bool is_directory = candidate.entry.file_info().is_directory(); 278 results->push_back(MetadataSearchResult( 279 path, is_directory, candidate.highlighted_base_name)); 280 } 281 282 // Reverse the order here because |result_candidates| puts the most 283 // uninteresting candidate at the top. 284 std::reverse(results->begin(), results->end()); 285 286 return FILE_ERROR_OK; 287 } 288 289 // Runs the SearchMetadataCallback and updates the histogram. 290 void RunSearchMetadataCallback(const SearchMetadataCallback& callback, 291 const base::TimeTicks& start_time, 292 scoped_ptr<MetadataSearchResultVector> results, 293 FileError error) { 294 if (error != FILE_ERROR_OK) 295 results.reset(); 296 callback.Run(error, results.Pass()); 297 298 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime", 299 base::TimeTicks::Now() - start_time); 300 } 301 302 } // namespace 303 304 void SearchMetadata( 305 scoped_refptr<base::SequencedTaskRunner> blocking_task_runner, 306 ResourceMetadata* resource_metadata, 307 const std::string& query, 308 int options, 309 int at_most_num_matches, 310 const SearchMetadataCallback& callback) { 311 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 312 DCHECK_LE(0, at_most_num_matches); 313 DCHECK(!callback.is_null()); 314 315 const base::TimeTicks start_time = base::TimeTicks::Now(); 316 317 scoped_ptr<MetadataSearchResultVector> results( 318 new MetadataSearchResultVector); 319 MetadataSearchResultVector* results_ptr = results.get(); 320 base::PostTaskAndReplyWithResult(blocking_task_runner.get(), 321 FROM_HERE, 322 base::Bind(&SearchMetadataOnBlockingPool, 323 resource_metadata, 324 query, 325 options, 326 at_most_num_matches, 327 results_ptr), 328 base::Bind(&RunSearchMetadataCallback, 329 callback, 330 start_time, 331 base::Passed(&results))); 332 } 333 334 bool FindAndHighlight( 335 const std::string& text, 336 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 337 std::string* highlighted_text) { 338 DCHECK(query); 339 DCHECK(highlighted_text); 340 highlighted_text->clear(); 341 342 base::string16 text16 = base::UTF8ToUTF16(text); 343 size_t match_start = 0; 344 size_t match_length = 0; 345 if (!query->Search(text16, &match_start, &match_length)) 346 return false; 347 348 base::string16 pre = text16.substr(0, match_start); 349 base::string16 match = text16.substr(match_start, match_length); 350 base::string16 post = text16.substr(match_start + match_length); 351 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre))); 352 highlighted_text->append("<b>"); 353 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match))); 354 highlighted_text->append("</b>"); 355 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post))); 356 return true; 357 } 358 359 } // namespace internal 360 } // namespace drive 361