Home | History | Annotate | Download | only in drive
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/chromeos/drive/search_metadata.h"
      6 
      7 #include <algorithm>
      8 #include <queue>
      9 
     10 #include "base/bind.h"
     11 #include "base/i18n/string_search.h"
     12 #include "base/metrics/histogram.h"
     13 #include "base/strings/utf_string_conversions.h"
     14 #include "base/time/time.h"
     15 #include "chrome/browser/chromeos/drive/file_system_util.h"
     16 #include "content/public/browser/browser_thread.h"
     17 #include "net/base/escape.h"
     18 
     19 using content::BrowserThread;
     20 
     21 namespace drive {
     22 namespace internal {
     23 
     24 namespace {
     25 
     26 struct ResultCandidate {
     27   ResultCandidate(const std::string& local_id,
     28                   const ResourceEntry& entry,
     29                   const std::string& highlighted_base_name)
     30       : local_id(local_id),
     31         entry(entry),
     32         highlighted_base_name(highlighted_base_name) {
     33   }
     34 
     35   std::string local_id;
     36   ResourceEntry entry;
     37   std::string highlighted_base_name;
     38 };
     39 
     40 // Used to sort the result candidates per the last accessed/modified time. The
     41 // recently accessed/modified files come first.
     42 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
     43   const PlatformFileInfoProto& a_file_info = a.file_info();
     44   const PlatformFileInfoProto& b_file_info = b.file_info();
     45 
     46   if (a_file_info.last_accessed() != b_file_info.last_accessed())
     47     return a_file_info.last_accessed() > b_file_info.last_accessed();
     48 
     49   // When the entries have the same last access time (which happens quite often
     50   // because Drive server doesn't set the field until an entry is viewed via
     51   // drive.google.com), we use last modified time as the tie breaker.
     52   return a_file_info.last_modified() > b_file_info.last_modified();
     53 }
     54 
     55 struct ResultCandidateComparator {
     56   bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
     57     return CompareByTimestamp(a->entry, b->entry);
     58   }
     59 };
     60 
     61 // A wrapper of std::priority_queue which deals with pointers of values.
     62 template<typename T, typename Compare>
     63 class ScopedPriorityQueue {
     64  public:
     65   ScopedPriorityQueue() {}
     66 
     67   ~ScopedPriorityQueue() {
     68     while (!empty())
     69       pop();
     70   }
     71 
     72   bool empty() const { return queue_.empty(); }
     73 
     74   size_t size() const { return queue_.size(); }
     75 
     76   const T* top() const { return queue_.top(); }
     77 
     78   void push(T* x) { queue_.push(x); }
     79 
     80   void pop() {
     81     // Keep top alive for the pop() call so that debug checks can access
     82     // underlying data (e.g. validating heap property of the priority queue
     83     // will call the comparator).
     84     T* saved_top = queue_.top();
     85     queue_.pop();
     86     delete saved_top;
     87   }
     88 
     89  private:
     90   std::priority_queue<T*, std::vector<T*>, Compare> queue_;
     91 
     92   DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
     93 };
     94 
     95 // Classifies the given entry as trashed if it's placed under the trash.
     96 class TrashedEntryClassifier {
     97  public:
     98   explicit TrashedEntryClassifier(ResourceMetadata* metadata)
     99       : metadata_(metadata) {
    100     trashed_[""] = false;
    101     trashed_[util::kDriveTrashDirLocalId] = true;
    102   }
    103 
    104   // |result| is set to true if |entry| is under the trash.
    105   FileError IsTrashed(const ResourceEntry& entry, bool* result) {
    106     // parent_local_id cannot be used to classify the trash itself.
    107     if (entry.local_id() == util::kDriveTrashDirLocalId) {
    108       *result = true;
    109       return FILE_ERROR_OK;
    110     }
    111 
    112     // Look up for parents recursively.
    113     std::vector<std::string> undetermined_ids;
    114     undetermined_ids.push_back(entry.parent_local_id());
    115 
    116     std::map<std::string, bool>::iterator it =
    117         trashed_.find(undetermined_ids.back());
    118     for (; it == trashed_.end(); it = trashed_.find(undetermined_ids.back())) {
    119       ResourceEntry parent;
    120       FileError error =
    121           metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
    122       if (error != FILE_ERROR_OK)
    123         return error;
    124       undetermined_ids.push_back(parent.parent_local_id());
    125     }
    126 
    127     // Cache the result to |trashed_|.
    128     undetermined_ids.pop_back();  // The last one is already in |trashed_|.
    129     for (size_t i = 0; i < undetermined_ids.size(); ++i)
    130       trashed_[undetermined_ids[i]] = it->second;
    131 
    132     *result = it->second;
    133     return FILE_ERROR_OK;
    134   }
    135 
    136  private:
    137   ResourceMetadata* metadata_;
    138   std::map<std::string, bool> trashed_;  // local ID to is_trashed map.
    139 };
    140 
    141 // Returns true if |entry| is eligible for the search |options| and should be
    142 // tested for the match with the query.  If
    143 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents
    144 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the
    145 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only
    146 // the entries with shared-with-me label will be tested. If
    147 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files
    148 // match with the query. This option can not be used with other options.
    149 bool IsEligibleEntry(const ResourceEntry& entry,
    150                      ResourceMetadata::Iterator* it,
    151                      int options) {
    152   if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
    153       entry.file_specific_info().is_hosted_document())
    154     return false;
    155 
    156   if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
    157       entry.file_info().is_directory())
    158     return false;
    159 
    160   if (options & SEARCH_METADATA_SHARED_WITH_ME)
    161     return entry.shared_with_me();
    162 
    163   if (options & SEARCH_METADATA_OFFLINE) {
    164     if (entry.file_specific_info().is_hosted_document())
    165       return true;
    166     FileCacheEntry cache_entry;
    167     it->GetCacheEntry(&cache_entry);
    168     return cache_entry.is_present();
    169   }
    170 
    171   // Exclude "drive", "drive/root", and "drive/other".
    172   if (it->GetID() == util::kDriveGrandRootLocalId ||
    173       entry.parent_local_id() == util::kDriveGrandRootLocalId) {
    174     return false;
    175   }
    176 
    177   return true;
    178 }
    179 
    180 // Used to implement SearchMetadata.
    181 // Adds entry to the result when appropriate.
    182 // In particular, if |query| is non-null, only adds files with the name matching
    183 // the query.
    184 FileError MaybeAddEntryToResult(
    185     ResourceMetadata* resource_metadata,
    186     ResourceMetadata::Iterator* it,
    187     base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
    188     int options,
    189     size_t at_most_num_matches,
    190     TrashedEntryClassifier* trashed_entry_classifier,
    191     ScopedPriorityQueue<ResultCandidate,
    192                         ResultCandidateComparator>* result_candidates) {
    193   DCHECK_GE(at_most_num_matches, result_candidates->size());
    194 
    195   const ResourceEntry& entry = it->GetValue();
    196 
    197   // If the candidate set is already full, and this |entry| is old, do nothing.
    198   // We perform this check first in order to avoid the costly find-and-highlight
    199   // or FilePath lookup as much as possible.
    200   if (result_candidates->size() == at_most_num_matches &&
    201       !CompareByTimestamp(entry, result_candidates->top()->entry))
    202     return FILE_ERROR_OK;
    203 
    204   // Add |entry| to the result if the entry is eligible for the given
    205   // |options| and matches the query. The base name of the entry must
    206   // contain |query| to match the query.
    207   std::string highlighted;
    208   if (!IsEligibleEntry(entry, it, options) ||
    209       (query && !FindAndHighlight(entry.base_name(), query, &highlighted)))
    210     return FILE_ERROR_OK;
    211 
    212   // Trashed entry should not be returned.
    213   bool trashed = false;
    214   FileError error = trashed_entry_classifier->IsTrashed(entry, &trashed);
    215   if (error != FILE_ERROR_OK || trashed)
    216     return error;
    217 
    218   // Make space for |entry| when appropriate.
    219   if (result_candidates->size() == at_most_num_matches)
    220     result_candidates->pop();
    221   result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
    222   return FILE_ERROR_OK;
    223 }
    224 
    225 // Implements SearchMetadata().
    226 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
    227                                        const std::string& query_text,
    228                                        int options,
    229                                        int at_most_num_matches,
    230                                        MetadataSearchResultVector* results) {
    231   ScopedPriorityQueue<ResultCandidate,
    232                       ResultCandidateComparator> result_candidates;
    233 
    234   // Prepare data structure for searching.
    235   base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query(
    236       base::UTF8ToUTF16(query_text));
    237 
    238   // Iterate over entries.
    239   TrashedEntryClassifier trashed_entry_classifier(resource_metadata);
    240   scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
    241   for (; !it->IsAtEnd(); it->Advance()) {
    242     FileError error = MaybeAddEntryToResult(resource_metadata, it.get(),
    243                                             query_text.empty() ? NULL : &query,
    244                                             options,
    245                                             at_most_num_matches,
    246                                             &trashed_entry_classifier,
    247                                             &result_candidates);
    248     if (error != FILE_ERROR_OK)
    249       return error;
    250   }
    251 
    252   // Prepare the result.
    253   for (; !result_candidates.empty(); result_candidates.pop()) {
    254     const ResultCandidate& candidate = *result_candidates.top();
    255     // The path field of entries in result_candidates are empty at this point,
    256     // because we don't want to run the expensive metadata DB look up except for
    257     // the final results. Hence, here we fill the part.
    258     base::FilePath path = resource_metadata->GetFilePath(candidate.local_id);
    259     if (path.empty())
    260       return FILE_ERROR_FAILED;
    261     results->push_back(MetadataSearchResult(
    262         path, candidate.entry, candidate.highlighted_base_name));
    263   }
    264 
    265   // Reverse the order here because |result_candidates| puts the most
    266   // uninteresting candidate at the top.
    267   std::reverse(results->begin(), results->end());
    268 
    269   return FILE_ERROR_OK;
    270 }
    271 
    272 // Runs the SearchMetadataCallback and updates the histogram.
    273 void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
    274                                const base::TimeTicks& start_time,
    275                                scoped_ptr<MetadataSearchResultVector> results,
    276                                FileError error) {
    277   if (error != FILE_ERROR_OK)
    278     results.reset();
    279   callback.Run(error, results.Pass());
    280 
    281   UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
    282                       base::TimeTicks::Now() - start_time);
    283 }
    284 
    285 }  // namespace
    286 
    287 void SearchMetadata(
    288     scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
    289     ResourceMetadata* resource_metadata,
    290     const std::string& query,
    291     int options,
    292     int at_most_num_matches,
    293     const SearchMetadataCallback& callback) {
    294   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    295   DCHECK_LE(0, at_most_num_matches);
    296   DCHECK(!callback.is_null());
    297 
    298   const base::TimeTicks start_time = base::TimeTicks::Now();
    299 
    300   scoped_ptr<MetadataSearchResultVector> results(
    301       new MetadataSearchResultVector);
    302   MetadataSearchResultVector* results_ptr = results.get();
    303   base::PostTaskAndReplyWithResult(blocking_task_runner.get(),
    304                                    FROM_HERE,
    305                                    base::Bind(&SearchMetadataOnBlockingPool,
    306                                               resource_metadata,
    307                                               query,
    308                                               options,
    309                                               at_most_num_matches,
    310                                               results_ptr),
    311                                    base::Bind(&RunSearchMetadataCallback,
    312                                               callback,
    313                                               start_time,
    314                                               base::Passed(&results)));
    315 }
    316 
    317 bool FindAndHighlight(
    318     const std::string& text,
    319     base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
    320     std::string* highlighted_text) {
    321   DCHECK(query);
    322   DCHECK(highlighted_text);
    323   highlighted_text->clear();
    324 
    325   base::string16 text16 = base::UTF8ToUTF16(text);
    326   size_t match_start = 0;
    327   size_t match_length = 0;
    328   if (!query->Search(text16, &match_start, &match_length))
    329     return false;
    330 
    331   base::string16 pre = text16.substr(0, match_start);
    332   base::string16 match = text16.substr(match_start, match_length);
    333   base::string16 post = text16.substr(match_start + match_length);
    334   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre)));
    335   highlighted_text->append("<b>");
    336   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match)));
    337   highlighted_text->append("</b>");
    338   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post)));
    339   return true;
    340 }
    341 
    342 }  // namespace internal
    343 }  // namespace drive
    344