Home | History | Annotate | Download | only in drive
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/chromeos/drive/search_metadata.h"
      6 
      7 #include <algorithm>
      8 #include <queue>
      9 
     10 #include "base/bind.h"
     11 #include "base/i18n/string_search.h"
     12 #include "base/metrics/histogram.h"
     13 #include "base/strings/utf_string_conversions.h"
     14 #include "base/time/time.h"
     15 #include "chrome/browser/chromeos/drive/file_system_util.h"
     16 #include "chrome/browser/drive/drive_api_util.h"
     17 #include "content/public/browser/browser_thread.h"
     18 #include "net/base/escape.h"
     19 
     20 using content::BrowserThread;
     21 
     22 namespace drive {
     23 namespace internal {
     24 
     25 namespace {
     26 
     27 struct ResultCandidate {
     28   ResultCandidate(const std::string& local_id,
     29                   const ResourceEntry& entry,
     30                   const std::string& highlighted_base_name)
     31       : local_id(local_id),
     32         entry(entry),
     33         highlighted_base_name(highlighted_base_name) {
     34   }
     35 
     36   std::string local_id;
     37   ResourceEntry entry;
     38   std::string highlighted_base_name;
     39 };
     40 
     41 // Used to sort the result candidates per the last accessed/modified time. The
     42 // recently accessed/modified files come first.
     43 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
     44   const PlatformFileInfoProto& a_file_info = a.file_info();
     45   const PlatformFileInfoProto& b_file_info = b.file_info();
     46 
     47   if (a_file_info.last_accessed() != b_file_info.last_accessed())
     48     return a_file_info.last_accessed() > b_file_info.last_accessed();
     49 
     50   // When the entries have the same last access time (which happens quite often
     51   // because Drive server doesn't set the field until an entry is viewed via
     52   // drive.google.com), we use last modified time as the tie breaker.
     53   return a_file_info.last_modified() > b_file_info.last_modified();
     54 }
     55 
     56 struct ResultCandidateComparator {
     57   bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
     58     return CompareByTimestamp(a->entry, b->entry);
     59   }
     60 };
     61 
     62 // A wrapper of std::priority_queue which deals with pointers of values.
     63 template<typename T, typename Compare>
     64 class ScopedPriorityQueue {
     65  public:
     66   ScopedPriorityQueue() {}
     67 
     68   ~ScopedPriorityQueue() {
     69     while (!empty())
     70       pop();
     71   }
     72 
     73   bool empty() const { return queue_.empty(); }
     74 
     75   size_t size() const { return queue_.size(); }
     76 
     77   const T* top() const { return queue_.top(); }
     78 
     79   void push(T* x) { queue_.push(x); }
     80 
     81   void pop() {
     82     // Keep top alive for the pop() call so that debug checks can access
     83     // underlying data (e.g. validating heap property of the priority queue
     84     // will call the comparator).
     85     T* saved_top = queue_.top();
     86     queue_.pop();
     87     delete saved_top;
     88   }
     89 
     90  private:
     91   std::priority_queue<T*, std::vector<T*>, Compare> queue_;
     92 
     93   DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
     94 };
     95 
     96 // Classifies the given entry as hidden if it's not under specific directories.
     97 class HiddenEntryClassifier {
     98  public:
     99   HiddenEntryClassifier(ResourceMetadata* metadata,
    100                         const std::string& mydrive_local_id)
    101       : metadata_(metadata) {
    102     // Only things under My Drive and drive/other are not hidden.
    103     is_hiding_child_[mydrive_local_id] = false;
    104     is_hiding_child_[util::kDriveOtherDirLocalId] = false;
    105 
    106     // Everything else is hidden, including the directories mentioned above
    107     // themselves.
    108     is_hiding_child_[""] = true;
    109   }
    110 
    111   // |result| is set to true if |entry| is hidden.
    112   FileError IsHidden(const ResourceEntry& entry, bool* result) {
    113     // Look up for parents recursively.
    114     std::vector<std::string> undetermined_ids;
    115     undetermined_ids.push_back(entry.parent_local_id());
    116 
    117     std::map<std::string, bool>::iterator it =
    118         is_hiding_child_.find(undetermined_ids.back());
    119     for (; it == is_hiding_child_.end();
    120          it = is_hiding_child_.find(undetermined_ids.back())) {
    121       ResourceEntry parent;
    122       FileError error =
    123           metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
    124       if (error != FILE_ERROR_OK)
    125         return error;
    126       undetermined_ids.push_back(parent.parent_local_id());
    127     }
    128 
    129     // Cache the result.
    130     undetermined_ids.pop_back();  // The last one is already in the map.
    131     for (size_t i = 0; i < undetermined_ids.size(); ++i)
    132       is_hiding_child_[undetermined_ids[i]] = it->second;
    133 
    134     *result = it->second;
    135     return FILE_ERROR_OK;
    136   }
    137 
    138  private:
    139   ResourceMetadata* metadata_;
    140 
    141   // local ID to is_hidden map.
    142   std::map<std::string, bool> is_hiding_child_;
    143 };
    144 
    145 // Returns true if |entry| is eligible for the search |options| and should be
    146 // tested for the match with the query.  If
    147 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents
    148 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the
    149 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only
    150 // the entries with shared-with-me label will be tested. If
    151 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files
    152 // match with the query. This option can not be used with other options.
    153 bool IsEligibleEntry(const ResourceEntry& entry, int options) {
    154   if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
    155       entry.file_specific_info().is_hosted_document())
    156     return false;
    157 
    158   if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
    159       entry.file_info().is_directory())
    160     return false;
    161 
    162   if (options & SEARCH_METADATA_SHARED_WITH_ME)
    163     return entry.shared_with_me();
    164 
    165   if (options & SEARCH_METADATA_OFFLINE) {
    166     if (entry.file_specific_info().is_hosted_document()) {
    167       // Not all hosted documents are cached by Drive offline app.
    168       // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467
    169       std::string mime_type = entry.file_specific_info().content_mime_type();
    170       return mime_type == drive::util::kGoogleDocumentMimeType ||
    171              mime_type == drive::util::kGoogleSpreadsheetMimeType ||
    172              mime_type == drive::util::kGooglePresentationMimeType ||
    173              mime_type == drive::util::kGoogleDrawingMimeType;
    174     } else {
    175       return entry.file_specific_info().cache_state().is_present();
    176     }
    177   }
    178 
    179   return true;
    180 }
    181 
    182 // Used to implement SearchMetadata.
    183 // Adds entry to the result when appropriate.
    184 // In particular, if |query| is non-null, only adds files with the name matching
    185 // the query.
    186 FileError MaybeAddEntryToResult(
    187     ResourceMetadata* resource_metadata,
    188     ResourceMetadata::Iterator* it,
    189     base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
    190     int options,
    191     size_t at_most_num_matches,
    192     HiddenEntryClassifier* hidden_entry_classifier,
    193     ScopedPriorityQueue<ResultCandidate,
    194                         ResultCandidateComparator>* result_candidates) {
    195   DCHECK_GE(at_most_num_matches, result_candidates->size());
    196 
    197   const ResourceEntry& entry = it->GetValue();
    198 
    199   // If the candidate set is already full, and this |entry| is old, do nothing.
    200   // We perform this check first in order to avoid the costly find-and-highlight
    201   // or FilePath lookup as much as possible.
    202   if (result_candidates->size() == at_most_num_matches &&
    203       !CompareByTimestamp(entry, result_candidates->top()->entry))
    204     return FILE_ERROR_OK;
    205 
    206   // Add |entry| to the result if the entry is eligible for the given
    207   // |options| and matches the query. The base name of the entry must
    208   // contain |query| to match the query.
    209   std::string highlighted;
    210   if (!IsEligibleEntry(entry, options) ||
    211       (query && !FindAndHighlight(entry.base_name(), query, &highlighted)))
    212     return FILE_ERROR_OK;
    213 
    214   // Hidden entry should not be returned.
    215   bool hidden = false;
    216   FileError error = hidden_entry_classifier->IsHidden(entry, &hidden);
    217   if (error != FILE_ERROR_OK || hidden)
    218     return error;
    219 
    220   // Make space for |entry| when appropriate.
    221   if (result_candidates->size() == at_most_num_matches)
    222     result_candidates->pop();
    223   result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
    224   return FILE_ERROR_OK;
    225 }
    226 
    227 // Implements SearchMetadata().
    228 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
    229                                        const std::string& query_text,
    230                                        int options,
    231                                        int at_most_num_matches,
    232                                        MetadataSearchResultVector* results) {
    233   ScopedPriorityQueue<ResultCandidate,
    234                       ResultCandidateComparator> result_candidates;
    235 
    236   // Prepare data structure for searching.
    237   base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query(
    238       base::UTF8ToUTF16(query_text));
    239 
    240   // Prepare an object to filter out hidden entries.
    241   ResourceEntry mydrive;
    242   FileError error = resource_metadata->GetResourceEntryByPath(
    243       util::GetDriveMyDriveRootPath(), &mydrive);
    244   if (error != FILE_ERROR_OK)
    245     return error;
    246   HiddenEntryClassifier hidden_entry_classifier(resource_metadata,
    247                                                 mydrive.local_id());
    248 
    249   // Iterate over entries.
    250   scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
    251   for (; !it->IsAtEnd(); it->Advance()) {
    252     FileError error = MaybeAddEntryToResult(resource_metadata, it.get(),
    253                                             query_text.empty() ? NULL : &query,
    254                                             options,
    255                                             at_most_num_matches,
    256                                             &hidden_entry_classifier,
    257                                             &result_candidates);
    258     if (error != FILE_ERROR_OK)
    259       return error;
    260   }
    261 
    262   // Prepare the result.
    263   for (; !result_candidates.empty(); result_candidates.pop()) {
    264     const ResultCandidate& candidate = *result_candidates.top();
    265     // The path field of entries in result_candidates are empty at this point,
    266     // because we don't want to run the expensive metadata DB look up except for
    267     // the final results. Hence, here we fill the part.
    268     base::FilePath path;
    269     error = resource_metadata->GetFilePath(candidate.local_id, &path);
    270     if (error != FILE_ERROR_OK)
    271       return error;
    272     bool is_directory = candidate.entry.file_info().is_directory();
    273     results->push_back(MetadataSearchResult(
    274         path, is_directory, candidate.highlighted_base_name));
    275   }
    276 
    277   // Reverse the order here because |result_candidates| puts the most
    278   // uninteresting candidate at the top.
    279   std::reverse(results->begin(), results->end());
    280 
    281   return FILE_ERROR_OK;
    282 }
    283 
    284 // Runs the SearchMetadataCallback and updates the histogram.
    285 void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
    286                                const base::TimeTicks& start_time,
    287                                scoped_ptr<MetadataSearchResultVector> results,
    288                                FileError error) {
    289   if (error != FILE_ERROR_OK)
    290     results.reset();
    291   callback.Run(error, results.Pass());
    292 
    293   UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
    294                       base::TimeTicks::Now() - start_time);
    295 }
    296 
    297 }  // namespace
    298 
    299 void SearchMetadata(
    300     scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
    301     ResourceMetadata* resource_metadata,
    302     const std::string& query,
    303     int options,
    304     int at_most_num_matches,
    305     const SearchMetadataCallback& callback) {
    306   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    307   DCHECK_LE(0, at_most_num_matches);
    308   DCHECK(!callback.is_null());
    309 
    310   const base::TimeTicks start_time = base::TimeTicks::Now();
    311 
    312   scoped_ptr<MetadataSearchResultVector> results(
    313       new MetadataSearchResultVector);
    314   MetadataSearchResultVector* results_ptr = results.get();
    315   base::PostTaskAndReplyWithResult(blocking_task_runner.get(),
    316                                    FROM_HERE,
    317                                    base::Bind(&SearchMetadataOnBlockingPool,
    318                                               resource_metadata,
    319                                               query,
    320                                               options,
    321                                               at_most_num_matches,
    322                                               results_ptr),
    323                                    base::Bind(&RunSearchMetadataCallback,
    324                                               callback,
    325                                               start_time,
    326                                               base::Passed(&results)));
    327 }
    328 
    329 bool FindAndHighlight(
    330     const std::string& text,
    331     base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
    332     std::string* highlighted_text) {
    333   DCHECK(query);
    334   DCHECK(highlighted_text);
    335   highlighted_text->clear();
    336 
    337   base::string16 text16 = base::UTF8ToUTF16(text);
    338   size_t match_start = 0;
    339   size_t match_length = 0;
    340   if (!query->Search(text16, &match_start, &match_length))
    341     return false;
    342 
    343   base::string16 pre = text16.substr(0, match_start);
    344   base::string16 match = text16.substr(match_start, match_length);
    345   base::string16 post = text16.substr(match_start + match_length);
    346   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre)));
    347   highlighted_text->append("<b>");
    348   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match)));
    349   highlighted_text->append("</b>");
    350   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post)));
    351   return true;
    352 }
    353 
    354 }  // namespace internal
    355 }  // namespace drive
    356