Home | History | Annotate | Download | only in drive
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/chromeos/drive/search_metadata.h"
      6 
      7 #include <algorithm>
      8 #include <queue>
      9 
     10 #include "base/bind.h"
     11 #include "base/i18n/string_search.h"
     12 #include "base/metrics/histogram.h"
     13 #include "base/strings/utf_string_conversions.h"
     14 #include "base/time/time.h"
     15 #include "chrome/browser/chromeos/drive/file_system_util.h"
     16 #include "content/public/browser/browser_thread.h"
     17 #include "google_apis/drive/gdata_wapi_parser.h"
     18 #include "net/base/escape.h"
     19 
     20 using content::BrowserThread;
     21 
     22 namespace drive {
     23 namespace internal {
     24 
     25 namespace {
     26 
     27 struct ResultCandidate {
     28   ResultCandidate(const std::string& local_id,
     29                   const ResourceEntry& entry,
     30                   const std::string& highlighted_base_name)
     31       : local_id(local_id),
     32         entry(entry),
     33         highlighted_base_name(highlighted_base_name) {
     34   }
     35 
     36   std::string local_id;
     37   ResourceEntry entry;
     38   std::string highlighted_base_name;
     39 };
     40 
     41 // Used to sort the result candidates per the last accessed/modified time. The
     42 // recently accessed/modified files come first.
     43 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
     44   const PlatformFileInfoProto& a_file_info = a.file_info();
     45   const PlatformFileInfoProto& b_file_info = b.file_info();
     46 
     47   if (a_file_info.last_accessed() != b_file_info.last_accessed())
     48     return a_file_info.last_accessed() > b_file_info.last_accessed();
     49 
     50   // When the entries have the same last access time (which happens quite often
     51   // because Drive server doesn't set the field until an entry is viewed via
     52   // drive.google.com), we use last modified time as the tie breaker.
     53   return a_file_info.last_modified() > b_file_info.last_modified();
     54 }
     55 
     56 struct ResultCandidateComparator {
     57   bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
     58     return CompareByTimestamp(a->entry, b->entry);
     59   }
     60 };
     61 
     62 // A wrapper of std::priority_queue which deals with pointers of values.
     63 template<typename T, typename Compare>
     64 class ScopedPriorityQueue {
     65  public:
     66   ScopedPriorityQueue() {}
     67 
     68   ~ScopedPriorityQueue() {
     69     while (!empty())
     70       pop();
     71   }
     72 
     73   bool empty() const { return queue_.empty(); }
     74 
     75   size_t size() const { return queue_.size(); }
     76 
     77   const T* top() const { return queue_.top(); }
     78 
     79   void push(T* x) { queue_.push(x); }
     80 
     81   void pop() {
     82     // Keep top alive for the pop() call so that debug checks can access
     83     // underlying data (e.g. validating heap property of the priority queue
     84     // will call the comparator).
     85     T* saved_top = queue_.top();
     86     queue_.pop();
     87     delete saved_top;
     88   }
     89 
     90  private:
     91   std::priority_queue<T*, std::vector<T*>, Compare> queue_;
     92 
     93   DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
     94 };
     95 
     96 // Classifies the given entry as hidden if it's not under specific directories.
     97 class HiddenEntryClassifier {
     98  public:
     99   HiddenEntryClassifier(ResourceMetadata* metadata,
    100                         const std::string& mydrive_local_id)
    101       : metadata_(metadata) {
    102     // Only things under My Drive and drive/other are not hidden.
    103     is_hiding_child_[mydrive_local_id] = false;
    104     is_hiding_child_[util::kDriveOtherDirLocalId] = false;
    105 
    106     // Everything else is hidden, including the directories mentioned above
    107     // themselves.
    108     is_hiding_child_[""] = true;
    109   }
    110 
    111   // |result| is set to true if |entry| is hidden.
    112   FileError IsHidden(const ResourceEntry& entry, bool* result) {
    113     // Look up for parents recursively.
    114     std::vector<std::string> undetermined_ids;
    115     undetermined_ids.push_back(entry.parent_local_id());
    116 
    117     std::map<std::string, bool>::iterator it =
    118         is_hiding_child_.find(undetermined_ids.back());
    119     for (; it == is_hiding_child_.end();
    120          it = is_hiding_child_.find(undetermined_ids.back())) {
    121       ResourceEntry parent;
    122       FileError error =
    123           metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
    124       if (error != FILE_ERROR_OK)
    125         return error;
    126       undetermined_ids.push_back(parent.parent_local_id());
    127     }
    128 
    129     // Cache the result.
    130     undetermined_ids.pop_back();  // The last one is already in the map.
    131     for (size_t i = 0; i < undetermined_ids.size(); ++i)
    132       is_hiding_child_[undetermined_ids[i]] = it->second;
    133 
    134     *result = it->second;
    135     return FILE_ERROR_OK;
    136   }
    137 
    138  private:
    139   ResourceMetadata* metadata_;
    140 
    141   // local ID to is_hidden map.
    142   std::map<std::string, bool> is_hiding_child_;
    143 };
    144 
    145 // Returns true if |entry| is eligible for the search |options| and should be
    146 // tested for the match with the query.  If
    147 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents
    148 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the
    149 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only
    150 // the entries with shared-with-me label will be tested. If
    151 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files
    152 // match with the query. This option can not be used with other options.
    153 bool IsEligibleEntry(const ResourceEntry& entry, int options) {
    154   if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
    155       entry.file_specific_info().is_hosted_document())
    156     return false;
    157 
    158   if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
    159       entry.file_info().is_directory())
    160     return false;
    161 
    162   if (options & SEARCH_METADATA_SHARED_WITH_ME)
    163     return entry.shared_with_me();
    164 
    165   if (options & SEARCH_METADATA_OFFLINE) {
    166     if (entry.file_specific_info().is_hosted_document()) {
    167       // Not all hosted documents are cached by Drive offline app.
    168       // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467
    169       switch (google_apis::ResourceEntry::GetEntryKindFromExtension(
    170           entry.file_specific_info().document_extension())) {
    171         case google_apis::ENTRY_KIND_DOCUMENT:
    172         case google_apis::ENTRY_KIND_SPREADSHEET:
    173         case google_apis::ENTRY_KIND_PRESENTATION:
    174         case google_apis::ENTRY_KIND_DRAWING:
    175           return true;
    176         default:
    177           return false;
    178       }
    179     } else {
    180       return entry.file_specific_info().cache_state().is_present();
    181     }
    182   }
    183 
    184   return true;
    185 }
    186 
    187 // Used to implement SearchMetadata.
    188 // Adds entry to the result when appropriate.
    189 // In particular, if |query| is non-null, only adds files with the name matching
    190 // the query.
    191 FileError MaybeAddEntryToResult(
    192     ResourceMetadata* resource_metadata,
    193     ResourceMetadata::Iterator* it,
    194     base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
    195     int options,
    196     size_t at_most_num_matches,
    197     HiddenEntryClassifier* hidden_entry_classifier,
    198     ScopedPriorityQueue<ResultCandidate,
    199                         ResultCandidateComparator>* result_candidates) {
    200   DCHECK_GE(at_most_num_matches, result_candidates->size());
    201 
    202   const ResourceEntry& entry = it->GetValue();
    203 
    204   // If the candidate set is already full, and this |entry| is old, do nothing.
    205   // We perform this check first in order to avoid the costly find-and-highlight
    206   // or FilePath lookup as much as possible.
    207   if (result_candidates->size() == at_most_num_matches &&
    208       !CompareByTimestamp(entry, result_candidates->top()->entry))
    209     return FILE_ERROR_OK;
    210 
    211   // Add |entry| to the result if the entry is eligible for the given
    212   // |options| and matches the query. The base name of the entry must
    213   // contain |query| to match the query.
    214   std::string highlighted;
    215   if (!IsEligibleEntry(entry, options) ||
    216       (query && !FindAndHighlight(entry.base_name(), query, &highlighted)))
    217     return FILE_ERROR_OK;
    218 
    219   // Hidden entry should not be returned.
    220   bool hidden = false;
    221   FileError error = hidden_entry_classifier->IsHidden(entry, &hidden);
    222   if (error != FILE_ERROR_OK || hidden)
    223     return error;
    224 
    225   // Make space for |entry| when appropriate.
    226   if (result_candidates->size() == at_most_num_matches)
    227     result_candidates->pop();
    228   result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
    229   return FILE_ERROR_OK;
    230 }
    231 
    232 // Implements SearchMetadata().
    233 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
    234                                        const std::string& query_text,
    235                                        int options,
    236                                        int at_most_num_matches,
    237                                        MetadataSearchResultVector* results) {
    238   ScopedPriorityQueue<ResultCandidate,
    239                       ResultCandidateComparator> result_candidates;
    240 
    241   // Prepare data structure for searching.
    242   base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query(
    243       base::UTF8ToUTF16(query_text));
    244 
    245   // Prepare an object to filter out hidden entries.
    246   ResourceEntry mydrive;
    247   FileError error = resource_metadata->GetResourceEntryByPath(
    248       util::GetDriveMyDriveRootPath(), &mydrive);
    249   if (error != FILE_ERROR_OK)
    250     return error;
    251   HiddenEntryClassifier hidden_entry_classifier(resource_metadata,
    252                                                 mydrive.local_id());
    253 
    254   // Iterate over entries.
    255   scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
    256   for (; !it->IsAtEnd(); it->Advance()) {
    257     FileError error = MaybeAddEntryToResult(resource_metadata, it.get(),
    258                                             query_text.empty() ? NULL : &query,
    259                                             options,
    260                                             at_most_num_matches,
    261                                             &hidden_entry_classifier,
    262                                             &result_candidates);
    263     if (error != FILE_ERROR_OK)
    264       return error;
    265   }
    266 
    267   // Prepare the result.
    268   for (; !result_candidates.empty(); result_candidates.pop()) {
    269     const ResultCandidate& candidate = *result_candidates.top();
    270     // The path field of entries in result_candidates are empty at this point,
    271     // because we don't want to run the expensive metadata DB look up except for
    272     // the final results. Hence, here we fill the part.
    273     base::FilePath path;
    274     error = resource_metadata->GetFilePath(candidate.local_id, &path);
    275     if (error != FILE_ERROR_OK)
    276       return error;
    277     bool is_directory = candidate.entry.file_info().is_directory();
    278     results->push_back(MetadataSearchResult(
    279         path, is_directory, candidate.highlighted_base_name));
    280   }
    281 
    282   // Reverse the order here because |result_candidates| puts the most
    283   // uninteresting candidate at the top.
    284   std::reverse(results->begin(), results->end());
    285 
    286   return FILE_ERROR_OK;
    287 }
    288 
    289 // Runs the SearchMetadataCallback and updates the histogram.
    290 void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
    291                                const base::TimeTicks& start_time,
    292                                scoped_ptr<MetadataSearchResultVector> results,
    293                                FileError error) {
    294   if (error != FILE_ERROR_OK)
    295     results.reset();
    296   callback.Run(error, results.Pass());
    297 
    298   UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
    299                       base::TimeTicks::Now() - start_time);
    300 }
    301 
    302 }  // namespace
    303 
    304 void SearchMetadata(
    305     scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
    306     ResourceMetadata* resource_metadata,
    307     const std::string& query,
    308     int options,
    309     int at_most_num_matches,
    310     const SearchMetadataCallback& callback) {
    311   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    312   DCHECK_LE(0, at_most_num_matches);
    313   DCHECK(!callback.is_null());
    314 
    315   const base::TimeTicks start_time = base::TimeTicks::Now();
    316 
    317   scoped_ptr<MetadataSearchResultVector> results(
    318       new MetadataSearchResultVector);
    319   MetadataSearchResultVector* results_ptr = results.get();
    320   base::PostTaskAndReplyWithResult(blocking_task_runner.get(),
    321                                    FROM_HERE,
    322                                    base::Bind(&SearchMetadataOnBlockingPool,
    323                                               resource_metadata,
    324                                               query,
    325                                               options,
    326                                               at_most_num_matches,
    327                                               results_ptr),
    328                                    base::Bind(&RunSearchMetadataCallback,
    329                                               callback,
    330                                               start_time,
    331                                               base::Passed(&results)));
    332 }
    333 
    334 bool FindAndHighlight(
    335     const std::string& text,
    336     base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
    337     std::string* highlighted_text) {
    338   DCHECK(query);
    339   DCHECK(highlighted_text);
    340   highlighted_text->clear();
    341 
    342   base::string16 text16 = base::UTF8ToUTF16(text);
    343   size_t match_start = 0;
    344   size_t match_length = 0;
    345   if (!query->Search(text16, &match_start, &match_length))
    346     return false;
    347 
    348   base::string16 pre = text16.substr(0, match_start);
    349   base::string16 match = text16.substr(match_start, match_length);
    350   base::string16 post = text16.substr(match_start + match_length);
    351   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre)));
    352   highlighted_text->append("<b>");
    353   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match)));
    354   highlighted_text->append("</b>");
    355   highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post)));
    356   return true;
    357 }
    358 
    359 }  // namespace internal
    360 }  // namespace drive
    361