Home | History | Annotate | Download | only in search
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_
      6 #define CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_
      7 
      8 #include "base/basictypes.h"
      9 #include "base/memory/scoped_ptr.h"
     10 #include "base/strings/string16.h"
     11 
     12 namespace base {
     13 namespace i18n {
     14 class UTF16CharIterator;
     15 }
     16 }
     17 
     18 namespace app_list {
     19 
      20 // TermBreakIterator breaks terms out of a word. Terms are broken on
      21 // camel case boundaries and alpha/number boundaries. Numbers are defined
      22 // as [0-9\.,]+.
      23 //  e.g.
      24 //   CamelCase -> Camel, Case
      25 //   Python2.7 -> Python, 2.7
         //
         // Intended use, as far as the declarations below show (confirm against the
         // implementation file): construct with a word, call Advance() while it
         // returns true, and read GetCurrentTerm() / prev() / pos() after each
         // successful call.
      26 class TermBreakIterator {
      27  public:
      28   // |word| must outlive this iterator: only a reference is kept (|word_|).
      29   explicit TermBreakIterator(const string16& word);
      30   ~TermBreakIterator();
      31 
      32   // Advances to the next term. Returns false if at the end of the word.
      33   bool Advance();
      34 
      35   // Returns the current term, which is the substr of |word_| in range
      36   // [prev_, pos_). The term is returned by value, not as a view of |word_|.
      37   const string16 GetCurrentTerm() const;
      38 
           // Bounds of the current term as indices into |word_|: prev() is the
           // inclusive start and pos() is the exclusive end (see GetCurrentTerm()).
      39   size_t prev() const { return prev_; }
      40   size_t pos() const { return pos_; }
      41 
           // Sentinel index. -1 converted to size_t is the largest size_t value.
           // NOTE(review): presumably the value of prev()/pos() when there is no
           // current term -- confirm in the implementation file.
      42   static const size_t npos = -1;
      43 
      44  private:
           // Character classes used while scanning the word.
           // NOTE(review): presumably a change of class between neighbouring
           // characters is what marks a term boundary -- confirm in GetNewState()
           // and Advance().
      45   enum State {
      46     STATE_START,   // Initial state
      47     STATE_NUMBER,  // Current char is a number [0-9\.,].
      48     STATE_UPPER,   // Current char is upper case.
      49     STATE_LOWER,   // Current char is lower case.
      50     STATE_CHAR,    // Current char has no case, e.g. a cjk char.
      51     STATE_LAST,    // NOTE(review): looks like an end-of-enum marker; confirm.
      52   };
      53 
      54   // Returns new state for given |ch|.
      55   State GetNewState(char16 ch);
      56 
           // The word being split. Held by reference, not owned; see the
           // constructor comment for the lifetime requirement.
      57   const string16& word_;
           // Current term is [prev_, pos_) within |word_|.
      58   size_t prev_;
      59   size_t pos_;
      60 
           // Owned character iterator. Its type is only forward-declared in this
           // header. NOTE(review): presumably iterates over |word_| -- confirm.
      61   scoped_ptr<base::i18n::UTF16CharIterator> iter_;
           // NOTE(review): presumably the State of the most recently consumed
           // character -- confirm in Advance().
      62   State state_;
      63 
      64   DISALLOW_COPY_AND_ASSIGN(TermBreakIterator);
      65 };
     66 
     67 }  // namespace app_list
     68 
     69 #endif  // CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_
     70