1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_ 6 #define CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_ 7 8 #include "base/basictypes.h" 9 #include "base/memory/scoped_ptr.h" 10 #include "base/strings/string16.h" 11 12 namespace base { 13 namespace i18n { 14 class UTF16CharIterator; 15 } 16 } 17 18 namespace app_list { 19 20 // TermBreakIterator breaks terms out of a word. Terms are broken on 21 // camel case boundaries and alpha/number boundaries. Numbers are defined 22 // as [0-9\.,]+. 23 // e.g. 24 // CamelCase -> Camel, Case 25 // Python2.7 -> Python, 2.7 26 class TermBreakIterator { 27 public: 28 // Note that |word| must out live this iterator. 29 explicit TermBreakIterator(const string16& word); 30 ~TermBreakIterator(); 31 32 // Advance to the next term. Returns false if at the end of the word. 33 bool Advance(); 34 35 // Returns the current term, which is the substr of |word_| in range 36 // [prev_, pos_). 37 const string16 GetCurrentTerm() const; 38 39 size_t prev() const { return prev_; } 40 size_t pos() const { return pos_; } 41 42 static const size_t npos = -1; 43 44 private: 45 enum State { 46 STATE_START, // Initial state 47 STATE_NUMBER, // Current char is a number [0-9\.,]. 48 STATE_UPPER, // Current char is upper case. 49 STATE_LOWER, // Current char is lower case. 50 STATE_CHAR, // Current char has no case, e.g. a cjk char. 51 STATE_LAST, 52 }; 53 54 // Returns new state for given |ch|. 55 State GetNewState(char16 ch); 56 57 const string16& word_; 58 size_t prev_; 59 size_t pos_; 60 61 scoped_ptr<base::i18n::UTF16CharIterator> iter_; 62 State state_; 63 64 DISALLOW_COPY_AND_ASSIGN(TermBreakIterator); 65 }; 66 67 } // namespace app_list 68 69 #endif // CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_ 70