1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/i18n/break_iterator.h" 6 7 #include "base/logging.h" 8 #include "third_party/icu/source/common/unicode/ubrk.h" 9 #include "third_party/icu/source/common/unicode/uchar.h" 10 #include "third_party/icu/source/common/unicode/ustring.h" 11 12 namespace base { 13 namespace i18n { 14 15 const size_t npos = -1; 16 17 BreakIterator::BreakIterator(const string16& str, BreakType break_type) 18 : iter_(NULL), 19 string_(str), 20 break_type_(break_type), 21 prev_(npos), 22 pos_(0) { 23 } 24 25 BreakIterator::~BreakIterator() { 26 if (iter_) 27 ubrk_close(static_cast<UBreakIterator*>(iter_)); 28 } 29 30 bool BreakIterator::Init() { 31 UErrorCode status = U_ZERO_ERROR; 32 UBreakIteratorType break_type; 33 switch (break_type_) { 34 case BREAK_CHARACTER: 35 break_type = UBRK_CHARACTER; 36 break; 37 case BREAK_WORD: 38 break_type = UBRK_WORD; 39 break; 40 case BREAK_LINE: 41 case BREAK_NEWLINE: 42 break_type = UBRK_LINE; 43 break; 44 default: 45 NOTREACHED() << "invalid break_type_"; 46 return false; 47 } 48 iter_ = ubrk_open(break_type, NULL, 49 string_.data(), static_cast<int32_t>(string_.size()), 50 &status); 51 if (U_FAILURE(status)) { 52 NOTREACHED() << "ubrk_open failed"; 53 return false; 54 } 55 // Move the iterator to the beginning of the string. 56 ubrk_first(static_cast<UBreakIterator*>(iter_)); 57 return true; 58 } 59 60 bool BreakIterator::Advance() { 61 int32_t pos; 62 int32_t status; 63 prev_ = pos_; 64 switch (break_type_) { 65 case BREAK_CHARACTER: 66 case BREAK_WORD: 67 case BREAK_LINE: 68 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); 69 if (pos == UBRK_DONE) { 70 pos_ = npos; 71 return false; 72 } 73 pos_ = static_cast<size_t>(pos); 74 return true; 75 case BREAK_NEWLINE: 76 do { 77 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); 78 if (pos == UBRK_DONE) 79 break; 80 pos_ = static_cast<size_t>(pos); 81 status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); 82 } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); 83 if (pos == UBRK_DONE && prev_ == pos_) { 84 pos_ = npos; 85 return false; 86 } 87 return true; 88 default: 89 NOTREACHED() << "invalid break_type_"; 90 return false; 91 } 92 } 93 94 bool BreakIterator::IsWord() const { 95 int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); 96 return (break_type_ == BREAK_WORD && status != UBRK_WORD_NONE); 97 } 98 99 bool BreakIterator::IsEndOfWord(size_t position) const { 100 if (break_type_ != BREAK_WORD) 101 return false; 102 103 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); 104 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); 105 int32_t status = ubrk_getRuleStatus(iter); 106 return (!!boundary && status != UBRK_WORD_NONE); 107 } 108 109 bool BreakIterator::IsStartOfWord(size_t position) const { 110 if (break_type_ != BREAK_WORD) 111 return false; 112 113 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); 114 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); 115 ubrk_next(iter); 116 int32_t next_status = ubrk_getRuleStatus(iter); 117 return (!!boundary && next_status != UBRK_WORD_NONE); 118 } 119 120 string16 BreakIterator::GetString() const { 121 DCHECK(prev_ != npos && pos_ != npos); 122 return string_.substr(prev_, pos_ - prev_); 123 } 124 125 } // namespace i18n 126 } // namespace base 127