1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/i18n/break_iterator.h" 6 7 #include "base/logging.h" 8 #include "unicode/ubrk.h" 9 #include "unicode/uchar.h" 10 #include "unicode/ustring.h" 11 12 namespace base { 13 14 const size_t npos = -1; 15 16 BreakIterator::BreakIterator(const string16* str, BreakType break_type) 17 : iter_(NULL), 18 string_(str), 19 break_type_(break_type), 20 prev_(npos), 21 pos_(0) { 22 } 23 24 BreakIterator::~BreakIterator() { 25 if (iter_) 26 ubrk_close(static_cast<UBreakIterator*>(iter_)); 27 } 28 29 bool BreakIterator::Init() { 30 UErrorCode status = U_ZERO_ERROR; 31 UBreakIteratorType break_type; 32 switch (break_type_) { 33 case BREAK_WORD: 34 break_type = UBRK_WORD; 35 break; 36 case BREAK_LINE: 37 case BREAK_NEWLINE: 38 break_type = UBRK_LINE; 39 break; 40 default: 41 NOTREACHED() << "invalid break_type_"; 42 return false; 43 } 44 iter_ = ubrk_open(break_type, NULL, 45 string_->data(), static_cast<int32_t>(string_->size()), 46 &status); 47 if (U_FAILURE(status)) { 48 NOTREACHED() << "ubrk_open failed"; 49 return false; 50 } 51 // Move the iterator to the beginning of the string. 52 ubrk_first(static_cast<UBreakIterator*>(iter_)); 53 return true; 54 } 55 56 bool BreakIterator::Advance() { 57 int32_t pos; 58 int32_t status; 59 prev_ = pos_; 60 switch (break_type_) { 61 case BREAK_WORD: 62 case BREAK_LINE: 63 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); 64 if (pos == UBRK_DONE) { 65 pos_ = npos; 66 return false; 67 } 68 pos_ = static_cast<size_t>(pos); 69 return true; 70 case BREAK_NEWLINE: 71 do { 72 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); 73 if (pos == UBRK_DONE) { 74 break; 75 } 76 pos_ = static_cast<size_t>(pos); 77 status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); 78 } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); 79 if (pos == UBRK_DONE && prev_ == pos_) { 80 pos_ = npos; 81 return false; 82 } 83 return true; 84 default: 85 NOTREACHED() << "invalid break_type_"; 86 return false; 87 } 88 } 89 90 bool BreakIterator::IsWord() const { 91 return (break_type_ == BREAK_WORD && 92 ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)) != 93 UBRK_WORD_NONE); 94 } 95 96 string16 BreakIterator::GetString() const { 97 DCHECK(prev_ != npos && pos_ != npos); 98 return string_->substr(prev_, pos_ - prev_); 99 } 100 101 } // namespace base 102