Home | History | Annotate | Download | only in i18n
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/i18n/break_iterator.h"
      6 
      7 #include "base/logging.h"
      8 #include "third_party/icu/source/common/unicode/ubrk.h"
      9 #include "third_party/icu/source/common/unicode/uchar.h"
     10 #include "third_party/icu/source/common/unicode/ustring.h"
     11 
     12 namespace base {
     13 namespace i18n {
     14 
     15 const size_t npos = -1;
     16 
// Records |str| and |break_type| for later iteration. No ICU iterator is
// opened here: |iter_| starts out NULL and is only assigned by Init().
// NOTE(review): whether |string_| copies |str| or merely refers to it depends
// on the member declaration in the header, which is not visible from this
// file -- confirm the lifetime requirements on |str| there.
BreakIterator::BreakIterator(const string16& str, BreakType break_type)
    : iter_(NULL),
      string_(str),
      break_type_(break_type),
      prev_(npos),  // No previous boundary yet; GetString() DCHECKs on this.
      pos_(0) {     // The current position starts at offset 0.
}
     24 
     25 BreakIterator::~BreakIterator() {
     26   if (iter_)
     27     ubrk_close(static_cast<UBreakIterator*>(iter_));
     28 }
     29 
     30 bool BreakIterator::Init() {
     31   UErrorCode status = U_ZERO_ERROR;
     32   UBreakIteratorType break_type;
     33   switch (break_type_) {
     34     case BREAK_CHARACTER:
     35       break_type = UBRK_CHARACTER;
     36       break;
     37     case BREAK_WORD:
     38       break_type = UBRK_WORD;
     39       break;
     40     case BREAK_LINE:
     41     case BREAK_NEWLINE:
     42       break_type = UBRK_LINE;
     43       break;
     44     default:
     45       NOTREACHED() << "invalid break_type_";
     46       return false;
     47   }
     48   iter_ = ubrk_open(break_type, NULL,
     49                     string_.data(), static_cast<int32_t>(string_.size()),
     50                     &status);
     51   if (U_FAILURE(status)) {
     52     NOTREACHED() << "ubrk_open failed";
     53     return false;
     54   }
     55   // Move the iterator to the beginning of the string.
     56   ubrk_first(static_cast<UBreakIterator*>(iter_));
     57   return true;
     58 }
     59 
     60 bool BreakIterator::Advance() {
     61   int32_t pos;
     62   int32_t status;
     63   prev_ = pos_;
     64   switch (break_type_) {
     65     case BREAK_CHARACTER:
     66     case BREAK_WORD:
     67     case BREAK_LINE:
     68       pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
     69       if (pos == UBRK_DONE) {
     70         pos_ = npos;
     71         return false;
     72       }
     73       pos_ = static_cast<size_t>(pos);
     74       return true;
     75     case BREAK_NEWLINE:
     76       do {
     77         pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
     78         if (pos == UBRK_DONE)
     79           break;
     80         pos_ = static_cast<size_t>(pos);
     81         status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
     82       } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
     83       if (pos == UBRK_DONE && prev_ == pos_) {
     84         pos_ = npos;
     85         return false;
     86       }
     87       return true;
     88     default:
     89       NOTREACHED() << "invalid break_type_";
     90       return false;
     91   }
     92 }
     93 
     94 bool BreakIterator::IsWord() const {
     95   int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
     96   return (break_type_ == BREAK_WORD && status != UBRK_WORD_NONE);
     97 }
     98 
     99 bool BreakIterator::IsEndOfWord(size_t position) const {
    100   if (break_type_ != BREAK_WORD)
    101     return false;
    102 
    103   UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
    104   UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
    105   int32_t status = ubrk_getRuleStatus(iter);
    106   return (!!boundary && status != UBRK_WORD_NONE);
    107 }
    108 
    109 bool BreakIterator::IsStartOfWord(size_t position) const {
    110   if (break_type_ != BREAK_WORD)
    111     return false;
    112 
    113   UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
    114   UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
    115   ubrk_next(iter);
    116   int32_t next_status = ubrk_getRuleStatus(iter);
    117   return (!!boundary && next_status != UBRK_WORD_NONE);
    118 }
    119 
    120 string16 BreakIterator::GetString() const {
    121   DCHECK(prev_ != npos && pos_ != npos);
    122   return string_.substr(prev_, pos_ - prev_);
    123 }
    124 
    125 }  // namespace i18n
    126 }  // namespace base
    127