Home | History | Annotate | Download | only in i18n
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/i18n/break_iterator.h"
      6 
      7 #include "base/logging.h"
      8 #include "unicode/ubrk.h"
      9 #include "unicode/uchar.h"
     10 #include "unicode/ustring.h"
     11 
     12 namespace base {
     13 
     14 const size_t npos = -1;
     15 
     16 BreakIterator::BreakIterator(const string16* str, BreakType break_type)
     17     : iter_(NULL),
     18       string_(str),
     19       break_type_(break_type),
     20       prev_(npos),
     21       pos_(0) {
     22 }
     23 
     24 BreakIterator::~BreakIterator() {
     25   if (iter_)
     26     ubrk_close(static_cast<UBreakIterator*>(iter_));
     27 }
     28 
     29 bool BreakIterator::Init() {
     30   UErrorCode status = U_ZERO_ERROR;
     31   UBreakIteratorType break_type;
     32   switch (break_type_) {
     33     case BREAK_WORD:
     34       break_type = UBRK_WORD;
     35       break;
     36     case BREAK_LINE:
     37     case BREAK_NEWLINE:
     38       break_type = UBRK_LINE;
     39       break;
     40     default:
     41       NOTREACHED() << "invalid break_type_";
     42       return false;
     43   }
     44   iter_ = ubrk_open(break_type, NULL,
     45                     string_->data(), static_cast<int32_t>(string_->size()),
     46                     &status);
     47   if (U_FAILURE(status)) {
     48     NOTREACHED() << "ubrk_open failed";
     49     return false;
     50   }
     51   // Move the iterator to the beginning of the string.
     52   ubrk_first(static_cast<UBreakIterator*>(iter_));
     53   return true;
     54 }
     55 
     56 bool BreakIterator::Advance() {
     57   int32_t pos;
     58   int32_t status;
     59   prev_ = pos_;
     60   switch (break_type_) {
     61     case BREAK_WORD:
     62     case BREAK_LINE:
     63       pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
     64       if (pos == UBRK_DONE) {
     65         pos_ = npos;
     66         return false;
     67       }
     68       pos_ = static_cast<size_t>(pos);
     69       return true;
     70     case BREAK_NEWLINE:
     71       do {
     72         pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
     73         if (pos == UBRK_DONE) {
     74           break;
     75         }
     76         pos_ = static_cast<size_t>(pos);
     77         status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
     78       } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
     79       if (pos == UBRK_DONE && prev_ == pos_) {
     80         pos_ = npos;
     81         return false;
     82       }
     83       return true;
     84     default:
     85       NOTREACHED() << "invalid break_type_";
     86       return false;
     87   }
     88 }
     89 
     90 bool BreakIterator::IsWord() const {
     91   return (break_type_ == BREAK_WORD &&
     92           ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)) !=
     93           UBRK_WORD_NONE);
     94 }
     95 
     96 string16 BreakIterator::GetString() const {
     97   DCHECK(prev_ != npos && pos_ != npos);
     98   return string_->substr(prev_, pos_ - prev_);
     99 }
    100 
    101 }  // namespace base
    102