Home | History | Annotate | Download | only in i18n
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef BASE_I18N_CHAR_ITERATOR_H_
      6 #define BASE_I18N_CHAR_ITERATOR_H_
      7 
      8 #include <string>
      9 
     10 #include "base/basictypes.h"
     11 #include "base/i18n/base_i18n_export.h"
     12 #include "base/strings/string16.h"
     13 
     14 // The CharIterator classes iterate through the characters in UTF8 and
     15 // UTF16 strings.  Example usage:
     16 //
     17 //   UTF8CharIterator iter(&str);
     18 //   while (!iter.end()) {
     19 //     VLOG(1) << iter.get();
     20 //     iter.Advance();
     21 //   }
     22 
     23 #if defined(OS_WIN)
        // On Windows builds only, declare uint8_t at global scope.  The type is
        // used below for UTF8CharIterator::str_.
        // NOTE(review): presumably a stand-in for <stdint.h> on toolchains that
        // lack it -- confirm.  A global typedef like this can collide with any
        // other header that also declares uint8_t.
     24 typedef unsigned char uint8_t;
     25 #endif
     26 
     27 namespace base {
     28 namespace i18n {
     29 
     30 class BASE_I18N_EXPORT UTF8CharIterator {
        // Iterates over the characters of a UTF-8 encoded std::string.  The
        // public interface only moves forward (Advance()); the constructor,
        // destructor and Advance() are defined outside this header.
     31  public:
     32   // Requires |str| to live as long as the UTF8CharIterator does.
        //   str: pointer to the string to iterate over.
        //   NOTE(review): whether a NULL |str| is tolerated is not visible here.
     33   explicit UTF8CharIterator(const std::string* str);
     34   ~UTF8CharIterator();
     35 
     36   // Returns the starting array index of the current character within the
     37   // string (the value of array_pos_).
     38   int32 array_pos() const { return array_pos_; }
     39 
     40   // Returns the logical index of the current character, independent of the
     41   // number of bytes each character takes (the value of char_pos_).
     42   int32 char_pos() const { return char_pos_; }
     43 
     44   // Returns the current char (the value of char_).
        // NOTE(review): presumably a decoded Unicode code point -- confirm
        // against the implementation.
     45   int32 get() const { return char_; }
     46 
     47   // Returns true if we're at the end of the string, i.e. array_pos_ == len_.
     48   bool end() const { return array_pos_ == len_; }
     49 
     50   // Advances to the next actual character.  Returns false if we're at the
     51   // end of the string.
     52   bool Advance();
     53 
     54  private:
     55   // The string we're iterating over, viewed as raw bytes.
     56   const uint8_t* str_;
     57 
     58   // The length of the encoded string; end() compares array_pos_ against it.
     59   int32 len_;
     60 
     61   // Array index of the current character; exposed through array_pos().
     62   int32 array_pos_;
     63 
     64   // The next array index.
        // NOTE(review): presumably where the following character starts --
        // confirm in the implementation.
     65   int32 next_pos_;
     66 
     67   // Character index of the current character; exposed through char_pos().
     68   int32 char_pos_;
     69 
     70   // The current character; exposed through get().
     71   int32 char_;
     72 
        // Macro defined outside this file.
        // NOTE(review): by its name it makes the class non-copyable -- confirm.
     73   DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
     74 };
     75 
     76 class BASE_I18N_EXPORT UTF16CharIterator {
        // Iterates over the characters of a UTF-16 encoded string.  The public
        // interface only moves forward (Advance()); the constructors,
        // destructor, Advance() and ReadChar() are defined outside this header.
     77  public:
     78   // Requires |str| to live as long as the UTF16CharIterator does.
        //   str: pointer to the string16 to iterate over.
     79   explicit UTF16CharIterator(const string16* str);
        // Iterates over a raw buffer of |str_len| char16 units starting at |str|.
        // NOTE(review): presumably the same lifetime requirement applies to this
        // buffer -- confirm.  |str_len| is size_t while len_ is int32; how
        // lengths beyond the int32 range are handled is not visible here.
     80   UTF16CharIterator(const char16* str, size_t str_len);
     81   ~UTF16CharIterator();
     82 
     83   // Returns the starting array index of the current character within the
     84   // string (the value of array_pos_).
     85   int32 array_pos() const { return array_pos_; }
     86 
     87   // Returns the logical index of the current character, independent of the
     88   // number of codewords each character takes (the value of char_pos_).
     89   int32 char_pos() const { return char_pos_; }
     90 
     91   // Returns the current char (the value of char_).
        // NOTE(review): presumably a decoded Unicode code point -- confirm
        // against the implementation.
     92   int32 get() const { return char_; }
     93 
     94   // Returns true if we're at the end of the string, i.e. array_pos_ == len_.
     95   bool end() const { return array_pos_ == len_; }
     96 
     97   // Advances to the next actual character.  Returns false if we're at the
     98   // end of the string.
     99   bool Advance();
    100 
    101  private:
    102   // Fills in the current character we found and advances to the next
    103   // character, updating all flags as necessary.
    104   void ReadChar();
    105 
    106   // The string we're iterating over, as char16 code units.
    107   const char16* str_;
    108 
    109   // The length of the encoded string; end() compares array_pos_ against it.
    110   int32 len_;
    111 
    112   // Array index of the current character; exposed through array_pos().
    113   int32 array_pos_;
    114 
    115   // The next array index.
        // NOTE(review): presumably where the following character starts --
        // confirm in the implementation.
    116   int32 next_pos_;
    117 
    118   // Character index of the current character; exposed through char_pos().
    119   int32 char_pos_;
    120 
    121   // The current character; exposed through get().
    122   int32 char_;
    123 
        // Macro defined outside this file.
        // NOTE(review): by its name it makes the class non-copyable -- confirm.
    124   DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
    125 };
    126 
    127 }  // namespace i18n
    128 }  // namespace base
    129 
    130 #endif  // BASE_I18N_CHAR_ITERATOR_H_
    131