Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2006 Lars Knoll <lars (at) trolltech.com>
      3  * Copyright (C) 2007 Apple Inc. All rights reserved.
      4  *
      5  * This library is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU Library General Public
      7  * License as published by the Free Software Foundation; either
      8  * version 2 of the License, or (at your option) any later version.
      9  *
     10  * This library is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * Library General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU Library General Public License
     16  * along with this library; see the file COPYING.LIB.  If not, write to
     17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #ifndef TextBreakIterator_h
     23 #define TextBreakIterator_h
     24 
     25 #include <wtf/unicode/Unicode.h>
     26 
     27 namespace WebCore {
     28 
     29     class TextBreakIterator;
     30 
     31     // Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.
     32 
     33     // Iterates over "extended grapheme clusters", as defined in UAX #29.
     34     // Note that platform implementations may be less sophisticated - e.g. ICU prior to
     35     // version 4.0 only supports "legacy grapheme clusters".
     36     // Use this for general text processing, e.g. string truncation.
     37     TextBreakIterator* characterBreakIterator(const UChar*, int length);
     38 
     39     // This is similar to character break iterator in most cases, but is subject to
     40     // platform UI conventions. One notable example where this can be different
     41     // from character break iterator is Thai prepend characters, see bug 24342.
     42     // Use this for insertion point and selection manipulations.
     43     TextBreakIterator* cursorMovementIterator(const UChar*, int length);
     44 
     45     TextBreakIterator* wordBreakIterator(const UChar*, int length);
     46     TextBreakIterator* acquireLineBreakIterator(const UChar*, int length);
     47     void releaseLineBreakIterator(TextBreakIterator*);
     48     TextBreakIterator* sentenceBreakIterator(const UChar*, int length);
     49 
     50     int textBreakFirst(TextBreakIterator*);
     51     int textBreakLast(TextBreakIterator*);
     52     int textBreakNext(TextBreakIterator*);
     53     int textBreakPrevious(TextBreakIterator*);
     54     int textBreakCurrent(TextBreakIterator*);
     55     int textBreakPreceding(TextBreakIterator*, int);
     56     int textBreakFollowing(TextBreakIterator*, int);
     57     bool isTextBreak(TextBreakIterator*, int);
     58 
     59     const int TextBreakDone = -1;
     60 
     61 class LazyLineBreakIterator {
     62 public:
     63     LazyLineBreakIterator(const UChar* string = 0, int length = 0)
     64         : m_string(string)
     65         , m_length(length)
     66         , m_iterator(0)
     67     {
     68     }
     69 
     70     ~LazyLineBreakIterator()
     71     {
     72         if (m_iterator)
     73             releaseLineBreakIterator(m_iterator);
     74     }
     75 
     76     const UChar* string() const { return m_string; }
     77     int length() const { return m_length; }
     78 
     79     TextBreakIterator* get()
     80     {
     81         if (!m_iterator)
     82             m_iterator = acquireLineBreakIterator(m_string, m_length);
     83         return m_iterator;
     84     }
     85 
     86     void reset(const UChar* string, int length)
     87     {
     88         if (m_iterator)
     89             releaseLineBreakIterator(m_iterator);
     90 
     91         m_string = string;
     92         m_length = length;
     93         m_iterator = 0;
     94     }
     95 
     96 private:
     97     const UChar* m_string;
     98     int m_length;
     99     TextBreakIterator* m_iterator;
    100 };
    101 
    102 }
    103 
    104 #endif
    105