Home | History | Annotate | Download | only in gtk
      1 /*
      2  * Copyright (C) 2006 Lars Knoll <lars (at) trolltech.com>
      3  * Copyright (C) 2007 Apple Inc. All rights reserved.
      4  * Copyright (C) 2008 Jürg Billeter <j (at) bitron.ch>
      5  * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches (at) access-company.com>
      6  *
      7  * This library is free software; you can redistribute it and/or
      8  * modify it under the terms of the GNU Library General Public
      9  * License as published by the Free Software Foundation; either
     10  * version 2 of the License, or (at your option) any later version.
     11  *
     12  * This library is distributed in the hope that it will be useful,
     13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  * Library General Public License for more details.
     16  *
     17  * You should have received a copy of the GNU Library General Public License
     18  * along with this library; see the file COPYING.LIB.  If not, write to
     19  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     20  * Boston, MA 02110-1301, USA.
     21  *
     22  */
     23 
     24 #include "config.h"
     25 #include "TextBreakIterator.h"
     26 
     27 #include <pango/pango.h>
     28 #include <wtf/gtk/GOwnPtr.h>
     29 
     30 namespace WebCore {
     31 
     32 enum UBreakIteratorType {
     33     UBRK_CHARACTER,
     34     UBRK_WORD,
     35     UBRK_LINE,
     36     UBRK_SENTENCE
     37 };
     38 
     39 class TextBreakIterator {
     40 public:
     41     UBreakIteratorType m_type;
     42     int m_length;
     43     PangoLogAttr* m_logAttrs;
     44     int m_index;
     45 };
     46 
     47 static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
     48     UBreakIteratorType type, const UChar* string, int length)
     49 {
     50     if (!string)
     51         return 0;
     52 
     53     if (!createdIterator) {
     54         iterator = new TextBreakIterator();
     55         createdIterator = true;
     56     }
     57     if (!iterator)
     58         return 0;
     59 
     60     long utf8len;
     61     GOwnPtr<char> utf8;
     62     utf8.set(g_utf16_to_utf8(string, length, 0, &utf8len, 0));
     63 
     64     // FIXME: assumes no surrogate pairs
     65 
     66     iterator->m_type = type;
     67     iterator->m_length = length;
     68     if (createdIterator)
     69         g_free(iterator->m_logAttrs);
     70     iterator->m_logAttrs = g_new0(PangoLogAttr, length + 1);
     71     iterator->m_index = -1;
     72     pango_get_log_attrs(utf8.get(), utf8len, -1, 0, iterator->m_logAttrs, length + 1);
     73 
     74     return iterator;
     75 }
     76 
     77 TextBreakIterator* characterBreakIterator(const UChar* string, int length)
     78 {
     79     static bool createdCharacterBreakIterator = false;
     80     static TextBreakIterator* staticCharacterBreakIterator;
     81     return setUpIterator(createdCharacterBreakIterator, staticCharacterBreakIterator, UBRK_CHARACTER, string, length);
     82 }
     83 
     84 TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
     85 {
     86     // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version.
     87     return characterBreakIterator(string, length);
     88 }
     89 
     90 TextBreakIterator* wordBreakIterator(const UChar* string, int length)
     91 {
     92     static bool createdWordBreakIterator = false;
     93     static TextBreakIterator* staticWordBreakIterator;
     94     return setUpIterator(createdWordBreakIterator, staticWordBreakIterator, UBRK_WORD, string, length);
     95 }
     96 
     97 TextBreakIterator* lineBreakIterator(const UChar* string, int length)
     98 {
     99     static bool createdLineBreakIterator = false;
    100     static TextBreakIterator* staticLineBreakIterator;
    101     return setUpIterator(createdLineBreakIterator, staticLineBreakIterator, UBRK_LINE, string, length);
    102 }
    103 
    104 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
    105 {
    106     static bool createdSentenceBreakIterator = false;
    107     static TextBreakIterator* staticSentenceBreakIterator;
    108     return setUpIterator(createdSentenceBreakIterator, staticSentenceBreakIterator, UBRK_SENTENCE, string, length);
    109 }
    110 
    111 int textBreakFirst(TextBreakIterator* bi)
    112 {
    113     // see textBreakLast
    114 
    115     int firstCursorPosition = -1;
    116     int pos = 0;
    117     while (pos <= bi->m_length && (firstCursorPosition < 0)) {
    118         if (bi->m_logAttrs[pos].is_cursor_position)
    119             firstCursorPosition = pos;
    120     }
    121     bi->m_index = firstCursorPosition;
    122     return firstCursorPosition;
    123 }
    124 
    125 int textBreakLast(TextBreakIterator* bi)
    126 {
    127     // TextBreakLast is not meant to find just any break according to bi->m_type
    128     // but really the one near the last character.
    129     // (cmp ICU documentation for ubrk_first and ubrk_last)
    130     // From ICU docs for ubrk_last:
    131     // "Determine the index immediately beyond the last character in the text being scanned."
    132 
    133     // So we should advance or traverse back based on bi->m_logAttrs cursor positions.
    134     // If last character position in the original string is a whitespace,
    135     // traverse to the left until the first non-white character position is found
    136     // and return the position of the first white-space char after this one.
    137     // Otherwise return m_length, as "the first character beyond the last" is outside our string.
    138 
    139     bool whiteSpaceAtTheEnd = true;
    140     int nextWhiteSpacePos = bi->m_length;
    141 
    142     int pos = bi->m_length;
    143     while (pos >= 0 && whiteSpaceAtTheEnd) {
    144         if (bi->m_logAttrs[pos].is_cursor_position) {
    145             if (whiteSpaceAtTheEnd = bi->m_logAttrs[pos].is_white)
    146                 nextWhiteSpacePos = pos;
    147         }
    148         pos--;
    149     }
    150     bi->m_index = nextWhiteSpacePos;
    151     return nextWhiteSpacePos;
    152 }
    153 
    154 int textBreakNext(TextBreakIterator* bi)
    155 {
    156     for (int i = bi->m_index + 1; i <= bi->m_length; i++) {
    157 
    158         // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol ,
    159         // are not marked as word_start & word_end as opposed to the way ICU does it.
    160         // This leads to - for example - different word selection behaviour when right clicking.
    161 
    162         if ((bi->m_type == UBRK_LINE && bi->m_logAttrs[i].is_line_break)
    163             || (bi->m_type == UBRK_WORD && (bi->m_logAttrs[i].is_word_start || bi->m_logAttrs[i].is_word_end))
    164             || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[i].is_cursor_position)
    165             || (bi->m_type == UBRK_SENTENCE && (bi->m_logAttrs[i].is_sentence_start || bi->m_logAttrs[i].is_sentence_end)) ) {
    166             bi->m_index = i;
    167             return i;
    168         }
    169     }
    170     return TextBreakDone;
    171 }
    172 
    173 int textBreakPrevious(TextBreakIterator* bi)
    174 {
    175     for (int i = bi->m_index - 1; i >= 0; i--) {
    176         if ((bi->m_type == UBRK_LINE && bi->m_logAttrs[i].is_line_break)
    177             || (bi->m_type == UBRK_WORD && (bi->m_logAttrs[i].is_word_start || bi->m_logAttrs[i].is_word_end))
    178             || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[i].is_cursor_position)
    179             || (bi->m_type == UBRK_SENTENCE && (bi->m_logAttrs[i].is_sentence_start || bi->m_logAttrs[i].is_sentence_end)) ) {
    180             bi->m_index = i;
    181             return i;
    182         }
    183     }
    184     return textBreakFirst(bi);
    185 }
    186 
    187 int textBreakPreceding(TextBreakIterator* bi, int pos)
    188 {
    189     bi->m_index = pos;
    190     return textBreakPrevious(bi);
    191 }
    192 
    193 int textBreakFollowing(TextBreakIterator* bi, int pos)
    194 {
    195     if (pos < 0)
    196         pos = -1;
    197     bi->m_index = pos;
    198     return textBreakNext(bi);
    199 }
    200 
    201 int textBreakCurrent(TextBreakIterator* bi)
    202 {
    203     return bi->m_index;
    204 }
    205 
    206 bool isTextBreak(TextBreakIterator* bi, int pos)
    207 {
    208     if (bi->m_index < 0)
    209         return false;
    210 
    211     return ((bi->m_type == UBRK_LINE && bi->m_logAttrs[bi->m_index].is_line_break)
    212         || (bi->m_type == UBRK_WORD && bi->m_logAttrs[bi->m_index].is_word_end)
    213         || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[bi->m_index].is_char_break)
    214         || (bi->m_type == UBRK_SENTENCE && bi->m_logAttrs[bi->m_index].is_sentence_end) );
    215 }
    216 
    217 }
    218