Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2006 Lars Knoll <lars (at) trolltech.com>
      3  * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
      4  *
      5  * This library is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU Library General Public
      7  * License as published by the Free Software Foundation; either
      8  * version 2 of the License, or (at your option) any later version.
      9  *
     10  * This library is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * Library General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU Library General Public License
     16  * along with this library; see the file COPYING.LIB.  If not, write to
     17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #include "config.h"
     23 #include "core/platform/text/TextBreakIterator.h"
     24 
     25 #include "core/platform/text/LineBreakIteratorPoolICU.h"
     26 #include "wtf/text/WTFString.h"
     27 
     28 using namespace WTF;
     29 using namespace std;
     30 
     31 namespace WebCore {
     32 
     33 static TextBreakIterator* ensureIterator(bool& createdIterator, TextBreakIterator*& iterator, UBreakIteratorType type)
     34 {
     35     if (!createdIterator) {
     36         UErrorCode openStatus = U_ZERO_ERROR;
     37         iterator = reinterpret_cast<TextBreakIterator*>(ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &openStatus));
     38         createdIterator = true;
     39         ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
     40     }
     41     return iterator;
     42 }
     43 
     44 enum TextContext { NoContext, PriorContext, PrimaryContext };
     45 
     46 const int textBufferCapacity = 16;
     47 
     48 typedef struct {
     49     UText text;
     50     UChar buffer[textBufferCapacity];
     51 } UTextWithBuffer;
     52 
     53 static inline int64_t textPinIndex(int64_t& index, int64_t limit)
     54 {
     55     if (index < 0)
     56         index = 0;
     57     else if (index > limit)
     58         index = limit;
     59     return index;
     60 }
     61 
     62 static inline int64_t textNativeLength(UText* text)
     63 {
     64     return text->a + text->b;
     65 }
     66 
     67 // Relocate pointer from source into destination as required.
     68 static void textFixPointer(const UText* source, UText* destination, const void*& pointer)
     69 {
     70     if (pointer >= source->pExtra && pointer < static_cast<char*>(source->pExtra) + source->extraSize) {
     71         // Pointer references source extra buffer.
     72         pointer = static_cast<char*>(destination->pExtra) + (static_cast<const char*>(pointer) - static_cast<const char*>(source->pExtra));
     73     } else if (pointer >= source && pointer < reinterpret_cast<const char*>(source) + source->sizeOfStruct) {
     74         // Pointer references source text structure, but not source extra buffer.
     75         pointer = reinterpret_cast<char*>(destination) + (static_cast<const char*>(pointer) - reinterpret_cast<const char*>(source));
     76     }
     77 }
     78 
     79 static UText* textClone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
     80 {
     81     ASSERT_UNUSED(deep, !deep);
     82     if (U_FAILURE(*status))
     83         return 0;
     84     int32_t extraSize = source->extraSize;
     85     destination = utext_setup(destination, extraSize, status);
     86     if (U_FAILURE(*status))
     87         return destination;
     88     void* extraNew = destination->pExtra;
     89     int32_t flags = destination->flags;
     90     int sizeToCopy = min(source->sizeOfStruct, destination->sizeOfStruct);
     91     memcpy(destination, source, sizeToCopy);
     92     destination->pExtra = extraNew;
     93     destination->flags = flags;
     94     memcpy(destination->pExtra, source->pExtra, extraSize);
     95     textFixPointer(source, destination, destination->context);
     96     textFixPointer(source, destination, destination->p);
     97     textFixPointer(source, destination, destination->q);
     98     ASSERT(!destination->r);
     99     const void * chunkContents = static_cast<const void*>(destination->chunkContents);
    100     textFixPointer(source, destination, chunkContents);
    101     destination->chunkContents = static_cast<const UChar*>(chunkContents);
    102     return destination;
    103 }
    104 
    105 static int32_t textExtract(UText* text, int64_t start, int64_t limit, UChar* destination, int32_t destinationCapacity, UErrorCode* errorCode)
    106 {
    107     UNUSED_PARAM(text);
    108     UNUSED_PARAM(start);
    109     UNUSED_PARAM(limit);
    110     UNUSED_PARAM(destination);
    111     UNUSED_PARAM(destinationCapacity);
    112     // In the present context, this text provider is used only with ICU functions
    113     // that do not perform an extract operation.
    114     ASSERT_NOT_REACHED();
    115     *errorCode = U_UNSUPPORTED_ERROR;
    116     return 0;
    117 }
    118 
// UTextFuncs close callback. Clears the context pointer; the access callbacks
// in this file return FALSE for a UText whose context is null.
static void textClose(UText* text)
{
    text->context = 0;
}
    123 
    124 static inline TextContext textGetContext(const UText* text, int64_t nativeIndex, UBool forward)
    125 {
    126     if (!text->b || nativeIndex > text->b)
    127         return PrimaryContext;
    128     if (nativeIndex == text->b)
    129         return forward ? PrimaryContext : PriorContext;
    130     return PriorContext;
    131 }
    132 
    133 static inline TextContext textLatin1GetCurrentContext(const UText* text)
    134 {
    135     if (!text->chunkContents)
    136         return NoContext;
    137     return text->chunkContents == text->pExtra ? PrimaryContext : PriorContext;
    138 }
    139 
// Repositions the chunk of a Latin-1 UText within the primary string.
// The chunk is a window of at most extraSize / sizeof(UChar) characters that
// starts at |nativeIndex| when moving forward, or ends at |nativeIndex| when
// moving backward, clamped to [text->b, nativeLength]. The Latin-1 characters
// covered by the window are then widened into the extra buffer.
// Precondition (asserted): the chunk already points at the extra buffer.
static void textLatin1MoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
{
    ASSERT(text->chunkContents == text->pExtra);
    if (forward) {
        ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength);
        text->chunkNativeStart = nativeIndex;
        text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar);
        // Do not let the window run past the end of the text.
        if (text->chunkNativeLimit > nativeLength)
            text->chunkNativeLimit = nativeLength;
    } else {
        ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength);
        text->chunkNativeLimit = nativeIndex;
        text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar);
        // Do not let the window reach back into the prior context.
        if (text->chunkNativeStart < text->b)
            text->chunkNativeStart = text->b;
    }
    int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
    // Ensure chunk length is well defined if computed length exceeds int32_t range.
    ASSERT(length <= numeric_limits<int32_t>::max());
    text->chunkLength = length <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0;
    text->nativeIndexingLimit = text->chunkLength;
    // Forward iteration starts at the front of the window, backward at its end.
    text->chunkOffset = forward ? 0 : text->chunkLength;
    // text->p is the Latin-1 source; native indices are offset by the prior
    // context length (text->b), hence the subtraction. The const_cast is needed
    // because chunkContents is a const UChar* aliasing the writable extra buffer.
    StringImpl::copyChars(const_cast<UChar*>(text->chunkContents), static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b), static_cast<unsigned>(text->chunkLength));
}
    164 
    165 static void textLatin1SwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
    166 {
    167     ASSERT(!text->chunkContents || text->chunkContents == text->q);
    168     text->chunkContents = static_cast<const UChar*>(text->pExtra);
    169     textLatin1MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
    170 }
    171 
    172 static void textLatin1MoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
    173 {
    174     ASSERT(text->chunkContents == text->q);
    175     ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
    176     ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
    177     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
    178     text->chunkNativeStart = 0;
    179     text->chunkNativeLimit = text->b;
    180     text->chunkLength = text->b;
    181     text->nativeIndexingLimit = text->chunkLength;
    182     int64_t offset = nativeIndex - text->chunkNativeStart;
    183     // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
    184     ASSERT(offset <= numeric_limits<int32_t>::max());
    185     text->chunkOffset = min(offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
    186 }
    187 
    188 static void textLatin1SwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
    189 {
    190     ASSERT(!text->chunkContents || text->chunkContents == text->pExtra);
    191     text->chunkContents = static_cast<const UChar*>(text->q);
    192     textLatin1MoveInPriorContext(text, nativeIndex, nativeLength, forward);
    193 }
    194 
// Fast path shared by both access callbacks. Handles the two cases that need
// no chunk change:
//  - |nativeIndex| already lies inside the current chunk: chunkOffset is
//    updated and |isAccessible| is set to TRUE;
//  - |nativeIndex| lies beyond the end (forward) or at/before the start
//    (backward) of the text while the current chunk already touches that end:
//    chunkOffset is parked at the corresponding chunk edge and |isAccessible|
//    is set to FALSE.
// Returns true if one of these cases applied; otherwise returns false and
// leaves |isAccessible| unmodified, and the caller must reposition the chunk.
static inline bool textInChunkOrOutOfRange(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward, UBool& isAccessible)
{
    if (forward) {
        // Forward access addresses the character at |nativeIndex|, so the
        // chunk range is half-open: [start, limit).
        if (nativeIndex >= text->chunkNativeStart && nativeIndex < text->chunkNativeLimit) {
            int64_t offset = nativeIndex - text->chunkNativeStart;
            // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
            ASSERT(offset <= numeric_limits<int32_t>::max());
            text->chunkOffset = offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
            isAccessible = TRUE;
            return true;
        }
        if (nativeIndex >= nativeLength && text->chunkNativeLimit == nativeLength) {
            text->chunkOffset = text->chunkLength;
            isAccessible = FALSE;
            return true;
        }
    } else {
        // Backward access addresses the character before |nativeIndex|, so the
        // chunk range is (start, limit].
        if (nativeIndex > text->chunkNativeStart && nativeIndex <= text->chunkNativeLimit) {
            int64_t offset = nativeIndex - text->chunkNativeStart;
            // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
            ASSERT(offset <= numeric_limits<int32_t>::max());
            text->chunkOffset = offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
            isAccessible = TRUE;
            return true;
        }
        if (nativeIndex <= 0 && !text->chunkNativeStart) {
            text->chunkOffset = 0;
            isAccessible = FALSE;
            return true;
        }
    }
    return false;
}
    228 
    229 static UBool textLatin1Access(UText* text, int64_t nativeIndex, UBool forward)
    230 {
    231     if (!text->context)
    232         return FALSE;
    233     int64_t nativeLength = textNativeLength(text);
    234     UBool isAccessible;
    235     if (textInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible))
    236         return isAccessible;
    237     nativeIndex = textPinIndex(nativeIndex, nativeLength - 1);
    238     TextContext currentContext = textLatin1GetCurrentContext(text);
    239     TextContext newContext = textGetContext(text, nativeIndex, forward);
    240     ASSERT(newContext != NoContext);
    241     if (newContext == currentContext) {
    242         if (currentContext == PrimaryContext) {
    243             textLatin1MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
    244         } else {
    245             textLatin1MoveInPriorContext(text, nativeIndex, nativeLength, forward);
    246         }
    247     } else if (newContext == PrimaryContext) {
    248         textLatin1SwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
    249     } else {
    250         ASSERT(newContext == PriorContext);
    251         textLatin1SwitchToPriorContext(text, nativeIndex, nativeLength, forward);
    252     }
    253     return TRUE;
    254 }
    255 
// Callback table for the Latin-1 text provider. Entries are positional and
// follow the declaration order of ICU's UTextFuncs; a 0 entry means the
// operation is not provided.
static const struct UTextFuncs textLatin1Funcs = {
    sizeof(UTextFuncs), // tableSize
    0, 0, 0, // reserved1..3
    textClone, // clone
    textNativeLength, // nativeLength
    textLatin1Access, // access
    textExtract, // extract
    0, 0, 0, 0, // replace, copy, mapOffsetToNative, mapNativeIndexToUTF16
    textClose, // close
    0, 0, 0, // spare1..3
};
    267 
    268 static void textInit(UText* text, const UTextFuncs* funcs, const void* string, unsigned length, const UChar* priorContext, int priorContextLength)
    269 {
    270     text->pFuncs = funcs;
    271     text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS;
    272     text->context = string;
    273     text->p = string;
    274     text->a = length;
    275     text->q = priorContext;
    276     text->b = priorContextLength;
    277 }
    278 
    279 static UText* textOpenLatin1(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status)
    280 {
    281     if (U_FAILURE(*status))
    282         return 0;
    283 
    284     if (!string || length > static_cast<unsigned>(numeric_limits<int32_t>::max())) {
    285         *status = U_ILLEGAL_ARGUMENT_ERROR;
    286         return 0;
    287     }
    288     UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
    289     if (U_FAILURE(*status)) {
    290         ASSERT(!text);
    291         return 0;
    292     }
    293     textInit(text, &textLatin1Funcs, string, length, priorContext, priorContextLength);
    294     return text;
    295 }
    296 
    297 static inline TextContext textUTF16GetCurrentContext(const UText* text)
    298 {
    299     if (!text->chunkContents)
    300         return NoContext;
    301     return text->chunkContents == text->p ? PrimaryContext : PriorContext;
    302 }
    303 
    304 static void textUTF16MoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
    305 {
    306     ASSERT(text->chunkContents == text->p);
    307     ASSERT_UNUSED(forward, forward ? nativeIndex >= text->b : nativeIndex > text->b);
    308     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
    309     text->chunkNativeStart = text->b;
    310     text->chunkNativeLimit = nativeLength;
    311     int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
    312     // Ensure chunk length is well defined if computed length exceeds int32_t range.
    313     ASSERT(length <= numeric_limits<int32_t>::max());
    314     text->chunkLength = length <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0;
    315     text->nativeIndexingLimit = text->chunkLength;
    316     int64_t offset = nativeIndex - text->chunkNativeStart;
    317     // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
    318     ASSERT(offset <= numeric_limits<int32_t>::max());
    319     text->chunkOffset = min(offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
    320 }
    321 
    322 static void textUTF16SwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
    323 {
    324     ASSERT(!text->chunkContents || text->chunkContents == text->q);
    325     text->chunkContents = static_cast<const UChar*>(text->p);
    326     textUTF16MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
    327 }
    328 
    329 static void textUTF16MoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
    330 {
    331     ASSERT(text->chunkContents == text->q);
    332     ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
    333     ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
    334     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
    335     text->chunkNativeStart = 0;
    336     text->chunkNativeLimit = text->b;
    337     text->chunkLength = text->b;
    338     text->nativeIndexingLimit = text->chunkLength;
    339     int64_t offset = nativeIndex - text->chunkNativeStart;
    340     // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
    341     ASSERT(offset <= numeric_limits<int32_t>::max());
    342     text->chunkOffset = min(offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
    343 }
    344 
    345 static void textUTF16SwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
    346 {
    347     ASSERT(!text->chunkContents || text->chunkContents == text->p);
    348     text->chunkContents = static_cast<const UChar*>(text->q);
    349     textUTF16MoveInPriorContext(text, nativeIndex, nativeLength, forward);
    350 }
    351 
    352 static UBool textUTF16Access(UText* text, int64_t nativeIndex, UBool forward)
    353 {
    354     if (!text->context)
    355         return FALSE;
    356     int64_t nativeLength = textNativeLength(text);
    357     UBool isAccessible;
    358     if (textInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible))
    359         return isAccessible;
    360     nativeIndex = textPinIndex(nativeIndex, nativeLength - 1);
    361     TextContext currentContext = textUTF16GetCurrentContext(text);
    362     TextContext newContext = textGetContext(text, nativeIndex, forward);
    363     ASSERT(newContext != NoContext);
    364     if (newContext == currentContext) {
    365         if (currentContext == PrimaryContext) {
    366             textUTF16MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
    367         } else {
    368             textUTF16MoveInPriorContext(text, nativeIndex, nativeLength, forward);
    369         }
    370     } else if (newContext == PrimaryContext) {
    371         textUTF16SwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
    372     } else {
    373         ASSERT(newContext == PriorContext);
    374         textUTF16SwitchToPriorContext(text, nativeIndex, nativeLength, forward);
    375     }
    376     return TRUE;
    377 }
    378 
// Callback table for the UTF-16 text provider. Entries are positional and
// follow the declaration order of ICU's UTextFuncs; a 0 entry means the
// operation is not provided.
static const struct UTextFuncs textUTF16Funcs = {
    sizeof(UTextFuncs), // tableSize
    0, 0, 0, // reserved1..3
    textClone, // clone
    textNativeLength, // nativeLength
    textUTF16Access, // access
    textExtract, // extract
    0, 0, 0, 0, // replace, copy, mapOffsetToNative, mapNativeIndexToUTF16
    textClose, // close
    0, 0, 0, // spare1..3
};
    390 
    391 static UText* textOpenUTF16(UText* text, const UChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status)
    392 {
    393     if (U_FAILURE(*status))
    394         return 0;
    395 
    396     if (!string || length > static_cast<unsigned>(numeric_limits<int32_t>::max())) {
    397         *status = U_ILLEGAL_ARGUMENT_ERROR;
    398         return 0;
    399     }
    400 
    401     text = utext_setup(text, 0, status);
    402     if (U_FAILURE(*status)) {
    403         ASSERT(!text);
    404         return 0;
    405     }
    406     textInit(text, &textUTF16Funcs, string, length, priorContext, priorContextLength);
    407     return text;
    408 }
    409 
// Pristine UText used as a template: callers copy it into their stack-local
// UTextWithBuffer before attaching the buffer and opening the text.
static UText emptyText = UTEXT_INITIALIZER;
    411 
    412 static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, UBreakIteratorType type, const UChar* string, int length)
    413 {
    414     if (!string)
    415         return 0;
    416 
    417     iterator = ensureIterator(createdIterator, iterator, type);
    418     if (!iterator)
    419         return 0;
    420 
    421     UErrorCode setTextStatus = U_ZERO_ERROR;
    422     ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &setTextStatus);
    423     if (U_FAILURE(setTextStatus))
    424         return 0;
    425 
    426     return iterator;
    427 }
    428 
    429 static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, UBreakIteratorType type, const LChar* string, int length)
    430 {
    431     if (!string)
    432         return 0;
    433 
    434     iterator = ensureIterator(createdIterator, iterator, type);
    435     if (!iterator)
    436         return 0;
    437 
    438     UTextWithBuffer textLocal;
    439     textLocal.text = emptyText;
    440     textLocal.text.extraSize = sizeof(textLocal.buffer);
    441     textLocal.text.pExtra = textLocal.buffer;
    442 
    443     UErrorCode openStatus = U_ZERO_ERROR;
    444     UText* text = textOpenLatin1(&textLocal, string, length, 0, 0, &openStatus);
    445     if (U_FAILURE(openStatus)) {
    446         LOG_ERROR("textOpenLatin1 failed with status %d", openStatus);
    447         return 0;
    448     }
    449 
    450     UErrorCode setTextStatus = U_ZERO_ERROR;
    451     ubrk_setUText(reinterpret_cast<UBreakIterator*>(iterator), text, &setTextStatus);
    452     if (U_FAILURE(setTextStatus)) {
    453         LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
    454         // FIXME: Do we need to call utext_close(text) here?
    455         return 0;
    456     }
    457 
    458     utext_close(text);
    459 
    460     return iterator;
    461 }
    462 
    463 static TextBreakIterator* wordBreakIterator(const LChar* string, int length)
    464 {
    465     static bool createdWordBreakIterator8 = false;
    466     static TextBreakIterator* staticWordBreakIterator8;
    467     return setUpIterator(createdWordBreakIterator8,
    468         staticWordBreakIterator8, UBRK_WORD, string, length);
    469 }
    470 
    471 TextBreakIterator* wordBreakIterator(const UChar* string, int length)
    472 {
    473     static bool createdWordBreakIterator16 = false;
    474     static TextBreakIterator* staticWordBreakIterator16;
    475     return setUpIterator(createdWordBreakIterator16,
    476         staticWordBreakIterator16, UBRK_WORD, string, length);
    477 }
    478 
    479 TextBreakIterator* wordBreakIterator(const String& string, int start, int length)
    480 {
    481     if (string.isEmpty())
    482         return 0;
    483     if (string.is8Bit())
    484         return wordBreakIterator(string.characters8() + start, length);
    485     return wordBreakIterator(string.characters16() + start, length);
    486 }
    487 
    488 TextBreakIterator* acquireLineBreakIterator(const LChar* string, int length, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
    489 {
    490     UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
    491     if (!iterator)
    492         return 0;
    493 
    494     UTextWithBuffer textLocal;
    495     textLocal.text = emptyText;
    496     textLocal.text.extraSize = sizeof(textLocal.buffer);
    497     textLocal.text.pExtra = textLocal.buffer;
    498 
    499     UErrorCode openStatus = U_ZERO_ERROR;
    500     UText* text = textOpenLatin1(&textLocal, string, length, priorContext, priorContextLength, &openStatus);
    501     if (U_FAILURE(openStatus)) {
    502         LOG_ERROR("textOpenLatin1 failed with status %d", openStatus);
    503         return 0;
    504     }
    505 
    506     UErrorCode setTextStatus = U_ZERO_ERROR;
    507     ubrk_setUText(iterator, text, &setTextStatus);
    508     if (U_FAILURE(setTextStatus)) {
    509         // FIXME: Do we need to call utext_close(text) here?
    510         LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
    511         return 0;
    512     }
    513 
    514     utext_close(text);
    515 
    516     return reinterpret_cast<TextBreakIterator*>(iterator);
    517 }
    518 
    519 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
    520 {
    521     UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
    522     if (!iterator)
    523         return 0;
    524 
    525     UText textLocal = UTEXT_INITIALIZER;
    526 
    527     UErrorCode openStatus = U_ZERO_ERROR;
    528     UText* text = textOpenUTF16(&textLocal, string, length, priorContext, priorContextLength, &openStatus);
    529     if (U_FAILURE(openStatus)) {
    530         LOG_ERROR("textOpenUTF16 failed with status %d", openStatus);
    531         return 0;
    532     }
    533 
    534     UErrorCode setTextStatus = U_ZERO_ERROR;
    535     ubrk_setUText(iterator, text, &setTextStatus);
    536     if (U_FAILURE(setTextStatus)) {
    537         // FIXME: Do we need to call utext_close(text) here?
    538         LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
    539         return 0;
    540     }
    541 
    542     utext_close(text);
    543 
    544     return reinterpret_cast<TextBreakIterator*>(iterator);
    545 }
    546 
    547 void releaseLineBreakIterator(TextBreakIterator* iterator)
    548 {
    549     ASSERT_ARG(iterator, iterator);
    550 
    551     LineBreakIteratorPool::sharedPool().put(reinterpret_cast<UBreakIterator*>(iterator));
    552 }
    553 
// Single-slot cache holding a spare character break iterator, or 0 when the
// slot is empty. Written only via compareAndSwapNonSharedCharacterBreakIterator().
static TextBreakIterator* nonSharedCharacterBreakIterator;
    555 
    556 static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
    557 {
    558     DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
    559     MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
    560     if (nonSharedCharacterBreakIterator != expected)
    561         return false;
    562     nonSharedCharacterBreakIterator = newValue;
    563     return true;
    564 }
    565 
    566 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const String& string)
    567     : m_is8Bit(true)
    568     , m_charaters8(0)
    569     , m_offset(0)
    570     , m_length(0)
    571     , m_iterator(0)
    572 {
    573     if (string.isEmpty())
    574         return;
    575 
    576     m_is8Bit = string.is8Bit();
    577 
    578     if (m_is8Bit) {
    579         m_charaters8 = string.characters8();
    580         m_offset = 0;
    581         m_length = string.length();
    582         return;
    583     }
    584 
    585     createIteratorForBuffer(string.characters16(), string.length());
    586 }
    587 
// Builds a character break iterator over a raw 16-bit buffer. This overload
// always takes the ICU path (m_is8Bit is false), so the 8-bit bookkeeping
// members stay at their zero defaults.
NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, unsigned length)
    : m_is8Bit(false)
    , m_charaters8(0)
    , m_offset(0)
    , m_length(0)
    , m_iterator(0)
{
    createIteratorForBuffer(buffer, length);
}
    597 
    598 void NonSharedCharacterBreakIterator::createIteratorForBuffer(const UChar* buffer, unsigned length)
    599 {
    600     m_iterator = nonSharedCharacterBreakIterator;
    601     bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);
    602     m_iterator = setUpIterator(createdIterator, m_iterator, UBRK_CHARACTER, buffer, length);
    603 }
    604 
    605 NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
    606 {
    607     if (m_is8Bit)
    608         return;
    609     if (!compareAndSwapNonSharedCharacterBreakIterator(0, m_iterator))
    610         ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
    611 }
    612 
    613 int NonSharedCharacterBreakIterator::next()
    614 {
    615     if (!m_is8Bit)
    616         return textBreakNext(m_iterator);
    617 
    618     if (m_offset >= m_length)
    619         return TextBreakDone;
    620 
    621     m_offset += clusterLengthStartingAt(m_offset);
    622     return m_offset;
    623 }
    624 
    625 int NonSharedCharacterBreakIterator::current()
    626 {
    627     if (!m_is8Bit)
    628         return textBreakCurrent(m_iterator);
    629     return m_offset;
    630 }
    631 
    632 bool NonSharedCharacterBreakIterator::isBreak(int offset) const
    633 {
    634     if (!m_is8Bit)
    635         return isTextBreak(m_iterator, offset);
    636     return !isLFAfterCR(offset);
    637 }
    638 
    639 int NonSharedCharacterBreakIterator::preceding(int offset) const
    640 {
    641     if (!m_is8Bit)
    642         return textBreakPreceding(m_iterator, offset);
    643     if (offset <= 0)
    644         return TextBreakDone;
    645     if (isLFAfterCR(offset))
    646         return offset - 2;
    647     return offset - 1;
    648 }
    649 
    650 int NonSharedCharacterBreakIterator::following(int offset) const
    651 {
    652     if (!m_is8Bit)
    653         return textBreakFollowing(m_iterator, offset);
    654     if (static_cast<unsigned>(offset) >= m_length)
    655         return TextBreakDone;
    656     return offset + clusterLengthStartingAt(offset);
    657 }
    658 
    659 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
    660 {
    661     static bool createdSentenceBreakIterator = false;
    662     static TextBreakIterator* staticSentenceBreakIterator;
    663     return setUpIterator(createdSentenceBreakIterator,
    664         staticSentenceBreakIterator, UBRK_SENTENCE, string, length);
    665 }
    666 
    667 int textBreakFirst(TextBreakIterator* iterator)
    668 {
    669     return ubrk_first(reinterpret_cast<UBreakIterator*>(iterator));
    670 }
    671 
    672 int textBreakLast(TextBreakIterator* iterator)
    673 {
    674     return ubrk_last(reinterpret_cast<UBreakIterator*>(iterator));
    675 }
    676 
    677 int textBreakNext(TextBreakIterator* iterator)
    678 {
    679     return ubrk_next(reinterpret_cast<UBreakIterator*>(iterator));
    680 }
    681 
    682 int textBreakPrevious(TextBreakIterator* iterator)
    683 {
    684     return ubrk_previous(reinterpret_cast<UBreakIterator*>(iterator));
    685 }
    686 
    687 int textBreakPreceding(TextBreakIterator* iterator, int pos)
    688 {
    689     return ubrk_preceding(reinterpret_cast<UBreakIterator*>(iterator), pos);
    690 }
    691 
    692 int textBreakFollowing(TextBreakIterator* iterator, int pos)
    693 {
    694     return ubrk_following(reinterpret_cast<UBreakIterator*>(iterator), pos);
    695 }
    696 
    697 int textBreakCurrent(TextBreakIterator* iterator)
    698 {
    699     return ubrk_current(reinterpret_cast<UBreakIterator*>(iterator));
    700 }
    701 
    702 bool isTextBreak(TextBreakIterator* iterator, int position)
    703 {
    704     return ubrk_isBoundary(reinterpret_cast<UBreakIterator*>(iterator), position);
    705 }
    706 
    707 bool isWordTextBreak(TextBreakIterator* iterator)
    708 {
    709     int ruleStatus = ubrk_getRuleStatus(reinterpret_cast<UBreakIterator*>(iterator));
    710     return ruleStatus != UBRK_WORD_NONE;
    711 }
    712 
    713 static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator,
    714     const char* breakRules, const UChar* string, int length)
    715 {
    716     if (!string)
    717         return 0;
    718 
    719     if (!createdIterator) {
    720         UParseError parseStatus;
    721         UErrorCode openStatus = U_ZERO_ERROR;
    722         Vector<UChar> rules;
    723         String(breakRules).appendTo(rules);
    724         iterator = reinterpret_cast<TextBreakIterator*>(ubrk_openRules(rules.data(), rules.size(), 0, 0, &parseStatus, &openStatus));
    725         createdIterator = true;
    726         ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
    727     }
    728     if (!iterator)
    729         return 0;
    730 
    731     UErrorCode setTextStatus = U_ZERO_ERROR;
    732     ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &setTextStatus);
    733     if (U_FAILURE(setTextStatus))
    734         return 0;
    735 
    736     return iterator;
    737 }
    738 
    739 TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
    740 {
    741     // This rule set is based on character-break iterator rules of ICU 4.0
    742     // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
    743     // The major differences from the original ones are listed below:
    744     // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
    745     // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
    746     // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
    747     // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks.
    748     // * Added rules for regional indicator symbols.
    749     static const char* kRules =
    750         "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
    751         "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
    752         "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
    753         "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced marks
    754         "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
    755         "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
    756         "$L       = [\\p{Grapheme_Cluster_Break = L}];"
    757         "$V       = [\\p{Grapheme_Cluster_Break = V}];"
    758         "$T       = [\\p{Grapheme_Cluster_Break = T}];"
    759         "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
    760         "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
    761         "$Hin0    = [\\u0905-\\u0939];"    // Devanagari Letter A,...,Ha
    762         "$HinV    = \\u094D;"              // Devanagari Sign Virama
    763         "$Hin1    = [\\u0915-\\u0939];"    // Devanagari Letter Ka,...,Ha
    764         "$Ben0    = [\\u0985-\\u09B9];"    // Bengali Letter A,...,Ha
    765         "$BenV    = \\u09CD;"              // Bengali Sign Virama
    766         "$Ben1    = [\\u0995-\\u09B9];"    // Bengali Letter Ka,...,Ha
    767         "$Pan0    = [\\u0A05-\\u0A39];"    // Gurmukhi Letter A,...,Ha
    768         "$PanV    = \\u0A4D;"              // Gurmukhi Sign Virama
    769         "$Pan1    = [\\u0A15-\\u0A39];"    // Gurmukhi Letter Ka,...,Ha
    770         "$Guj0    = [\\u0A85-\\u0AB9];"    // Gujarati Letter A,...,Ha
    771         "$GujV    = \\u0ACD;"              // Gujarati Sign Virama
    772         "$Guj1    = [\\u0A95-\\u0AB9];"    // Gujarati Letter Ka,...,Ha
    773         "$Ori0    = [\\u0B05-\\u0B39];"    // Oriya Letter A,...,Ha
    774         "$OriV    = \\u0B4D;"              // Oriya Sign Virama
    775         "$Ori1    = [\\u0B15-\\u0B39];"    // Oriya Letter Ka,...,Ha
    776         "$Tel0    = [\\u0C05-\\u0C39];"    // Telugu Letter A,...,Ha
    777         "$TelV    = \\u0C4D;"              // Telugu Sign Virama
    778         "$Tel1    = [\\u0C14-\\u0C39];"    // Telugu Letter Ka,...,Ha
    779         "$Kan0    = [\\u0C85-\\u0CB9];"    // Kannada Letter A,...,Ha
    780         "$KanV    = \\u0CCD;"              // Kannada Sign Virama
    781         "$Kan1    = [\\u0C95-\\u0CB9];"    // Kannada Letter A,...,Ha
    782         "$Mal0    = [\\u0D05-\\u0D39];"    // Malayalam Letter A,...,Ha
    783         "$MalV    = \\u0D4D;"              // Malayalam Sign Virama
    784         "$Mal1    = [\\u0D15-\\u0D39];"    // Malayalam Letter A,...,Ha
    785         "$RI      = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
    786         "!!chain;"
    787         "!!forward;"
    788         "$CR $LF;"
    789         "$L ($L | $V | $LV | $LVT);"
    790         "($LV | $V) ($V | $T);"
    791         "($LVT | $T) $T;"
    792         "[^$Control $CR $LF] $Extend;"
    793         "[^$Control $CR $LF] $SpacingMark;"
    794         "$RI $RI / $RI;"
    795         "$RI $RI;"
    796         "$Hin0 $HinV $Hin1;"               // Devanagari Virama (forward)
    797         "$Ben0 $BenV $Ben1;"               // Bengali Virama (forward)
    798         "$Pan0 $PanV $Pan1;"               // Gurmukhi Virama (forward)
    799         "$Guj0 $GujV $Guj1;"               // Gujarati Virama (forward)
    800         "$Ori0 $OriV $Ori1;"               // Oriya Virama (forward)
    801         "$Tel0 $TelV $Tel1;"               // Telugu Virama (forward)
    802         "$Kan0 $KanV $Kan1;"               // Kannada Virama (forward)
    803         "$Mal0 $MalV $Mal1;"               // Malayalam Virama (forward)
    804         "!!reverse;"
    805         "$LF $CR;"
    806         "($L | $V | $LV | $LVT) $L;"
    807         "($V | $T) ($LV | $V);"
    808         "$T ($LVT | $T);"
    809         "$Extend      [^$Control $CR $LF];"
    810         "$SpacingMark [^$Control $CR $LF];"
    811         "$RI $RI / $RI $RI;"
    812         "$RI $RI;"
    813         "$Hin1 $HinV $Hin0;"               // Devanagari Virama (backward)
    814         "$Ben1 $BenV $Ben0;"               // Bengali Virama (backward)
    815         "$Pan1 $PanV $Pan0;"               // Gurmukhi Virama (backward)
    816         "$Guj1 $GujV $Guj0;"               // Gujarati Virama (backward)
    817         "$Ori1 $OriV $Ori0;"               // Gujarati Virama (backward)
    818         "$Tel1 $TelV $Tel0;"               // Telugu Virama (backward)
    819         "$Kan1 $KanV $Kan0;"               // Kannada Virama (backward)
    820         "$Mal1 $MalV $Mal0;"               // Malayalam Virama (backward)
    821         "!!safe_reverse;"
    822         "!!safe_forward;";
    823     static bool createdCursorMovementIterator = false;
    824     static TextBreakIterator* staticCursorMovementIterator;
    825     return setUpIteratorWithRules(createdCursorMovementIterator, staticCursorMovementIterator, kRules, string, length);
    826 }
    827 
    828 }
    829