1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: ustr_titlecase_brkiter.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2011may30 14 * created by: Markus W. Scherer 15 * 16 * Titlecasing functions that are based on BreakIterator 17 * were moved here to break dependency cycles among parts of the common library. 18 */ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_BREAK_ITERATION 23 24 #include "unicode/brkiter.h" 25 #include "unicode/casemap.h" 26 #include "unicode/chariter.h" 27 #include "unicode/localpointer.h" 28 #include "unicode/ubrk.h" 29 #include "unicode/ucasemap.h" 30 #include "unicode/utext.h" 31 #include "cmemory.h" 32 #include "uassert.h" 33 #include "ucase.h" 34 #include "ucasemap_imp.h" 35 36 U_NAMESPACE_BEGIN 37 38 /** 39 * Whole-string BreakIterator. 40 * Titlecasing only calls setText(), first(), and next(). 41 * We implement the rest only to satisfy the abstract interface. 42 */ 43 class WholeStringBreakIterator : public BreakIterator { 44 public: 45 WholeStringBreakIterator() : BreakIterator(), length(0) {} 46 ~WholeStringBreakIterator() U_OVERRIDE; 47 UBool operator==(const BreakIterator&) const U_OVERRIDE; 48 BreakIterator *clone() const U_OVERRIDE; 49 static UClassID U_EXPORT2 getStaticClassID(); 50 UClassID getDynamicClassID() const U_OVERRIDE; 51 CharacterIterator &getText() const U_OVERRIDE; 52 UText *getUText(UText *fillIn, UErrorCode &errorCode) const U_OVERRIDE; 53 void setText(const UnicodeString &text) U_OVERRIDE; 54 void setText(UText *text, UErrorCode &errorCode) U_OVERRIDE; 55 void adoptText(CharacterIterator* it) U_OVERRIDE; 56 int32_t first() U_OVERRIDE; 57 int32_t last() U_OVERRIDE; 58 int32_t previous() U_OVERRIDE; 59 int32_t next() U_OVERRIDE; 60 int32_t current() const U_OVERRIDE; 61 int32_t following(int32_t offset) U_OVERRIDE; 62 int32_t preceding(int32_t offset) U_OVERRIDE; 63 UBool isBoundary(int32_t offset) U_OVERRIDE; 64 int32_t next(int32_t n) U_OVERRIDE; 65 BreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize, 66 UErrorCode &errorCode) U_OVERRIDE; 67 BreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE; 68 69 private: 70 int32_t length; 71 }; 72 73 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator) 74 75 WholeStringBreakIterator::~WholeStringBreakIterator() {} 76 UBool WholeStringBreakIterator::operator==(const BreakIterator&) const { return FALSE; } 77 BreakIterator *WholeStringBreakIterator::clone() const { return nullptr; } 78 79 CharacterIterator &WholeStringBreakIterator::getText() const { 80 U_ASSERT(FALSE); // really should not be called 81 // Returns a null reference. 82 // Otherwise we would have to define a dummy CharacterIterator, 83 // and either have it as a field and const_cast it to a non-const reference, 84 // or have it via a pointer and return a reference to that. 85 CharacterIterator *none = nullptr; 86 return *none; 87 } 88 UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const { 89 if (U_SUCCESS(errorCode)) { 90 errorCode = U_UNSUPPORTED_ERROR; 91 } 92 return nullptr; 93 } 94 95 void WholeStringBreakIterator::setText(const UnicodeString &text) { 96 length = text.length(); 97 } 98 void WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) { 99 if (U_SUCCESS(errorCode)) { 100 int64_t length64 = utext_nativeLength(text); 101 if (length64 <= INT32_MAX) { 102 length = (int32_t)length64; 103 } else { 104 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 105 } 106 } 107 } 108 void WholeStringBreakIterator::adoptText(CharacterIterator* it) { 109 U_ASSERT(FALSE); // should not be called 110 length = it->getLength(); 111 delete it; 112 } 113 114 int32_t WholeStringBreakIterator::first() { return 0; } 115 int32_t WholeStringBreakIterator::last() { return length; } 116 int32_t WholeStringBreakIterator::previous() { return 0; } 117 int32_t WholeStringBreakIterator::next() { return length; } 118 int32_t WholeStringBreakIterator::current() const { return 0; } 119 int32_t WholeStringBreakIterator::following(int32_t /*offset*/) { return length; } 120 int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; } 121 UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return FALSE; } 122 int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; } 123 124 BreakIterator *WholeStringBreakIterator::createBufferClone( 125 void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) { 126 if (U_SUCCESS(errorCode)) { 127 errorCode = U_UNSUPPORTED_ERROR; 128 } 129 return nullptr; 130 } 131 BreakIterator &WholeStringBreakIterator::refreshInputText( 132 UText * /*input*/, UErrorCode &errorCode) { 133 if (U_SUCCESS(errorCode)) { 134 errorCode = U_UNSUPPORTED_ERROR; 135 } 136 return *this; 137 } 138 139 U_CFUNC 140 BreakIterator *ustrcase_getTitleBreakIterator( 141 const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter, 142 LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode) { 143 if (U_FAILURE(errorCode)) { return nullptr; } 144 options &= U_TITLECASE_ITERATOR_MASK; 145 if (options != 0 && iter != nullptr) { 146 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 147 return nullptr; 148 } 149 if (iter == nullptr) { 150 switch (options) { 151 case 0: 152 iter = BreakIterator::createWordInstance( 153 locale != nullptr ? *locale : Locale(locID), errorCode); 154 break; 155 case U_TITLECASE_WHOLE_STRING: 156 iter = new WholeStringBreakIterator(); 157 if (iter == nullptr) { 158 errorCode = U_MEMORY_ALLOCATION_ERROR; 159 } 160 break; 161 case U_TITLECASE_SENTENCES: 162 iter = BreakIterator::createSentenceInstance( 163 locale != nullptr ? *locale : Locale(locID), errorCode); 164 break; 165 default: 166 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 167 break; 168 } 169 ownedIter.adoptInstead(iter); 170 } 171 return iter; 172 } 173 174 int32_t CaseMap::toTitle( 175 const char *locale, uint32_t options, BreakIterator *iter, 176 const UChar *src, int32_t srcLength, 177 UChar *dest, int32_t destCapacity, Edits *edits, 178 UErrorCode &errorCode) { 179 LocalPointer<BreakIterator> ownedIter; 180 iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); 181 if(iter==NULL) { 182 return 0; 183 } 184 UnicodeString s(srcLength<0, src, srcLength); 185 iter->setText(s); 186 return ustrcase_map( 187 ustrcase_getCaseLocale(locale), options, iter, 188 dest, destCapacity, 189 src, srcLength, 190 ustrcase_internalToTitle, edits, errorCode); 191 } 192 193 U_NAMESPACE_END 194 195 U_NAMESPACE_USE 196 197 U_CAPI int32_t U_EXPORT2 198 u_strToTitle(UChar *dest, int32_t destCapacity, 199 const UChar *src, int32_t srcLength, 200 UBreakIterator *titleIter, 201 const char *locale, 202 UErrorCode *pErrorCode) { 203 LocalPointer<BreakIterator> ownedIter; 204 BreakIterator *iter = ustrcase_getTitleBreakIterator( 205 nullptr, locale, 0, reinterpret_cast<BreakIterator *>(titleIter), 206 ownedIter, *pErrorCode); 207 if (iter == nullptr) { 208 return 0; 209 } 210 UnicodeString s(srcLength<0, src, srcLength); 211 iter->setText(s); 212 return ustrcase_mapWithOverlap( 213 ustrcase_getCaseLocale(locale), 0, iter, 214 dest, destCapacity, 215 src, srcLength, 216 ustrcase_internalToTitle, *pErrorCode); 217 } 218 219 U_CAPI int32_t U_EXPORT2 220 ucasemap_toTitle(UCaseMap *csm, 221 UChar *dest, int32_t destCapacity, 222 const UChar *src, int32_t srcLength, 223 UErrorCode *pErrorCode) { 224 if (U_FAILURE(*pErrorCode)) { 225 return 0; 226 } 227 if (csm->iter == NULL) { 228 LocalPointer<BreakIterator> ownedIter; 229 BreakIterator *iter = ustrcase_getTitleBreakIterator( 230 nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode); 231 if (iter == nullptr) { 232 return 0; 233 } 234 csm->iter = ownedIter.orphan(); 235 } 236 UnicodeString s(srcLength<0, src, srcLength); 237 csm->iter->setText(s); 238 return ustrcase_map( 239 csm->caseLocale, csm->options, csm->iter, 240 dest, destCapacity, 241 src, srcLength, 242 ustrcase_internalToTitle, NULL, *pErrorCode); 243 } 244 245 #endif // !UCONFIG_NO_BREAK_ITERATION 246