1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2010, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 8 /* 9 * File coleitr.cpp 10 * 11 * 12 * 13 * Created by: Helena Shih 14 * 15 * Modification History: 16 * 17 * Date Name Description 18 * 19 * 6/23/97 helena Adding comments to make code more readable. 20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java 21 * 12/10/99 aliu Ported Thai collation support from Java. 22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) 23 * 02/19/01 swquek Removed CollationElementsIterator() since it is 24 * private constructor and no calls are made to it 25 */ 26 27 #include "unicode/utypes.h" 28 29 #if !UCONFIG_NO_COLLATION 30 31 #include "unicode/coleitr.h" 32 #include "unicode/ustring.h" 33 #include "ucol_imp.h" 34 #include "cmemory.h" 35 36 37 /* Constants --------------------------------------------------------------- */ 38 39 U_NAMESPACE_BEGIN 40 41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) 42 43 /* CollationElementIterator public constructor/destructor ------------------ */ 44 45 CollationElementIterator::CollationElementIterator( 46 const CollationElementIterator& other) 47 : UObject(other), isDataOwned_(TRUE) 48 { 49 UErrorCode status = U_ZERO_ERROR; 50 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, 51 &status); 52 53 *this = other; 54 } 55 56 CollationElementIterator::~CollationElementIterator() 57 { 58 if (isDataOwned_) { 59 ucol_closeElements(m_data_); 60 } 61 } 62 63 /* CollationElementIterator public methods --------------------------------- */ 64 65 int32_t CollationElementIterator::getOffset() const 66 { 67 return ucol_getOffset(m_data_); 68 } 69 70 /** 71 * Get the ordering priority of the next character in the string. 72 * @return the next character's ordering. Returns NULLORDER if an error has 73 * occured or if the end of string has been reached 74 */ 75 int32_t CollationElementIterator::next(UErrorCode& status) 76 { 77 return ucol_next(m_data_, &status); 78 } 79 80 UBool CollationElementIterator::operator!=( 81 const CollationElementIterator& other) const 82 { 83 return !(*this == other); 84 } 85 86 UBool CollationElementIterator::operator==( 87 const CollationElementIterator& that) const 88 { 89 if (this == &that || m_data_ == that.m_data_) { 90 return TRUE; 91 } 92 93 // option comparison 94 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) 95 { 96 return FALSE; 97 } 98 99 // the constructor and setText always sets a length 100 // and we only compare the string not the contents of the normalization 101 // buffer 102 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string); 103 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string); 104 105 if (thislength != thatlength) { 106 return FALSE; 107 } 108 109 if (uprv_memcmp(m_data_->iteratordata_.string, 110 that.m_data_->iteratordata_.string, 111 thislength * U_SIZEOF_UCHAR) != 0) { 112 return FALSE; 113 } 114 if (getOffset() != that.getOffset()) { 115 return FALSE; 116 } 117 118 // checking normalization buffer 119 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { 120 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { 121 return FALSE; 122 } 123 // both are in the normalization buffer 124 if (m_data_->iteratordata_.pos 125 - m_data_->iteratordata_.writableBuffer.getBuffer() 126 != that.m_data_->iteratordata_.pos 127 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { 128 // not in the same position in the normalization buffer 129 return FALSE; 130 } 131 } 132 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { 133 return FALSE; 134 } 135 // checking ce position 136 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) 137 == (that.m_data_->iteratordata_.CEpos 138 - that.m_data_->iteratordata_.CEs); 139 } 140 141 /** 142 * Get the ordering priority of the previous collation element in the string. 143 * @param status the error code status. 144 * @return the previous element's ordering. Returns NULLORDER if an error has 145 * occured or if the start of string has been reached. 146 */ 147 int32_t CollationElementIterator::previous(UErrorCode& status) 148 { 149 return ucol_previous(m_data_, &status); 150 } 151 152 /** 153 * Resets the cursor to the beginning of the string. 154 */ 155 void CollationElementIterator::reset() 156 { 157 ucol_reset(m_data_); 158 } 159 160 void CollationElementIterator::setOffset(int32_t newOffset, 161 UErrorCode& status) 162 { 163 ucol_setOffset(m_data_, newOffset, &status); 164 } 165 166 /** 167 * Sets the source to the new source string. 168 */ 169 void CollationElementIterator::setText(const UnicodeString& source, 170 UErrorCode& status) 171 { 172 if (U_FAILURE(status)) { 173 return; 174 } 175 176 int32_t length = source.length(); 177 UChar *string = NULL; 178 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { 179 uprv_free((UChar *)m_data_->iteratordata_.string); 180 } 181 m_data_->isWritable = TRUE; 182 if (length > 0) { 183 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 184 /* test for NULL */ 185 if (string == NULL) { 186 status = U_MEMORY_ALLOCATION_ERROR; 187 return; 188 } 189 u_memcpy(string, source.getBuffer(), length); 190 } 191 else { 192 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 193 /* test for NULL */ 194 if (string == NULL) { 195 status = U_MEMORY_ALLOCATION_ERROR; 196 return; 197 } 198 *string = 0; 199 } 200 /* Free offsetBuffer before initializing it. */ 201 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); 202 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 203 &m_data_->iteratordata_, &status); 204 205 m_data_->reset_ = TRUE; 206 } 207 208 // Sets the source to the new character iterator. 209 void CollationElementIterator::setText(CharacterIterator& source, 210 UErrorCode& status) 211 { 212 if (U_FAILURE(status)) 213 return; 214 215 int32_t length = source.getLength(); 216 UChar *buffer = NULL; 217 218 if (length == 0) { 219 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 220 /* test for NULL */ 221 if (buffer == NULL) { 222 status = U_MEMORY_ALLOCATION_ERROR; 223 return; 224 } 225 *buffer = 0; 226 } 227 else { 228 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 229 /* test for NULL */ 230 if (buffer == NULL) { 231 status = U_MEMORY_ALLOCATION_ERROR; 232 return; 233 } 234 /* 235 Using this constructor will prevent buffer from being removed when 236 string gets removed 237 */ 238 UnicodeString string; 239 source.getText(string); 240 u_memcpy(buffer, string.getBuffer(), length); 241 } 242 243 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { 244 uprv_free((UChar *)m_data_->iteratordata_.string); 245 } 246 m_data_->isWritable = TRUE; 247 /* Free offsetBuffer before initializing it. */ 248 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); 249 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 250 &m_data_->iteratordata_, &status); 251 m_data_->reset_ = TRUE; 252 } 253 254 int32_t CollationElementIterator::strengthOrder(int32_t order) const 255 { 256 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); 257 // Mask off the unwanted differences. 258 if (s == UCOL_PRIMARY) { 259 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; 260 } 261 else if (s == UCOL_SECONDARY) { 262 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; 263 } 264 265 return order; 266 } 267 268 /* CollationElementIterator private constructors/destructors --------------- */ 269 270 /** 271 * This is the "real" constructor for this class; it constructs an iterator 272 * over the source text using the specified collator 273 */ 274 CollationElementIterator::CollationElementIterator( 275 const UnicodeString& sourceText, 276 const RuleBasedCollator* order, 277 UErrorCode& status) 278 : isDataOwned_(TRUE) 279 { 280 if (U_FAILURE(status)) { 281 return; 282 } 283 284 int32_t length = sourceText.length(); 285 UChar *string = NULL; 286 287 if (length > 0) { 288 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 289 /* test for NULL */ 290 if (string == NULL) { 291 status = U_MEMORY_ALLOCATION_ERROR; 292 return; 293 } 294 /* 295 Using this constructor will prevent buffer from being removed when 296 string gets removed 297 */ 298 u_memcpy(string, sourceText.getBuffer(), length); 299 } 300 else { 301 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 302 /* test for NULL */ 303 if (string == NULL) { 304 status = U_MEMORY_ALLOCATION_ERROR; 305 return; 306 } 307 *string = 0; 308 } 309 m_data_ = ucol_openElements(order->ucollator, string, length, &status); 310 311 /* Test for buffer overflows */ 312 if (U_FAILURE(status)) { 313 return; 314 } 315 m_data_->isWritable = TRUE; 316 } 317 318 /** 319 * This is the "real" constructor for this class; it constructs an iterator over 320 * the source text using the specified collator 321 */ 322 CollationElementIterator::CollationElementIterator( 323 const CharacterIterator& sourceText, 324 const RuleBasedCollator* order, 325 UErrorCode& status) 326 : isDataOwned_(TRUE) 327 { 328 if (U_FAILURE(status)) 329 return; 330 331 // **** should I just drop this test? **** 332 /* 333 if ( sourceText.endIndex() != 0 ) 334 { 335 // A CollationElementIterator is really a two-layered beast. 336 // Internally it uses a Normalizer to munge the source text into a form 337 // where all "composed" Unicode characters (such as \u00FC) are split into a 338 // normal character and a combining accent character. 339 // Afterward, CollationElementIterator does its own processing to handle 340 // expanding and contracting collation sequences, ignorables, and so on. 341 342 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL 343 ? Normalizer::NO_OP : order->getDecomposition(); 344 345 text = new Normalizer(sourceText, decomp); 346 if (text == NULL) 347 status = U_MEMORY_ALLOCATION_ERROR; 348 } 349 */ 350 int32_t length = sourceText.getLength(); 351 UChar *buffer; 352 if (length > 0) { 353 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 354 /* test for NULL */ 355 if (buffer == NULL) { 356 status = U_MEMORY_ALLOCATION_ERROR; 357 return; 358 } 359 /* 360 Using this constructor will prevent buffer from being removed when 361 string gets removed 362 */ 363 UnicodeString string(buffer, length, length); 364 ((CharacterIterator &)sourceText).getText(string); 365 const UChar *temp = string.getBuffer(); 366 u_memcpy(buffer, temp, length); 367 } 368 else { 369 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 370 /* test for NULL */ 371 if (buffer == NULL) { 372 status = U_MEMORY_ALLOCATION_ERROR; 373 return; 374 } 375 *buffer = 0; 376 } 377 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); 378 379 /* Test for buffer overflows */ 380 if (U_FAILURE(status)) { 381 return; 382 } 383 m_data_->isWritable = TRUE; 384 } 385 386 /* CollationElementIterator protected methods ----------------------------- */ 387 388 const CollationElementIterator& CollationElementIterator::operator=( 389 const CollationElementIterator& other) 390 { 391 if (this != &other) 392 { 393 UCollationElements *ucolelem = this->m_data_; 394 UCollationElements *otherucolelem = other.m_data_; 395 collIterate *coliter = &(ucolelem->iteratordata_); 396 collIterate *othercoliter = &(otherucolelem->iteratordata_); 397 int length = 0; 398 399 // checking only UCOL_ITER_HASLEN is not enough here as we may be in 400 // the normalization buffer 401 length = (int)(othercoliter->endp - othercoliter->string); 402 403 ucolelem->reset_ = otherucolelem->reset_; 404 ucolelem->isWritable = TRUE; 405 406 /* create a duplicate of string */ 407 if (length > 0) { 408 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); 409 if(coliter->string != NULL) { 410 uprv_memcpy((UChar *)coliter->string, othercoliter->string, 411 length * U_SIZEOF_UCHAR); 412 } else { // Error: couldn't allocate memory. No copying should be done 413 length = 0; 414 } 415 } 416 else { 417 coliter->string = NULL; 418 } 419 420 /* start and end of string */ 421 coliter->endp = coliter->string + length; 422 423 /* handle writable buffer here */ 424 425 if (othercoliter->flags & UCOL_ITER_INNORMBUF) { 426 coliter->writableBuffer = othercoliter->writableBuffer; 427 coliter->writableBuffer.getTerminatedBuffer(); 428 } 429 430 /* current position */ 431 if (othercoliter->pos >= othercoliter->string && 432 othercoliter->pos <= othercoliter->endp) 433 { 434 coliter->pos = coliter->string + 435 (othercoliter->pos - othercoliter->string); 436 } 437 else { 438 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + 439 (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); 440 } 441 442 /* CE buffer */ 443 int32_t CEsize; 444 if (coliter->extendCEs) { 445 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE); 446 CEsize = sizeof(othercoliter->extendCEs); 447 if (CEsize > 0) { 448 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); 449 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); 450 } 451 coliter->toReturn = coliter->extendCEs + 452 (othercoliter->toReturn - othercoliter->extendCEs); 453 coliter->CEpos = coliter->extendCEs + CEsize; 454 } else { 455 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); 456 if (CEsize > 0) { 457 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); 458 } 459 coliter->toReturn = coliter->CEs + 460 (othercoliter->toReturn - othercoliter->CEs); 461 coliter->CEpos = coliter->CEs + CEsize; 462 } 463 464 if (othercoliter->fcdPosition != NULL) { 465 coliter->fcdPosition = coliter->string + 466 (othercoliter->fcdPosition 467 - othercoliter->string); 468 } 469 else { 470 coliter->fcdPosition = NULL; 471 } 472 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; 473 coliter->origFlags = othercoliter->origFlags; 474 coliter->coll = othercoliter->coll; 475 this->isDataOwned_ = TRUE; 476 } 477 478 return *this; 479 } 480 481 U_NAMESPACE_END 482 483 #endif /* #if !UCONFIG_NO_COLLATION */ 484 485 /* eof */ 486