1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 8 /* 9 * File coleitr.cpp 10 * 11 * 12 * 13 * Created by: Helena Shih 14 * 15 * Modification History: 16 * 17 * Date Name Description 18 * 19 * 6/23/97 helena Adding comments to make code more readable. 20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java 21 * 12/10/99 aliu Ported Thai collation support from Java. 22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) 23 * 02/19/01 swquek Removed CollationElementsIterator() since it is 24 * private constructor and no calls are made to it 25 */ 26 27 #include "unicode/utypes.h" 28 29 #if !UCONFIG_NO_COLLATION 30 31 #include "unicode/coleitr.h" 32 #include "unicode/ustring.h" 33 #include "ucol_imp.h" 34 #include "uassert.h" 35 #include "cmemory.h" 36 37 38 /* Constants --------------------------------------------------------------- */ 39 40 U_NAMESPACE_BEGIN 41 42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) 43 44 /* CollationElementIterator public constructor/destructor ------------------ */ 45 46 CollationElementIterator::CollationElementIterator( 47 const CollationElementIterator& other) 48 : UObject(other), isDataOwned_(TRUE) 49 { 50 UErrorCode status = U_ZERO_ERROR; 51 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, 52 &status); 53 54 *this = other; 55 } 56 57 CollationElementIterator::~CollationElementIterator() 58 { 59 if (isDataOwned_) { 60 ucol_closeElements(m_data_); 61 } 62 } 63 64 /* CollationElementIterator public methods --------------------------------- */ 65 66 int32_t CollationElementIterator::getOffset() const 67 { 68 return ucol_getOffset(m_data_); 69 } 70 71 /** 72 * Get the ordering priority of the next character in the string. 73 * @return the next character's ordering. Returns NULLORDER if an error has 74 * occured or if the end of string has been reached 75 */ 76 int32_t CollationElementIterator::next(UErrorCode& status) 77 { 78 return ucol_next(m_data_, &status); 79 } 80 81 UBool CollationElementIterator::operator!=( 82 const CollationElementIterator& other) const 83 { 84 return !(*this == other); 85 } 86 87 UBool CollationElementIterator::operator==( 88 const CollationElementIterator& that) const 89 { 90 if (this == &that || m_data_ == that.m_data_) { 91 return TRUE; 92 } 93 94 // option comparison 95 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) 96 { 97 return FALSE; 98 } 99 100 // the constructor and setText always sets a length 101 // and we only compare the string not the contents of the normalization 102 // buffer 103 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string); 104 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string); 105 106 if (thislength != thatlength) { 107 return FALSE; 108 } 109 110 if (uprv_memcmp(m_data_->iteratordata_.string, 111 that.m_data_->iteratordata_.string, 112 thislength * U_SIZEOF_UCHAR) != 0) { 113 return FALSE; 114 } 115 if (getOffset() != that.getOffset()) { 116 return FALSE; 117 } 118 119 // checking normalization buffer 120 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { 121 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { 122 return FALSE; 123 } 124 // both are in the normalization buffer 125 if (m_data_->iteratordata_.pos 126 - m_data_->iteratordata_.writableBuffer.getBuffer() 127 != that.m_data_->iteratordata_.pos 128 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { 129 // not in the same position in the normalization buffer 130 return FALSE; 131 } 132 } 133 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { 134 return FALSE; 135 } 136 // checking ce position 137 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) 138 == (that.m_data_->iteratordata_.CEpos 139 - that.m_data_->iteratordata_.CEs); 140 } 141 142 /** 143 * Get the ordering priority of the previous collation element in the string. 144 * @param status the error code status. 145 * @return the previous element's ordering. Returns NULLORDER if an error has 146 * occured or if the start of string has been reached. 147 */ 148 int32_t CollationElementIterator::previous(UErrorCode& status) 149 { 150 return ucol_previous(m_data_, &status); 151 } 152 153 /** 154 * Resets the cursor to the beginning of the string. 155 */ 156 void CollationElementIterator::reset() 157 { 158 ucol_reset(m_data_); 159 } 160 161 void CollationElementIterator::setOffset(int32_t newOffset, 162 UErrorCode& status) 163 { 164 ucol_setOffset(m_data_, newOffset, &status); 165 } 166 167 /** 168 * Sets the source to the new source string. 169 */ 170 void CollationElementIterator::setText(const UnicodeString& source, 171 UErrorCode& status) 172 { 173 if (U_FAILURE(status)) { 174 return; 175 } 176 177 int32_t length = source.length(); 178 UChar *string = NULL; 179 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { 180 uprv_free((UChar *)m_data_->iteratordata_.string); 181 } 182 m_data_->isWritable = TRUE; 183 if (length > 0) { 184 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 185 /* test for NULL */ 186 if (string == NULL) { 187 status = U_MEMORY_ALLOCATION_ERROR; 188 return; 189 } 190 u_memcpy(string, source.getBuffer(), length); 191 } 192 else { 193 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 194 /* test for NULL */ 195 if (string == NULL) { 196 status = U_MEMORY_ALLOCATION_ERROR; 197 return; 198 } 199 *string = 0; 200 } 201 /* Free offsetBuffer before initializing it. */ 202 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); 203 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 204 &m_data_->iteratordata_, &status); 205 206 m_data_->reset_ = TRUE; 207 } 208 209 // Sets the source to the new character iterator. 210 void CollationElementIterator::setText(CharacterIterator& source, 211 UErrorCode& status) 212 { 213 if (U_FAILURE(status)) 214 return; 215 216 int32_t length = source.getLength(); 217 UChar *buffer = NULL; 218 219 if (length == 0) { 220 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 221 /* test for NULL */ 222 if (buffer == NULL) { 223 status = U_MEMORY_ALLOCATION_ERROR; 224 return; 225 } 226 *buffer = 0; 227 } 228 else { 229 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 230 /* test for NULL */ 231 if (buffer == NULL) { 232 status = U_MEMORY_ALLOCATION_ERROR; 233 return; 234 } 235 /* 236 Using this constructor will prevent buffer from being removed when 237 string gets removed 238 */ 239 UnicodeString string; 240 source.getText(string); 241 u_memcpy(buffer, string.getBuffer(), length); 242 } 243 244 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { 245 uprv_free((UChar *)m_data_->iteratordata_.string); 246 } 247 m_data_->isWritable = TRUE; 248 /* Free offsetBuffer before initializing it. */ 249 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); 250 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 251 &m_data_->iteratordata_, &status); 252 m_data_->reset_ = TRUE; 253 } 254 255 int32_t CollationElementIterator::strengthOrder(int32_t order) const 256 { 257 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); 258 // Mask off the unwanted differences. 259 if (s == UCOL_PRIMARY) { 260 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; 261 } 262 else if (s == UCOL_SECONDARY) { 263 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; 264 } 265 266 return order; 267 } 268 269 /* CollationElementIterator private constructors/destructors --------------- */ 270 271 /** 272 * This is the "real" constructor for this class; it constructs an iterator 273 * over the source text using the specified collator 274 */ 275 CollationElementIterator::CollationElementIterator( 276 const UnicodeString& sourceText, 277 const RuleBasedCollator* order, 278 UErrorCode& status) 279 : isDataOwned_(TRUE) 280 { 281 if (U_FAILURE(status)) { 282 return; 283 } 284 285 int32_t length = sourceText.length(); 286 UChar *string = NULL; 287 288 if (length > 0) { 289 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 290 /* test for NULL */ 291 if (string == NULL) { 292 status = U_MEMORY_ALLOCATION_ERROR; 293 return; 294 } 295 /* 296 Using this constructor will prevent buffer from being removed when 297 string gets removed 298 */ 299 u_memcpy(string, sourceText.getBuffer(), length); 300 } 301 else { 302 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 303 /* test for NULL */ 304 if (string == NULL) { 305 status = U_MEMORY_ALLOCATION_ERROR; 306 return; 307 } 308 *string = 0; 309 } 310 m_data_ = ucol_openElements(order->ucollator, string, length, &status); 311 312 /* Test for buffer overflows */ 313 if (U_FAILURE(status)) { 314 return; 315 } 316 m_data_->isWritable = TRUE; 317 } 318 319 /** 320 * This is the "real" constructor for this class; it constructs an iterator over 321 * the source text using the specified collator 322 */ 323 CollationElementIterator::CollationElementIterator( 324 const CharacterIterator& sourceText, 325 const RuleBasedCollator* order, 326 UErrorCode& status) 327 : isDataOwned_(TRUE) 328 { 329 if (U_FAILURE(status)) 330 return; 331 332 // **** should I just drop this test? **** 333 /* 334 if ( sourceText.endIndex() != 0 ) 335 { 336 // A CollationElementIterator is really a two-layered beast. 337 // Internally it uses a Normalizer to munge the source text into a form 338 // where all "composed" Unicode characters (such as \u00FC) are split into a 339 // normal character and a combining accent character. 340 // Afterward, CollationElementIterator does its own processing to handle 341 // expanding and contracting collation sequences, ignorables, and so on. 342 343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL 344 ? Normalizer::NO_OP : order->getDecomposition(); 345 346 text = new Normalizer(sourceText, decomp); 347 if (text == NULL) 348 status = U_MEMORY_ALLOCATION_ERROR; 349 } 350 */ 351 int32_t length = sourceText.getLength(); 352 UChar *buffer; 353 if (length > 0) { 354 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 355 /* test for NULL */ 356 if (buffer == NULL) { 357 status = U_MEMORY_ALLOCATION_ERROR; 358 return; 359 } 360 /* 361 Using this constructor will prevent buffer from being removed when 362 string gets removed 363 */ 364 UnicodeString string(buffer, length, length); 365 ((CharacterIterator &)sourceText).getText(string); 366 const UChar *temp = string.getBuffer(); 367 u_memcpy(buffer, temp, length); 368 } 369 else { 370 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 371 /* test for NULL */ 372 if (buffer == NULL) { 373 status = U_MEMORY_ALLOCATION_ERROR; 374 return; 375 } 376 *buffer = 0; 377 } 378 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); 379 380 /* Test for buffer overflows */ 381 if (U_FAILURE(status)) { 382 return; 383 } 384 m_data_->isWritable = TRUE; 385 } 386 387 /* CollationElementIterator protected methods ----------------------------- */ 388 389 const CollationElementIterator& CollationElementIterator::operator=( 390 const CollationElementIterator& other) 391 { 392 if (this != &other) 393 { 394 UCollationElements *ucolelem = this->m_data_; 395 UCollationElements *otherucolelem = other.m_data_; 396 collIterate *coliter = &(ucolelem->iteratordata_); 397 collIterate *othercoliter = &(otherucolelem->iteratordata_); 398 int length = 0; 399 400 // checking only UCOL_ITER_HASLEN is not enough here as we may be in 401 // the normalization buffer 402 length = (int)(othercoliter->endp - othercoliter->string); 403 404 ucolelem->reset_ = otherucolelem->reset_; 405 ucolelem->isWritable = TRUE; 406 407 /* create a duplicate of string */ 408 if (length > 0) { 409 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); 410 if(coliter->string != NULL) { 411 uprv_memcpy((UChar *)coliter->string, othercoliter->string, 412 length * U_SIZEOF_UCHAR); 413 } else { // Error: couldn't allocate memory. No copying should be done 414 length = 0; 415 } 416 } 417 else { 418 coliter->string = NULL; 419 } 420 421 /* start and end of string */ 422 coliter->endp = coliter->string == NULL ? NULL : coliter->string + length; 423 424 /* handle writable buffer here */ 425 426 if (othercoliter->flags & UCOL_ITER_INNORMBUF) { 427 coliter->writableBuffer = othercoliter->writableBuffer; 428 coliter->writableBuffer.getTerminatedBuffer(); 429 } 430 431 /* current position */ 432 if (othercoliter->pos >= othercoliter->string && 433 othercoliter->pos <= othercoliter->endp) 434 { 435 U_ASSERT(coliter->string != NULL); 436 coliter->pos = coliter->string + 437 (othercoliter->pos - othercoliter->string); 438 } 439 else { 440 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + 441 (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); 442 } 443 444 /* CE buffer */ 445 int32_t CEsize; 446 if (coliter->extendCEs) { 447 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE); 448 CEsize = sizeof(othercoliter->extendCEs); 449 if (CEsize > 0) { 450 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); 451 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); 452 } 453 coliter->toReturn = coliter->extendCEs + 454 (othercoliter->toReturn - othercoliter->extendCEs); 455 coliter->CEpos = coliter->extendCEs + CEsize; 456 } else { 457 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); 458 if (CEsize > 0) { 459 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); 460 } 461 coliter->toReturn = coliter->CEs + 462 (othercoliter->toReturn - othercoliter->CEs); 463 coliter->CEpos = coliter->CEs + CEsize; 464 } 465 466 if (othercoliter->fcdPosition != NULL) { 467 U_ASSERT(coliter->string != NULL); 468 coliter->fcdPosition = coliter->string + 469 (othercoliter->fcdPosition 470 - othercoliter->string); 471 } 472 else { 473 coliter->fcdPosition = NULL; 474 } 475 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; 476 coliter->origFlags = othercoliter->origFlags; 477 coliter->coll = othercoliter->coll; 478 this->isDataOwned_ = TRUE; 479 } 480 481 return *this; 482 } 483 484 U_NAMESPACE_END 485 486 #endif /* #if !UCONFIG_NO_COLLATION */ 487 488 /* eof */ 489