1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2001-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: ucol_cnt.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created 02/22/2001 14 * created by: Vladimir Weinstein 15 * 16 * This module maintains a contraction table structure in expanded form 17 * and provides means to flatten this structure 18 * 19 */ 20 21 #include "unicode/utypes.h" 22 23 #if !UCONFIG_NO_COLLATION 24 25 #include "unicode/uchar.h" 26 #include "ucol_cnt.h" 27 #include "cmemory.h" 28 29 static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) { 30 if(tbl->position == tbl->size) { 31 uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t)); 32 if(newData == NULL) { 33 *status = U_MEMORY_ALLOCATION_ERROR; 34 return; 35 } 36 UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar)); 37 if(newCPs == NULL) { 38 uprv_free(newData); 39 *status = U_MEMORY_ALLOCATION_ERROR; 40 return; 41 } 42 tbl->CEs = newData; 43 tbl->codePoints = newCPs; 44 tbl->size *= 2; 45 } 46 } 47 48 U_CAPI CntTable* U_EXPORT2 49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/ 50 uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) { 51 if(U_FAILURE(*status)) { 52 return 0; 53 } 54 CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable)); 55 if(tbl == NULL) { 56 *status = U_MEMORY_ALLOCATION_ERROR; 57 return NULL; 58 } 59 tbl->mapping = mapping; 60 tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *)); 61 if(tbl->elements == NULL) { 62 *status = U_MEMORY_ALLOCATION_ERROR; 63 uprv_free(tbl); 64 return NULL; 65 } 66 tbl->capacity = INIT_EXP_TABLE_SIZE; 67 uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *)); 68 tbl->size = 0; 69 tbl->position = 0; 70 tbl->CEs = NULL; 71 tbl->codePoints = NULL; 72 tbl->offsets = NULL; 73 tbl->currentTag = NOT_FOUND_TAG; 74 return tbl; 75 } 76 77 static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) { 78 ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable)); 79 if(el == NULL) { 80 goto outOfMemory; 81 } 82 el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); 83 if(el->CEs == NULL) { 84 goto outOfMemory; 85 } 86 87 el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar)); 88 if(el->codePoints == NULL) { 89 uprv_free(el->CEs); 90 goto outOfMemory; 91 } 92 93 el->position = 0; 94 el->size = INIT_EXP_TABLE_SIZE; 95 uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); 96 uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar)); 97 98 table->elements[table->size] = el; 99 100 //uhash_put(table->elements, (void *)table->size, el, status); 101 102 *key = table->size++; 103 104 if(table->size == table->capacity) { 105 ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *)); 106 // do realloc 107 /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/ 108 if(newElements == NULL) { 109 uprv_free(el->codePoints); 110 uprv_free(el->CEs); 111 goto outOfMemory; 112 } 113 ContractionTable **oldElements = table->elements; 114 uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *)); 115 uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *)); 116 table->capacity *= 2; 117 table->elements = newElements; 118 uprv_free(oldElements); 119 } 120 121 return el; 122 123 outOfMemory: 124 *status = U_MEMORY_ALLOCATION_ERROR; 125 if (el) uprv_free(el); 126 return NULL; 127 } 128 129 U_CAPI int32_t U_EXPORT2 130 uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) { 131 int32_t i = 0, j = 0; 132 if(U_FAILURE(*status) || table->size == 0) { 133 return 0; 134 } 135 136 table->position = 0; 137 138 if(table->offsets != NULL) { 139 uprv_free(table->offsets); 140 } 141 table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t)); 142 if(table->offsets == NULL) { 143 *status = U_MEMORY_ALLOCATION_ERROR; 144 return 0; 145 } 146 147 148 /* See how much memory we need */ 149 for(i = 0; i<table->size; i++) { 150 table->offsets[i] = table->position+mainOffset; 151 table->position += table->elements[i]->position; 152 } 153 154 /* Allocate it */ 155 if(table->CEs != NULL) { 156 uprv_free(table->CEs); 157 } 158 table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t)); 159 if(table->CEs == NULL) { 160 *status = U_MEMORY_ALLOCATION_ERROR; 161 uprv_free(table->offsets); 162 table->offsets = NULL; 163 return 0; 164 } 165 uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t)); 166 167 if(table->codePoints != NULL) { 168 uprv_free(table->codePoints); 169 } 170 table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar)); 171 if(table->codePoints == NULL) { 172 *status = U_MEMORY_ALLOCATION_ERROR; 173 uprv_free(table->offsets); 174 table->offsets = NULL; 175 uprv_free(table->CEs); 176 table->CEs = NULL; 177 return 0; 178 } 179 uprv_memset(table->codePoints, '?', table->position*sizeof(UChar)); 180 181 /* Now stuff the things in*/ 182 183 UChar *cpPointer = table->codePoints; 184 uint32_t *CEPointer = table->CEs; 185 for(i = 0; i<table->size; i++) { 186 int32_t size = table->elements[i]->position; 187 uint8_t ccMax = 0, ccMin = 255, cc = 0; 188 for(j = 1; j<size; j++) { 189 cc = u_getCombiningClass(table->elements[i]->codePoints[j]); 190 if(cc>ccMax) { 191 ccMax = cc; 192 } 193 if(cc<ccMin) { 194 ccMin = cc; 195 } 196 *(cpPointer+j) = table->elements[i]->codePoints[j]; 197 } 198 *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax; 199 200 uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t)); 201 for(j = 0; j<size; j++) { 202 if(isCntTableElement(*(CEPointer+j))) { 203 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]); 204 } 205 } 206 cpPointer += size; 207 CEPointer += size; 208 } 209 210 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the 211 // start of the flat file). However, what is done below is just wrong and it affects building of 212 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also, 213 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's 214 // feasible. 215 uint32_t CE; 216 for(i = 0; i<=0x10FFFF; i++) { 217 /*CE = ucmpe32_get(table->mapping, i);*/ 218 CE = utrie_get32(table->mapping, i, NULL); 219 if(isCntTableElement(CE)) { 220 CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]); 221 /*ucmpe32_set(table->mapping, i, CE);*/ 222 utrie_set32(table->mapping, i, CE); 223 } 224 } 225 226 227 return table->position; 228 } 229 230 static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) { 231 ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable)); 232 if(r == NULL) { 233 goto outOfMemory; 234 } 235 236 r->position = t->position; 237 r->size = t->size; 238 239 r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size); 240 if(r->codePoints == NULL) { 241 goto outOfMemory; 242 } 243 r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size); 244 if(r->CEs == NULL) { 245 uprv_free(r->codePoints); 246 goto outOfMemory; 247 } 248 uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size); 249 uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size); 250 251 return r; 252 253 outOfMemory: 254 *status = U_MEMORY_ALLOCATION_ERROR; 255 if (r) uprv_free(r); 256 return NULL; 257 } 258 259 U_CAPI CntTable* U_EXPORT2 260 uprv_cnttab_clone(CntTable *t, UErrorCode *status) { 261 if(U_FAILURE(*status)) { 262 return NULL; 263 } 264 int32_t i = 0; 265 CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable)); 266 /* test for NULL */ 267 if (r == NULL) { 268 goto outOfMemory; 269 } 270 r->position = t->position; 271 r->size = t->size; 272 r->capacity = t->capacity; 273 274 r->mapping = t->mapping; 275 276 r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *)); 277 /* test for NULL */ 278 if (r->elements == NULL) { 279 goto outOfMemory; 280 } 281 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *)); 282 283 for(i = 0; i<t->size; i++) { 284 r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status); 285 } 286 287 if(t->CEs != NULL) { 288 r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t)); 289 /* test for NULL */ 290 if (r->CEs == NULL) { 291 uprv_free(r->elements); 292 goto outOfMemory; 293 } 294 uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t)); 295 } else { 296 r->CEs = NULL; 297 } 298 299 if(t->codePoints != NULL) { 300 r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar)); 301 /* test for NULL */ 302 if (r->codePoints == NULL) { 303 uprv_free(r->CEs); 304 uprv_free(r->elements); 305 goto outOfMemory; 306 } 307 uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar)); 308 } else { 309 r->codePoints = NULL; 310 } 311 312 if(t->offsets != NULL) { 313 r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t)); 314 /* test for NULL */ 315 if (r->offsets == NULL) { 316 uprv_free(r->codePoints); 317 uprv_free(r->CEs); 318 uprv_free(r->elements); 319 goto outOfMemory; 320 } 321 uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t)); 322 } else { 323 r->offsets = NULL; 324 } 325 326 return r; 327 328 outOfMemory: 329 *status = U_MEMORY_ALLOCATION_ERROR; 330 if (r) uprv_free(r); 331 return NULL; 332 } 333 334 U_CAPI void U_EXPORT2 335 uprv_cnttab_close(CntTable *table) { 336 int32_t i = 0; 337 for(i = 0; i<table->size; i++) { 338 uprv_free(table->elements[i]->CEs); 339 uprv_free(table->elements[i]->codePoints); 340 uprv_free(table->elements[i]); 341 } 342 uprv_free(table->elements); 343 uprv_free(table->CEs); 344 uprv_free(table->offsets); 345 uprv_free(table->codePoints); 346 uprv_free(table); 347 } 348 349 /* this is for adding non contractions */ 350 U_CAPI uint32_t U_EXPORT2 351 uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) { 352 element &= 0xFFFFFF; 353 354 ContractionTable *tbl = NULL; 355 if(U_FAILURE(*status)) { 356 return 0; 357 } 358 359 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { 360 return 0; 361 } 362 363 tbl->CEs[tbl->position-1] = value; 364 365 return(constructContractCE(table->currentTag, element)); 366 } 367 368 369 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */ 370 U_CAPI uint32_t U_EXPORT2 371 uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) { 372 373 ContractionTable *tbl = NULL; 374 375 if(U_FAILURE(*status)) { 376 return 0; 377 } 378 element &= 0xFFFFFF; 379 380 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { 381 tbl = addATableElement(table, &element, status); 382 if (U_FAILURE(*status)) { 383 return 0; 384 } 385 } 386 387 uprv_growTable(tbl, status); 388 389 uint32_t offset = 0; 390 391 392 while(tbl->codePoints[offset] < codePoint && offset<tbl->position) { 393 offset++; 394 } 395 396 uint32_t i = tbl->position; 397 for(i = tbl->position; i > offset; i--) { 398 tbl->CEs[i] = tbl->CEs[i-1]; 399 tbl->codePoints[i] = tbl->codePoints[i-1]; 400 } 401 402 tbl->CEs[offset] = value; 403 tbl->codePoints[offset] = codePoint; 404 405 tbl->position++; 406 407 return(constructContractCE(table->currentTag, element)); 408 } 409 410 411 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */ 412 U_CAPI uint32_t U_EXPORT2 413 uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) { 414 415 element &= 0xFFFFFF; 416 417 ContractionTable *tbl = NULL; 418 419 if(U_FAILURE(*status)) { 420 return 0; 421 } 422 423 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { 424 tbl = addATableElement(table, &element, status); 425 if (U_FAILURE(*status)) { 426 return 0; 427 } 428 } 429 430 uprv_growTable(tbl, status); 431 432 tbl->CEs[tbl->position] = value; 433 tbl->codePoints[tbl->position] = codePoint; 434 435 tbl->position++; 436 437 return(constructContractCE(table->currentTag, element)); 438 } 439 440 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */ 441 U_CAPI uint32_t U_EXPORT2 442 uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) { 443 444 element &= 0xFFFFFF; 445 ContractionTable *tbl = NULL; 446 447 if(U_FAILURE(*status)) { 448 return 0; 449 } 450 451 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { 452 tbl = addATableElement(table, &element, status); 453 if (U_FAILURE(*status)) { 454 return 0; 455 } 456 457 } 458 459 if(offset >= tbl->size) { 460 *status = U_INDEX_OUTOFBOUNDS_ERROR; 461 return 0; 462 } 463 tbl->CEs[offset] = value; 464 tbl->codePoints[offset] = codePoint; 465 466 //return(offset); 467 return(constructContractCE(table->currentTag, element)); 468 } 469 470 static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) { 471 element &= 0xFFFFFF; 472 ContractionTable *tbl = NULL; 473 474 if(element != 0xFFFFFF) { 475 tbl = table->elements[element]; /* This could also return NULL */ 476 } 477 return tbl; 478 } 479 480 static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) { 481 uint32_t position = 0; 482 if(tbl == NULL) { 483 return -1; 484 } 485 486 while(codePoint > tbl->codePoints[position]) { 487 position++; 488 if(position > tbl->position) { 489 return -1; 490 } 491 } 492 if (codePoint == tbl->codePoints[position]) { 493 return position; 494 } else { 495 return -1; 496 } 497 } 498 499 static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) { 500 if(tbl == NULL) { 501 return UCOL_NOT_FOUND; 502 } 503 if((uint32_t)position > tbl->position || position == -1) { 504 return UCOL_NOT_FOUND; 505 } else { 506 return tbl->CEs[position]; 507 } 508 } 509 510 U_CAPI int32_t U_EXPORT2 511 uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) { 512 513 if(U_FAILURE(*status)) { 514 return 0; 515 } 516 517 return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint); 518 } 519 520 U_CAPI uint32_t U_EXPORT2 521 uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) { 522 if(U_FAILURE(*status)) { 523 return UCOL_NOT_FOUND; 524 } 525 526 return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position)); 527 } 528 529 U_CAPI uint32_t U_EXPORT2 530 uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) { 531 if(U_FAILURE(*status)) { 532 return UCOL_NOT_FOUND; 533 } 534 ContractionTable *tbl = _cnttab_getContractionTable(table, element); 535 return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint)); 536 } 537 538 U_CAPI UBool U_EXPORT2 539 uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) { 540 if(U_FAILURE(*status)) { 541 return FALSE; 542 } 543 544 while(*(ztString)!=0) { 545 element = uprv_cnttab_findCE(table, element, *(ztString), status); 546 if(element == UCOL_NOT_FOUND) { 547 return FALSE; 548 } 549 if(!isCntTableElement(element)) { 550 return TRUE; 551 } 552 ztString++; 553 } 554 return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND); 555 } 556 557 U_CAPI uint32_t U_EXPORT2 558 uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) { 559 560 element &= 0xFFFFFF; 561 ContractionTable *tbl = NULL; 562 563 if(U_FAILURE(*status)) { 564 return 0; 565 } 566 567 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { 568 return 0; 569 } 570 571 uint32_t position = 0; 572 573 while(codePoint > tbl->codePoints[position]) { 574 position++; 575 if(position > tbl->position) { 576 return UCOL_NOT_FOUND; 577 } 578 } 579 if (codePoint == tbl->codePoints[position]) { 580 tbl->CEs[position] = newCE; 581 return element; 582 } else { 583 return UCOL_NOT_FOUND; 584 } 585 } 586 587 #endif /* #if !UCONFIG_NO_COLLATION */ 588