Home | History | Annotate | Download | only in i18n
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2001-2008, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  ucol_cnt.cpp
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created 02/22/2001
     14  *   created by: Vladimir Weinstein
     15  *
     16  * This module maintains a contraction table structure in expanded form
     17  * and provides means to flatten this structure
     18  *
     19  */
     20 
     21 #include "unicode/utypes.h"
     22 
     23 #if !UCONFIG_NO_COLLATION
     24 
     25 #include "unicode/uchar.h"
     26 #include "ucol_cnt.h"
     27 #include "cmemory.h"
     28 
     29 static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
     30     if(tbl->position == tbl->size) {
     31         uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
     32         if(newData == NULL) {
     33             *status = U_MEMORY_ALLOCATION_ERROR;
     34             return;
     35         }
     36         UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
     37         if(newCPs == NULL) {
     38             uprv_free(newData);
     39             *status = U_MEMORY_ALLOCATION_ERROR;
     40             return;
     41         }
     42         tbl->CEs = newData;
     43         tbl->codePoints = newCPs;
     44         tbl->size *= 2;
     45     }
     46 }
     47 
     48 U_CAPI CntTable*  U_EXPORT2
     49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
     50 uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
     51     if(U_FAILURE(*status)) {
     52         return 0;
     53     }
     54     CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
     55     if(tbl == NULL) {
     56         *status = U_MEMORY_ALLOCATION_ERROR;
     57         return NULL;
     58     }
     59     tbl->mapping = mapping;
     60     tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
     61     if(tbl->elements == NULL) {
     62         *status = U_MEMORY_ALLOCATION_ERROR;
     63         uprv_free(tbl);
     64         return NULL;
     65     }
     66     tbl->capacity = INIT_EXP_TABLE_SIZE;
     67     uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
     68     tbl->size = 0;
     69     tbl->position = 0;
     70     tbl->CEs = NULL;
     71     tbl->codePoints = NULL;
     72     tbl->offsets = NULL;
     73     tbl->currentTag = NOT_FOUND_TAG;
     74     return tbl;
     75 }
     76 
     77 static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
     78     ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
     79     if(el == NULL) {
     80         goto outOfMemory;
     81     }
     82     el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
     83     if(el->CEs == NULL) {
     84         goto outOfMemory;
     85     }
     86 
     87     el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
     88     if(el->codePoints == NULL) {
     89         uprv_free(el->CEs);
     90         goto outOfMemory;
     91     }
     92 
     93     el->position = 0;
     94     el->size = INIT_EXP_TABLE_SIZE;
     95     uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
     96     uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
     97 
     98     table->elements[table->size] = el;
     99 
    100     //uhash_put(table->elements, (void *)table->size, el, status);
    101 
    102     *key = table->size++;
    103 
    104     if(table->size == table->capacity) {
    105         ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
    106         // do realloc
    107         /*        table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
    108         if(newElements == NULL) {
    109             uprv_free(el->codePoints);
    110             uprv_free(el->CEs);
    111             goto outOfMemory;
    112         }
    113         ContractionTable **oldElements = table->elements;
    114         uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
    115         uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
    116         table->capacity *= 2;
    117         table->elements = newElements;
    118         uprv_free(oldElements);
    119     }
    120 
    121     return el;
    122 
    123 outOfMemory:
    124     *status = U_MEMORY_ALLOCATION_ERROR;
    125     if (el) uprv_free(el);
    126     return NULL;
    127 }
    128 
    129 U_CAPI int32_t  U_EXPORT2
    130 uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
    131     int32_t i = 0, j = 0;
    132     if(U_FAILURE(*status) || table->size == 0) {
    133         return 0;
    134     }
    135 
    136     table->position = 0;
    137 
    138     if(table->offsets != NULL) {
    139         uprv_free(table->offsets);
    140     }
    141     table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
    142     if(table->offsets == NULL) {
    143         *status = U_MEMORY_ALLOCATION_ERROR;
    144         return 0;
    145     }
    146 
    147 
    148     /* See how much memory we need */
    149     for(i = 0; i<table->size; i++) {
    150         table->offsets[i] = table->position+mainOffset;
    151         table->position += table->elements[i]->position;
    152     }
    153 
    154     /* Allocate it */
    155     if(table->CEs != NULL) {
    156         uprv_free(table->CEs);
    157     }
    158     table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
    159     if(table->CEs == NULL) {
    160         *status = U_MEMORY_ALLOCATION_ERROR;
    161         uprv_free(table->offsets);
    162         table->offsets = NULL;
    163         return 0;
    164     }
    165     uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
    166 
    167     if(table->codePoints != NULL) {
    168         uprv_free(table->codePoints);
    169     }
    170     table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
    171     if(table->codePoints == NULL) {
    172         *status = U_MEMORY_ALLOCATION_ERROR;
    173         uprv_free(table->offsets);
    174         table->offsets = NULL;
    175         uprv_free(table->CEs);
    176         table->CEs = NULL;
    177         return 0;
    178     }
    179     uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
    180 
    181     /* Now stuff the things in*/
    182 
    183     UChar *cpPointer = table->codePoints;
    184     uint32_t *CEPointer = table->CEs;
    185     for(i = 0; i<table->size; i++) {
    186         int32_t size = table->elements[i]->position;
    187         uint8_t ccMax = 0, ccMin = 255, cc = 0;
    188         for(j = 1; j<size; j++) {
    189             cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
    190             if(cc>ccMax) {
    191                 ccMax = cc;
    192             }
    193             if(cc<ccMin) {
    194                 ccMin = cc;
    195             }
    196             *(cpPointer+j) = table->elements[i]->codePoints[j];
    197         }
    198         *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
    199 
    200         uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
    201         for(j = 0; j<size; j++) {
    202             if(isCntTableElement(*(CEPointer+j))) {
    203                 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
    204             }
    205         }
    206         cpPointer += size;
    207         CEPointer += size;
    208     }
    209 
    210     // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
    211     // start of the flat file). However, what is done below is just wrong and it affects building of
    212     // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
    213     // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
    214     // feasible.
    215     uint32_t CE;
    216     for(i = 0; i<=0x10FFFF; i++) {
    217         /*CE = ucmpe32_get(table->mapping, i);*/
    218         CE = utrie_get32(table->mapping, i, NULL);
    219         if(isCntTableElement(CE)) {
    220             CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
    221             /*ucmpe32_set(table->mapping, i, CE);*/
    222             utrie_set32(table->mapping, i, CE);
    223         }
    224     }
    225 
    226 
    227     return table->position;
    228 }
    229 
    230 static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
    231     ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
    232     if(r == NULL) {
    233         goto outOfMemory;
    234     }
    235 
    236     r->position = t->position;
    237     r->size = t->size;
    238 
    239     r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
    240     if(r->codePoints == NULL) {
    241         goto outOfMemory;
    242     }
    243     r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
    244     if(r->CEs == NULL) {
    245         uprv_free(r->codePoints);
    246         goto outOfMemory;
    247     }
    248     uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
    249     uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
    250 
    251     return r;
    252 
    253 outOfMemory:
    254     *status = U_MEMORY_ALLOCATION_ERROR;
    255     if (r) uprv_free(r);
    256     return NULL;
    257 }
    258 
    259 U_CAPI CntTable* U_EXPORT2
    260 uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
    261     if(U_FAILURE(*status)) {
    262         return NULL;
    263     }
    264     int32_t i = 0;
    265     CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
    266     /* test for NULL */
    267     if (r == NULL) {
    268         goto outOfMemory;
    269     }
    270     r->position = t->position;
    271     r->size = t->size;
    272     r->capacity = t->capacity;
    273 
    274     r->mapping = t->mapping;
    275 
    276     r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
    277     /* test for NULL */
    278     if (r->elements == NULL) {
    279         goto outOfMemory;
    280     }
    281     //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
    282 
    283     for(i = 0; i<t->size; i++) {
    284         r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
    285     }
    286 
    287     if(t->CEs != NULL) {
    288         r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
    289         /* test for NULL */
    290         if (r->CEs == NULL) {
    291             uprv_free(r->elements);
    292             goto outOfMemory;
    293         }
    294         uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
    295     } else {
    296         r->CEs = NULL;
    297     }
    298 
    299     if(t->codePoints != NULL) {
    300         r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
    301         /* test for NULL */
    302         if (r->codePoints == NULL) {
    303             uprv_free(r->CEs);
    304             uprv_free(r->elements);
    305             goto outOfMemory;
    306         }
    307         uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
    308     } else {
    309         r->codePoints = NULL;
    310     }
    311 
    312     if(t->offsets != NULL) {
    313         r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
    314         /* test for NULL */
    315         if (r->offsets == NULL) {
    316             uprv_free(r->codePoints);
    317             uprv_free(r->CEs);
    318             uprv_free(r->elements);
    319             goto outOfMemory;
    320         }
    321         uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
    322     } else {
    323         r->offsets = NULL;
    324     }
    325 
    326     return r;
    327 
    328 outOfMemory:
    329     *status = U_MEMORY_ALLOCATION_ERROR;
    330     if (r) uprv_free(r);
    331     return NULL;
    332 }
    333 
    334 U_CAPI void  U_EXPORT2
    335 uprv_cnttab_close(CntTable *table) {
    336     int32_t i = 0;
    337     for(i = 0; i<table->size; i++) {
    338         uprv_free(table->elements[i]->CEs);
    339         uprv_free(table->elements[i]->codePoints);
    340         uprv_free(table->elements[i]);
    341     }
    342     uprv_free(table->elements);
    343     uprv_free(table->CEs);
    344     uprv_free(table->offsets);
    345     uprv_free(table->codePoints);
    346     uprv_free(table);
    347 }
    348 
    349 /* this is for adding non contractions */
    350 U_CAPI uint32_t  U_EXPORT2
    351 uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
    352     element &= 0xFFFFFF;
    353 
    354     ContractionTable *tbl = NULL;
    355     if(U_FAILURE(*status)) {
    356         return 0;
    357     }
    358 
    359     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
    360         return 0;
    361     }
    362 
    363     tbl->CEs[tbl->position-1] = value;
    364 
    365     return(constructContractCE(table->currentTag, element));
    366 }
    367 
    368 
    369 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
    370 U_CAPI uint32_t  U_EXPORT2
    371 uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
    372 
    373     ContractionTable *tbl = NULL;
    374 
    375     if(U_FAILURE(*status)) {
    376         return 0;
    377     }
    378     element &= 0xFFFFFF;
    379 
    380     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
    381         tbl = addATableElement(table, &element, status);
    382         if (U_FAILURE(*status)) {
    383             return 0;
    384         }
    385     }
    386 
    387     uprv_growTable(tbl, status);
    388 
    389     uint32_t offset = 0;
    390 
    391 
    392     while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
    393         offset++;
    394     }
    395 
    396     uint32_t i = tbl->position;
    397     for(i = tbl->position; i > offset; i--) {
    398         tbl->CEs[i] = tbl->CEs[i-1];
    399         tbl->codePoints[i] = tbl->codePoints[i-1];
    400     }
    401 
    402     tbl->CEs[offset] = value;
    403     tbl->codePoints[offset] = codePoint;
    404 
    405     tbl->position++;
    406 
    407     return(constructContractCE(table->currentTag, element));
    408 }
    409 
    410 
    411 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
    412 U_CAPI uint32_t  U_EXPORT2
    413 uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
    414 
    415     element &= 0xFFFFFF;
    416 
    417     ContractionTable *tbl = NULL;
    418 
    419     if(U_FAILURE(*status)) {
    420         return 0;
    421     }
    422 
    423     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
    424         tbl = addATableElement(table, &element, status);
    425         if (U_FAILURE(*status)) {
    426             return 0;
    427         }
    428     }
    429 
    430     uprv_growTable(tbl, status);
    431 
    432     tbl->CEs[tbl->position] = value;
    433     tbl->codePoints[tbl->position] = codePoint;
    434 
    435     tbl->position++;
    436 
    437     return(constructContractCE(table->currentTag, element));
    438 }
    439 
    440 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
    441 U_CAPI uint32_t  U_EXPORT2
    442 uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
    443 
    444     element &= 0xFFFFFF;
    445     ContractionTable *tbl = NULL;
    446 
    447     if(U_FAILURE(*status)) {
    448         return 0;
    449     }
    450 
    451     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
    452         tbl = addATableElement(table, &element, status);
    453         if (U_FAILURE(*status)) {
    454             return 0;
    455         }
    456 
    457     }
    458 
    459     if(offset >= tbl->size) {
    460         *status = U_INDEX_OUTOFBOUNDS_ERROR;
    461         return 0;
    462     }
    463     tbl->CEs[offset] = value;
    464     tbl->codePoints[offset] = codePoint;
    465 
    466     //return(offset);
    467     return(constructContractCE(table->currentTag, element));
    468 }
    469 
    470 static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
    471     element &= 0xFFFFFF;
    472     ContractionTable *tbl = NULL;
    473 
    474     if(element != 0xFFFFFF) {
    475         tbl = table->elements[element]; /* This could also return NULL */
    476     }
    477     return tbl;
    478 }
    479 
    480 static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
    481     uint32_t position = 0;
    482     if(tbl == NULL) {
    483         return -1;
    484     }
    485 
    486     while(codePoint > tbl->codePoints[position]) {
    487         position++;
    488         if(position > tbl->position) {
    489             return -1;
    490         }
    491     }
    492     if (codePoint == tbl->codePoints[position]) {
    493         return position;
    494     } else {
    495         return -1;
    496     }
    497 }
    498 
    499 static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
    500     if(tbl == NULL) {
    501         return UCOL_NOT_FOUND;
    502     }
    503     if((uint32_t)position > tbl->position || position == -1) {
    504         return UCOL_NOT_FOUND;
    505     } else {
    506         return tbl->CEs[position];
    507     }
    508 }
    509 
    510 U_CAPI int32_t  U_EXPORT2
    511 uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
    512 
    513     if(U_FAILURE(*status)) {
    514         return 0;
    515     }
    516 
    517     return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
    518 }
    519 
    520 U_CAPI uint32_t  U_EXPORT2
    521 uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
    522     if(U_FAILURE(*status)) {
    523         return UCOL_NOT_FOUND;
    524     }
    525 
    526     return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
    527 }
    528 
    529 U_CAPI uint32_t  U_EXPORT2
    530 uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
    531     if(U_FAILURE(*status)) {
    532         return UCOL_NOT_FOUND;
    533     }
    534     ContractionTable *tbl = _cnttab_getContractionTable(table, element);
    535     return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
    536 }
    537 
    538 U_CAPI UBool  U_EXPORT2
    539 uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
    540     if(U_FAILURE(*status)) {
    541         return FALSE;
    542     }
    543 
    544     while(*(ztString)!=0) {
    545         element = uprv_cnttab_findCE(table, element, *(ztString), status);
    546         if(element == UCOL_NOT_FOUND) {
    547             return FALSE;
    548         }
    549         if(!isCntTableElement(element)) {
    550             return TRUE;
    551         }
    552         ztString++;
    553     }
    554     return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND);
    555 }
    556 
    557 U_CAPI uint32_t  U_EXPORT2
    558 uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
    559 
    560     element &= 0xFFFFFF;
    561     ContractionTable *tbl = NULL;
    562 
    563     if(U_FAILURE(*status)) {
    564         return 0;
    565     }
    566 
    567     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
    568         return 0;
    569     }
    570 
    571     uint32_t position = 0;
    572 
    573     while(codePoint > tbl->codePoints[position]) {
    574         position++;
    575         if(position > tbl->position) {
    576             return UCOL_NOT_FOUND;
    577         }
    578     }
    579     if (codePoint == tbl->codePoints[position]) {
    580         tbl->CEs[position] = newCE;
    581         return element;
    582     } else {
    583         return UCOL_NOT_FOUND;
    584     }
    585 }
    586 
    587 #endif /* #if !UCONFIG_NO_COLLATION */
    588