Home | History | Annotate | Download | only in common

Lines Matching full:trie

16 *   This is a common implementation of a "folded" trie.
44 /* Building a trie ----------------------------------------------------------*/
51 UNewTrie *trie;
61 trie=fillIn;
63 trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie));
64 if(trie==NULL) {
68 uprv_memset(trie, 0, sizeof(UNewTrie));
69 trie->isAllocated= (UBool)(fillIn==NULL);
72 trie->data=aliasData;
73 trie->isDataAllocated=FALSE;
75 trie->data=(uint32_t *)uprv_malloc(maxDataLength*4);
76 if(trie->data==NULL) {
77 uprv_free(trie);
80 trie->isDataAllocated=TRUE;
93 /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */
94 trie->index[i++]=j;
100 trie->dataLength=j;
102 trie->data[--j]=initialValue;
105 trie->leadUnitValue=leadUnitValue;
106 trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
107 trie->dataCapacity=maxDataLength;
108 trie->isLatin1Linear=latin1Linear;
109 trie->isCompacted=FALSE;
110 return trie;
115 UNewTrie *trie;
135 trie=utrie_open(fillIn, aliasData, aliasDataCapacity,
138 if(trie==NULL) {
141 uprv_memcpy(trie->index, other->index, sizeof(trie->index));
142 uprv_memcpy(trie->data, other->data, other->dataLength*4);
143 trie->dataLength=other->dataLength;
144 trie->isDataAllocated=isDataAllocated;
147 return trie;
151 utrie_close(UNewTrie *trie) {
152 if(trie!=NULL) {
153 if(trie->isDataAllocated) {
154 uprv_free(trie->data);
155 trie->data=NULL;
157 if(trie->isAllocated) {
158 uprv_free(trie);
164 utrie_getData(UNewTrie *trie, int32_t *pLength) {
165 if(trie==NULL || pLength==NULL) {
169 *pLength=trie->dataLength;
170 return trie->data;
174 utrie_allocDataBlock(UNewTrie *trie) {
177 newBlock=trie->dataLength;
179 if(newTop>trie->dataCapacity) {
183 trie->dataLength=newTop;
194 utrie_getDataBlock(UNewTrie *trie, UChar32 c) {
198 indexValue=trie->index[c];
204 newBlock=utrie_allocDataBlock(trie);
209 trie->index[c]=newBlock;
212 uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
220 utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) {
223 /* valid, uncompacted trie and valid c? */
224 if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
228 block=utrie_getDataBlock(trie, c);
233 trie->data[block+(c&UTRIE_MASK)]=value;
238 utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) {
241 /* valid, uncompacted trie and valid c? */
242 if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
249 block=trie->index[c>>UTRIE_SHIFT];
254 return trie->data[ABS(block)+(c&UTRIE_MASK)];
282 utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) {
291 /* valid, uncompacted trie and valid indexes? */
292 if( trie==NULL || trie->isCompacted ||
301 initialValue=trie->data[0];
306 block=utrie_getDataBlock(trie, start);
313 utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH,
317 utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK,
337 block=trie->index[start>>UTRIE_SHIFT];
340 utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite);
341 } else if(trie->data[-block]!=value && (block==0 || overwrite)) {
344 trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
347 repeatBlock=utrie_getDataBlock(trie, start);
353 trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
354 utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE);
363 block=utrie_getDataBlock(trie, start);
368 utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite);
394 * a compact area on top of the BMP-part of the trie index,
403 utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) {
413 idx=trie->index;
428 if(trie->leadUnitValue==trie->data[0]) {
432 block=utrie_allocDataBlock(trie);
438 utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE);
442 trie->index[c]=block;
473 value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT);
474 if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) {
475 if(!utrie_set32(trie, U16_LEAD(c), value)) {
528 printf("trie index count: BMP %ld all Unicode %ld folded %ld\n",
532 trie->indexLength=indexLength;
536 * Set a value in the trie index map to indicate which data block
544 _findUnusedBlocks(UNewTrie *trie) {
548 uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4);
551 for(i=0; i<trie->indexLength; ++i) {
552 trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0;
556 trie->map[0]=0;
576 * Compact a folded build-time trie.
588 utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) {
595 /* valid, uncompacted trie? */
596 if(trie==NULL) {
600 if(trie->isCompacted) {
607 _findUnusedBlocks(trie);
610 if(trie->isLatin1Linear && UTRIE_SHIFT<=8) {
617 for(start=newStart; start<trie->dataLength;) {
625 if(trie->map[start>>UTRIE_SHIFT]<0) {
635 (i=_findSameDataBlock(trie->data, newStart, start,
640 trie->map[start>>UTRIE_SHIFT]=i;
653 i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i);
661 trie->map[start>>UTRIE_SHIFT]=newStart-i;
666 trie->data[newStart++]=trie->data[start++];
670 trie->map[start>>UTRIE_SHIFT]=newStart;
672 trie->data[newStart++]=trie->data[start++];
675 trie->map[start>>UTRIE_SHIFT]=start;
682 for(i=0; i<trie->indexLength; ++i) {
683 trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT];
688 printf("compacting trie: count of 32-bit words %lu->%lu\n",
689 (long)trie->dataLength, (long)newStart);
692 trie->dataLength=newStart;
704 * which fits into 16-bit trie values;
719 defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
724 initialValue=trie->data[0];
727 value=utrie_get32(trie, start, &inBlockZero);
740 utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity,
755 if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) {
765 if(!trie->isCompacted) {
767 utrie_compact(trie, FALSE, pErrorCode);
770 utrie_fold(trie, getFoldedValue, pErrorCode);
773 utrie_compact(trie, TRUE, pErrorCode);
775 trie->isCompacted=TRUE;
782 if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) {
786 length=sizeof(UTrieHeader)+2*trie->indexLength;
788 length+=2*trie->dataLength;
790 length+=4*trie->dataLength;
799 (long)trie->indexLength, (long)trie->dataLength, (long)length);
806 header->signature=0x54726965; /* "Trie" */
812 if(trie->isLatin1Linear) {
816 header->indexLength=trie->indexLength;
817 header->dataLength=trie->dataLength;
822 p=(uint32_t *)trie->index;
824 for(i=trie->indexLength; i>0; --i) {
825 *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT);
829 p=trie->data;
830 for(i=trie->dataLength; i>0; --i) {
835 p=(uint32_t *)trie->index;
837 for(i=trie->indexLength; i>0; --i) {
842 uprv_memcpy(dest16, trie->data, 4*trie->dataLength);
855 utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) {
864 /* enough data for a trie header? */
885 trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0);
888 trie->indexLength=header->indexLength;
889 trie->dataLength=header->dataLength;
894 if(length<2*trie->indexLength) {
899 trie->index=p16;
900 p16+=trie->indexLength;
901 length-=2*trie->indexLength;
905 if(length<4*trie->dataLength) {
909 trie->data32=(const uint32_t *)p16;
910 trie->initialValue=trie->data32[0];
911 length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength;
913 if(length<2*trie->dataLength) {
919 trie->data32=NULL;
920 trie->initialValue=trie->index[trie->indexLength];
921 length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength;
924 trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
930 utrie_unserializeDummy(UTrie *trie,
943 /* calculate the actual size of the dummy trie data */
948 trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT;
949 trie->dataLength=latin1Length;
951 trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH;
954 actualLength=trie->indexLength*2;
956 actualLength+=trie->dataLength*2;
958 actualLength+=trie->dataLength*4;
961 /* enough space for the dummy trie? */
967 trie->isLatin1Linear=TRUE;
968 trie->initialValue=initialValue;
972 trie->index=p16;
976 block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT);
977 limit=trie->indexLength;
992 trie->data32=NULL;
995 p16+=trie->indexLength;
1011 uprv_memset(p16, 0, trie->indexLength*2);
1023 trie->data32=p32=(uint32_t *)(p16+trie->indexLength);
1039 trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
1054 * The values are transformed from the raw trie entries by the enumValue function.
1057 utrie_enum(const UTrie *trie,
1067 if(trie==NULL || trie->index==NULL || enumRange==NULL) {
1074 idx=trie->index;
1075 data32=trie->data32;
1077 /* get the enumeration value that corresponds to an initial-value trie data entry */
1078 initialValue=enumValue(context, trie->initialValue);
1081 nullBlock=trie->indexLength;
1165 offset=trie->getFoldingOffset(value);