Lines Matching defs:mbcsData
35 struct MBCSData {
71 MBCSStartMappings(MBCSData *mbcsData);
74 MBCSAddToUnicode(MBCSData *mbcsData,
84 MBCSSingleAddFromUnicode(MBCSData *mbcsData,
90 MBCSAddFromUnicode(MBCSData *mbcsData,
96 MBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData *staticData);
128 static MBCSData gDummy;
130 U_CFUNC const MBCSData *
132 uprv_memset(&gDummy, 0, sizeof(MBCSData));
152 MBCSInit(MBCSData *mbcsData, UCMFile *ucm) {
153 uprv_memset(mbcsData, 0, sizeof(MBCSData));
155 mbcsData->ucm=ucm; /* aliased, not owned */
157 mbcsData->newConverter.close=MBCSClose;
158 mbcsData->newConverter.isValid=MBCSIsValid;
159 mbcsData->newConverter.addTable=MBCSAddTable;
160 mbcsData->newConverter.write=MBCSWrite;
165 MBCSData *mbcsData=(MBCSData *)uprv_malloc(sizeof(MBCSData));
166 if(mbcsData==NULL) {
171 MBCSInit(mbcsData, ucm);
172 return &mbcsData->newConverter;
176 MBCSDestruct(MBCSData *mbcsData) {
177 uprv_free(mbcsData->unicodeCodeUnits);
178 uprv_free(mbcsData->fromUBytes);
183 MBCSData *mbcsData=(MBCSData *)cnvData;
184 if(mbcsData!=NULL) {
185 MBCSDestruct(mbcsData);
186 uprv_free(mbcsData);
191 MBCSStartMappings(MBCSData *mbcsData) {
199 sum=mbcsData->ucm->states.countToUCodeUnits;
205 mbcsData->unicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t));
206 if(mbcsData->unicodeCodeUnits==NULL) {
212 mbcsData->unicodeCodeUnits[i]=0xfffe;
217 maxCharLength=mbcsData->ucm->states.maxCharLength;
227 mbcsData->fromUBytes=(uint8_t *)uprv_malloc(sum);
228 if(mbcsData->fromUBytes==NULL) {
232 uprv_memset(mbcsData->fromUBytes, 0, sum);
275 mbcsData->stage1[i]=sum;
278 mbcsData->stage2Top=stage2NullLength+stage2AllocLength; /* ==sum */
289 mbcsData->stage2Single[mbcsData->stage1[0]+i]=sum;
295 mbcsData->stage2[mbcsData->stage1[0]+i]=sum;
302 mbcsData->stageUTF8[i]=sum;
311 mbcsData->stage3Top=(stage3NullLength+stage3AllocLength)*maxCharLength; /* ==sum*maxCharLength */
318 setFallback(MBCSData *mbcsData, uint32_t offset, UChar32 c) {
319 int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset);
322 mbcsData->toUFallbacks[i].codePoint=c;
326 i=mbcsData->countToUFallbacks;
331 mbcsData->toUFallbacks[i].offset=offset;
332 mbcsData->toUFallbacks[i].codePoint=c;
333 mbcsData->countToUFallbacks=i+1;
341 removeFallback(MBCSData *mbcsData, uint32_t offset) {
342 int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset);
347 toUFallbacks=mbcsData->toUFallbacks;
348 limit=mbcsData->countToUFallbacks;
354 mbcsData->countToUFallbacks=limit-1;
368 MBCSAddToUnicode(MBCSData *mbcsData,
377 if(mbcsData->ucm->states.countStates==0) {
383 if(length==2 && mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO) {
393 entry=mbcsData->ucm->states.stateTable[state][bytes[i++]];
454 mbcsData->ucm->states.stateTable[state][bytes[i-1]]=entry;
461 if((old=mbcsData->unicodeCodeUnits[offset])!=0xfffe || (old=removeFallback(mbcsData, offset))!=-1) {
478 if(mbcsData->unicodeCodeUnits[offset]==0xfffe) {
479 return setFallback(mbcsData, offset, c);
482 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
490 old=mbcsData->unicodeCodeUnits[offset];
496 real=0x10000+((old&0x3ff)<<10)+((mbcsData->unicodeCodeUnits[offset+1])&0x3ff);
498 real=mbcsData->unicodeCodeUnits[offset+1];
515 mbcsData->unicodeCodeUnits[offset++]=0xe001;
516 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
519 mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xdbc0+(c>>10));
520 mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff));
525 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
528 mbcsData->unicodeCodeUnits[offset++]=0xe000;
529 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
532 mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xd7c0+(c>>10));
533 mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff));
553 MBCSData *mbcsData=(MBCSData *)cnvData;
555 return (UBool)(1==ucm_countChars(&mbcsData->ucm->states, bytes, length));
559 MBCSSingleAddFromUnicode(MBCSData *mbcsData,
581 stage3=(uint16_t *)mbcsData->fromUBytes;
586 if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) {
591 if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) {
593 newBlock=mbcsData->stage2Top;
594 if(mbcsData->utf8Friendly) {
596 while(min<newBlock && mbcsData->stage2Single[newBlock-1]==0) {
611 mbcsData->stage1[idx]=(uint16_t)newBlock;
612 mbcsData->stage2Top=newTop;
616 idx=mbcsData->stage1[idx]+nextOffset;
617 if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) {
625 if(mbcsData->stage2Single[idx]==0) {
627 newBlock=mbcsData->stage3Top;
628 if(mbcsData->utf8Friendly) {
643 mbcsData->stage2Single[i++]=(uint16_t)newBlock;
646 mbcsData->stage3Top=newTop; /* ==newBlock */
650 p=stage3+mbcsData->stage2Single[idx]+nextOffset;
677 MBCSAddFromUnicode(MBCSData *mbcsData,
689 maxCharLength=mbcsData->ucm->states.maxCharLength;
691 if( mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO &&
712 stage3=mbcsData->fromUBytes;
716 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
721 if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) {
723 newBlock=mbcsData->stage2Top;
724 if(mbcsData->utf8Friendly) {
726 while(min<newBlock && mbcsData->stage2[newBlock-1]==0) {
744 mbcsData->stage1[i++]=(uint16_t)newBlock;
747 mbcsData->stage2Top=newTop; /* ==newBlock */
751 idx=mbcsData->stage1[idx]+nextOffset;
752 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
760 if(mbcsData->stage2[idx]==0) {
762 newBlock=mbcsData->stage3Top;
763 if(mbcsData->utf8Friendly && nextOffset>=MBCS_STAGE_3_GRANULARITY) {
788 mbcsData->stage2[i++]=(newBlock/MBCS_STAGE_3_GRANULARITY)/maxCharLength;
791 mbcsData->stage3Top=newTop; /* ==newBlock */
794 stage3Index=MBCS_STAGE_3_GRANULARITY*(uint32_t)(uint16_t)mbcsData->stage2[idx];
797 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
818 mbcsData->utf8Max=0xfeff;
824 mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index;
874 if((mbcsData->stage2[idx+(nextOffset>>MBCS_STAGE_2_SHIFT)]&(1UL<<(16+(c&0xf))))!=0 || old!=0) {
888 mbcsData
895 MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
922 if(mbcsData->utf8Friendly && flag<=1 && c<=mbcsData->utf8Max && (bytes[0]==0 || flag==1)) {
931 if(mbcsData->omitFromU && flag!=0) {
942 MBCSData *mbcsData;
957 mbcsData=(MBCSData *)cnvData;
958 maxCharLength=mbcsData->ucm->states.maxCharLength;
965 mbcsData->utf8Friendly=utf8Friendly=(UBool)((table->flagsType&UCM_FLAGS_EXPLICIT)!=0);
967 mbcsData->utf8Max=MBCS_UTF8_MAX;
969 mbcsData->omitFromU=TRUE;
972 mbcsData->utf8Max=0;
979 if(!MBCSStartMappings(mbcsData)) {
1008 if( mbcsData->omitFromU && f<=1 &&
1009 mbcsData->utf8Max<c && c<=0xffff &&
1010 mbcsData->utf8Max<0xfeff
1012 mbcsData->utf8Max=0xffff;
1022 isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1025 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1026 } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
1027 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1037 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1038 } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
1040 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1056 isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1070 MBCSPostprocess(mbcsData, staticData);
1076 transformEUC(MBCSData *mbcsData) {
1081 oldLength=mbcsData->ucm->states.maxCharLength;
1086 old3Top=mbcsData->stage3Top;
1091 p8=mbcsData->fromUBytes;
1107 p8=mbcsData->fromUBytes;
1110 mbcsData->ucm->states.outputType=(int8_t)(MBCS_OUTPUT_3_EUC+oldLength-3);
1111 mbcsData->stage3Top=(old3Top*(oldLength-1))/oldLength;
1173 singleCompactStage2(MBCSData *mbcsData) {
1183 while(start<mbcsData->stage2Top) {
1187 for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2Single[start+i]==0 && mbcsData->stage2Single[prevEnd-i]==0; ++i) {}
1195 mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++];
1201 mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++];
1210 if(VERBOSE && newStart<mbcsData->stage2Top) {
1212 (unsigned long)mbcsData->stage2Top, (unsigned long)newStart,
1213 (long)(mbcsData->stage2Top-newStart)*2);
1215 mbcsData->stage2Top=newStart;
1219 mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT];
1225 singleCompactStage3(MBCSData *mbcsData) {
1226 uint16_t *stage3=(uint16_t *)mbcsData->fromUBytes;
1237 while(start<mbcsData->stage3Top) {
1264 if(VERBOSE && newStart<mbcsData->stage3Top) {
1266 (unsigned long)mbcsData->stage3Top, (unsigned long)newStart,
1267 (long)(mbcsData->stage3Top-newStart)*2);
1269 mbcsData->stage3Top=newStart;
1272 for(i=0; i<mbcsData->stage2Top; ++i) {
1273 mbcsData->stage2Single[i]=map[mbcsData->stage2Single[i]>>4];
1285 compactStage2(MBCSData *mbcsData) {
1295 while(start<mbcsData->stage2Top) {
1299 for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2[start+i]==0 && mbcsData->stage2[prevEnd-i]==0; ++i) {}
1307 mbcsData->stage2[newStart++]=mbcsData->stage2[start++];
1313 mbcsData->stage2[newStart++]=mbcsData->stage2[start++];
1322 if(VERBOSE && newStart<mbcsData->stage2Top) {
1324 (unsigned long)mbcsData->stage2Top, (unsigned long)newStart,
1325 (long)(mbcsData->stage2Top-newStart)*4);
1327 mbcsData->stage2Top=newStart;
1331 mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT];
1336 MBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData * /*staticData*/) {
1340 states=&mbcsData->ucm->states;
1344 &mbcsData->unicodeCodeUnits,
1345 mbcsData->toUFallbacks, mbcsData->countToUFallbacks,
1349 if(transformEUC(mbcsData)) {
1363 if(!mbcsData->utf8Friendly) {
1365 singleCompactStage3(mbcsData);
1366 singleCompactStage2(mbcsData);
1368 compactStage2(mbcsData);
1377 (unsigned long)mbcsData->stage2Top,
1378 (unsigned long)mbcsData->stage2Top);
1381 (unsigned long)mbcsData->stage3Top/stage3Width,
1382 (unsigned long)mbcsData->stage3Top/stage3Width);
1386 i2=mbcsData->stage1[i1];
1393 i3=mbcsData->stage2Single[i2];
1395 i3=(uint16_t)mbcsData->stage2[i2];
1416 MBCSData *mbcsData=(MBCSData *)cnvData;
1424 stage2Length=mbcsData->stage2Top;
1425 if(mbcsData->omitFromU) {
1427 int32_t utf8Limit=(int32_t)mbcsData->utf8Max+1;
1431 if((utf8Limit&((1<<MBCS_STAGE_1_SHIFT)-1))!=0 && (st2=mbcsData->stage1[i])!=0) {
1436 while(i>0 && (st2=mbcsData->stage1[--i])==0) {}
1446 (unsigned long)mbcsData->stage2Top,
1447 (unsigned long)mbcsData->stage3Top);
1448 printf("+ total size savings: %lu bytes\n", (unsigned long)stage2Start*4+mbcsData->stage3Top);
1461 if(mbcsData->ucm->states.maxCharLength==1) {
1463 mbcsData->stage1[i]+=(uint16_t)stage1Top;
1471 mbcsData->stage3Top*=2;
1473 if(mbcsData->utf8Friendly) {
1478 mbcsData->stage1[i]+=(uint16_t)stage1Top/2; /* stage 2 contains 32-bit entries, stage 1 16-bit entries */
1485 if(mbcsData->utf8Friendly) {
1486 stageUTF8Length=(mbcsData->utf8Max+1)>>MBCS_UTF8_STAGE_SHIFT;
1487 header.version[2]=(uint8_t)(mbcsData->utf8Max>>8); /* store 0xd7 for max==0xd7ff */
1494 mbcsData->stage3Top=(mbcsData->stage3Top+3)&~3;
1513 header.countStates=mbcsData->ucm->states.countStates;
1514 header.countToUFallbacks=mbcsData->countToUFallbacks;
1518 mbcsData->ucm->states.countStates*1024+
1519 mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback);
1522 mbcsData->ucm->states.countToUCodeUnits*2;
1527 header.fromUBytesLength=mbcsData->stage3Top;
1534 header.flags=(uint8_t)(mbcsData->ucm->states.outputType);
1547 udata_writeBlock(pData, mbcsData->ucm->states.stateTable, header.countStates*1024);
1548 udata_writeBlock(pData, mbcsData->toUFallbacks, mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback));
1549 udata_writeBlock(pData, mbcsData->unicodeCodeUnits, mbcsData->ucm->states.countToUCodeUnits*2);
1550 udata_writeBlock(pData, mbcsData->stage1, stage1Top*2);
1551 if(mbcsData->ucm->states.maxCharLength==1) {
1552 udata_writeBlock(pData, mbcsData->stage2Single+stage2Start, stage2Length);
1554 udata_writeBlock(pData, mbcsData->stage2+stage2Start, stage2Length);
1557 udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top);
1561 udata_writeBlock(pData, mbcsData->stageUTF8, stageUTF8Length*2);